From c0d62ad9e6b0783f73b795c82626254e2239d2f0 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Tue, 31 Mar 2009 14:07:08 -0700 Subject: [PATCH 1/3] 6761600: Use sse 4.2 in intrinsics Use SSE 4.2 in intrinsics for String.{compareTo/equals/indexOf} and Arrays.equals. Reviewed-by: kvn, never, jrose --- hotspot/src/cpu/sparc/vm/sparc.ad | 215 ++++++++++ hotspot/src/cpu/x86/vm/assembler_x86.cpp | 48 +++ hotspot/src/cpu/x86/vm/assembler_x86.hpp | 8 + hotspot/src/cpu/x86/vm/vm_version_x86.cpp | 5 + hotspot/src/cpu/x86/vm/x86_32.ad | 427 +++++++++++++++--- hotspot/src/cpu/x86/vm/x86_64.ad | 428 ++++++++++++++++--- hotspot/src/share/vm/adlc/formssel.cpp | 14 +- hotspot/src/share/vm/classfile/vmSymbols.hpp | 3 +- hotspot/src/share/vm/opto/classes.hpp | 2 + hotspot/src/share/vm/opto/gcm.cpp | 6 + hotspot/src/share/vm/opto/lcm.cpp | 2 + hotspot/src/share/vm/opto/library_call.cpp | 240 ++++++++--- hotspot/src/share/vm/opto/loopnode.cpp | 2 + hotspot/src/share/vm/opto/matcher.cpp | 4 + hotspot/src/share/vm/opto/memnode.cpp | 26 +- hotspot/src/share/vm/opto/memnode.hpp | 48 +++ hotspot/src/share/vm/runtime/arguments.cpp | 3 - hotspot/src/share/vm/runtime/globals.hpp | 8 +- 18 files changed, 1297 insertions(+), 192 deletions(-) diff --git a/hotspot/src/cpu/sparc/vm/sparc.ad b/hotspot/src/cpu/sparc/vm/sparc.ad index df6e9049ebb..c65cc5495e6 100644 --- a/hotspot/src/cpu/sparc/vm/sparc.ad +++ b/hotspot/src/cpu/sparc/vm/sparc.ad @@ -3003,6 +3003,202 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{ __ bind(Ldone); %} +enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ + Label Lword, Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone; + MacroAssembler _masm(&cbuf); + + Register str1_reg = reg_to_register_object($str1$$reg); + Register str2_reg = reg_to_register_object($str2$$reg); + Register tmp1_reg = reg_to_register_object($tmp1$$reg); + Register tmp2_reg = reg_to_register_object($tmp2$$reg); + Register result_reg = reg_to_register_object($result$$reg); + + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + int value_offset = java_lang_String:: value_offset_in_bytes(); + int offset_offset = java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String:: count_offset_in_bytes(); + + // load str1 (jchar*) base address into tmp1_reg + __ load_heap_oop(Address(str1_reg, 0, value_offset), tmp1_reg); + __ ld(Address(str1_reg, 0, offset_offset), result_reg); + __ add(tmp1_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1_reg); + __ ld(Address(str1_reg, 0, count_offset), str1_reg); // hoisted + __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg); + __ load_heap_oop(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted + __ add(result_reg, tmp1_reg, tmp1_reg); + + // load str2 (jchar*) base address into tmp2_reg + // __ ld_ptr(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted + __ ld(Address(str2_reg, 0, offset_offset), result_reg); + __ add(tmp2_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp2_reg); + __ ld(Address(str2_reg, 0, count_offset), str2_reg); // hoisted + __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg); + __ cmp(str1_reg, str2_reg); // hoisted + __ add(result_reg, tmp2_reg, tmp2_reg); + + __ sll(str1_reg, exact_log2(sizeof(jchar)), str1_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + __ br_zero(Assembler::equal, true, Assembler::pn, str1_reg, Ldone); + __ delayed()->add(G0, 1, result_reg); //equals + + __ cmp(tmp1_reg, tmp2_reg); //same string ? + __ brx(Assembler::equal, true, Assembler::pn, Ldone); + __ delayed()->add(G0, 1, result_reg); + + //rename registers + Register limit_reg = str1_reg; + Register chr2_reg = str2_reg; + Register chr1_reg = result_reg; + // tmp{12} are the base pointers + + //check for alignment and position the pointers to the ends + __ or3(tmp1_reg, tmp2_reg, chr1_reg); + __ andcc(chr1_reg, 0x3, chr1_reg); // notZero means at least one not 4-byte aligned + __ br(Assembler::notZero, false, Assembler::pn, Lchar); + __ delayed()->nop(); + + __ bind(Lword); + __ and3(limit_reg, 0x2, O7); //remember the remainder (either 0 or 2) + __ andn(limit_reg, 0x3, limit_reg); + __ br_zero(Assembler::zero, false, Assembler::pn, limit_reg, Lpost_word); + __ delayed()->nop(); + + __ add(tmp1_reg, limit_reg, tmp1_reg); + __ add(tmp2_reg, limit_reg, tmp2_reg); + __ neg(limit_reg); + + __ lduw(tmp1_reg, limit_reg, chr1_reg); + __ bind(Lword_loop); + __ lduw(tmp2_reg, limit_reg, chr2_reg); + __ cmp(chr1_reg, chr2_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); + __ inccc(limit_reg, 2*sizeof(jchar)); + // annul LDUW if branch i s not taken to prevent access past end of string + __ br(Assembler::notZero, true, Assembler::pt, Lword_loop); //annul on taken + __ delayed()->lduw(tmp1_reg, limit_reg, chr1_reg); // hoisted + + __ bind(Lpost_word); + __ br_zero(Assembler::zero, true, Assembler::pt, O7, Ldone); + __ delayed()->add(G0, 1, result_reg); + + __ lduh(tmp1_reg, 0, chr1_reg); + __ lduh(tmp2_reg, 0, chr2_reg); + __ cmp (chr1_reg, chr2_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); + __ ba(false,Ldone); + __ delayed()->add(G0, 1, result_reg); + + __ bind(Lchar); + __ add(tmp1_reg, limit_reg, tmp1_reg); + __ add(tmp2_reg, limit_reg, tmp2_reg); + __ neg(limit_reg); //negate count + + __ lduh(tmp1_reg, limit_reg, chr1_reg); + __ bind(Lchar_loop); + __ lduh(tmp2_reg, limit_reg, chr2_reg); + __ cmp(chr1_reg, chr2_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); //not equal + __ inccc(limit_reg, sizeof(jchar)); + // annul LDUH if branch is not taken to prevent access past end of string + __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop); //annul on taken + __ delayed()->lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted + + __ add(G0, 1, result_reg); //equal + + __ bind(Ldone); + %} + +enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ + Label Lvector, Ldone, Lloop; + MacroAssembler _masm(&cbuf); + + Register ary1_reg = reg_to_register_object($ary1$$reg); + Register ary2_reg = reg_to_register_object($ary2$$reg); + Register tmp1_reg = reg_to_register_object($tmp1$$reg); + Register tmp2_reg = reg_to_register_object($tmp2$$reg); + Register result_reg = reg_to_register_object($result$$reg); + + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // return true if the same array + __ cmp(ary1_reg, ary2_reg); + __ br(Assembler::equal, true, Assembler::pn, Ldone); + __ delayed()->add(G0, 1, result_reg); // equal + + __ br_null(ary1_reg, true, Assembler::pn, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + __ br_null(ary2_reg, true, Assembler::pn, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + //load the lengths of arrays + __ ld(Address(ary1_reg, 0, length_offset), tmp1_reg); + __ ld(Address(ary2_reg, 0, length_offset), tmp2_reg); + + // return false if the two arrays are not equal length + __ cmp(tmp1_reg, tmp2_reg); + __ br(Assembler::notEqual, true, Assembler::pn, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + __ br_zero(Assembler::zero, true, Assembler::pn, tmp1_reg, Ldone); + __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal + + // load array addresses + __ add(ary1_reg, base_offset, ary1_reg); + __ add(ary2_reg, base_offset, ary2_reg); + + // renaming registers + Register chr1_reg = tmp2_reg; // for characters in ary1 + Register chr2_reg = result_reg; // for characters in ary2 + Register limit_reg = tmp1_reg; // length + + // set byte count + __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg); + __ andcc(limit_reg, 0x2, chr1_reg); //trailing character ? + __ br(Assembler::zero, false, Assembler::pt, Lvector); + __ delayed()->nop(); + + //compare the trailing char + __ sub(limit_reg, sizeof(jchar), limit_reg); + __ lduh(ary1_reg, limit_reg, chr1_reg); + __ lduh(ary2_reg, limit_reg, chr2_reg); + __ cmp(chr1_reg, chr2_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + // only one char ? + __ br_zero(Assembler::zero, true, Assembler::pn, limit_reg, Ldone); + __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal + + __ bind(Lvector); + // Shift ary1_reg and ary2_reg to the end of the arrays, negate limit + __ add(ary1_reg, limit_reg, ary1_reg); + __ add(ary2_reg, limit_reg, ary2_reg); + __ neg(limit_reg, limit_reg); + + __ lduw(ary1_reg, limit_reg, chr1_reg); + __ bind(Lloop); + __ lduw(ary2_reg, limit_reg, chr2_reg); + __ cmp(chr1_reg, chr2_reg); + __ br(Assembler::notEqual, false, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + __ inccc(limit_reg, 2*sizeof(jchar)); + // annul LDUW if branch is not taken to prevent access past end of string + __ br(Assembler::notZero, true, Assembler::pt, Lloop); //annul on taken + __ delayed()->lduw(ary1_reg, limit_reg, chr1_reg); // hoisted + + __ add(G0, 1, result_reg); // equals + + __ bind(Ldone); + %} + enc_class enc_rethrow() %{ cbuf.set_inst_mark(); Register temp_reg = G3; @@ -9015,6 +9211,25 @@ instruct string_compare(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, note ins_pipe(long_memory_op); %} +instruct string_equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result, + o7RegI tmp3, flagsReg ccr) %{ + match(Set result (StrEquals str1 str2)); + effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL ccr, KILL tmp3); + ins_cost(300); + format %{ "String Equals $str1,$str2 -> $result" %} + ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, result) ); + ins_pipe(long_memory_op); +%} + +instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result, + flagsReg ccr) %{ + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr); + ins_cost(300); + format %{ "Array Equals $ary1,$ary2 -> $result" %} + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result)); + ins_pipe(long_memory_op); +%} //---------- Population Count Instructions ------------------------------------- diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp index dbf2f2b664a..35ee7e54019 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp @@ -2173,6 +2173,31 @@ void Assembler::orl(Register dst, Register src) { emit_arith(0x0B, 0xC0, dst, src); } +void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { + assert(VM_Version::supports_sse4_2(), ""); + + InstructionMark im(this); + emit_byte(0x66); + prefix(src, dst); + emit_byte(0x0F); + emit_byte(0x3A); + emit_byte(0x61); + emit_operand(dst, src); + emit_byte(imm8); +} + +void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { + assert(VM_Version::supports_sse4_2(), ""); + + emit_byte(0x66); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0x3A); + emit_byte(0x61); + emit_byte(0xC0 | encode); + emit_byte(imm8); +} + // generic void Assembler::pop(Register dst) { int encode = prefix_and_encode(dst->encoding()); @@ -2330,6 +2355,29 @@ void Assembler::psrlq(XMMRegister dst, int shift) { emit_byte(shift); } +void Assembler::ptest(XMMRegister dst, Address src) { + assert(VM_Version::supports_sse4_1(), ""); + + InstructionMark im(this); + emit_byte(0x66); + prefix(src, dst); + emit_byte(0x0F); + emit_byte(0x38); + emit_byte(0x17); + emit_operand(dst, src); +} + +void Assembler::ptest(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + + emit_byte(0x66); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0x38); + emit_byte(0x17); + emit_byte(0xC0 | encode); +} + void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_byte(0x66); diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index a5efad8d22b..89ac6dd5326 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -1226,6 +1226,10 @@ private: void orq(Register dst, Address src); void orq(Register dst, Register src); + // SSE4.2 string instructions + void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); + void pcmpestri(XMMRegister xmm1, Address src, int imm8); + void popl(Address dst); #ifdef _LP64 @@ -1260,6 +1264,10 @@ private: // Shift Right Logical Quadword Immediate void psrlq(XMMRegister dst, int shift); + // Logical Compare Double Quadword + void ptest(XMMRegister dst, XMMRegister src); + void ptest(XMMRegister dst, Address src); + // Interleave Low Bytes void punpcklbw(XMMRegister dst, XMMRegister src); diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp index 1307dd1e54f..ccf6b429282 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp @@ -408,6 +408,11 @@ void VM_Version::get_processor_features() { UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus } } + if( supports_sse4_2() && UseSSE >= 4 ) { + if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + UseSSE42Intrinsics = true; + } + } } } diff --git a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad index cd64cfbf9f2..509a39972fe 100644 --- a/hotspot/src/cpu/x86/vm/x86_32.ad +++ b/hotspot/src/cpu/x86/vm/x86_32.ad @@ -3694,12 +3694,16 @@ encode %{ } %} - enc_class enc_String_Compare() %{ + enc_class enc_String_Compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, + eAXRegI tmp3, eBXRegI tmp4, eCXRegI result) %{ Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, CONT_LABEL, WHILE_HEAD_LABEL; MacroAssembler masm(&cbuf); + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + // Get the first character position in both strings // [8] char array, [12] offset, [16] count int value_offset = java_lang_String::value_offset_in_bytes(); @@ -3717,7 +3721,6 @@ encode %{ // Compute the minimum of the string lengths(rsi) and the // difference of the string lengths (stack) - if (VM_Version::supports_cmov()) { masm.movl(rdi, Address(rdi, count_offset)); masm.movl(rsi, Address(rsi, count_offset)); @@ -3731,7 +3734,7 @@ encode %{ masm.movl(rsi, rdi); masm.subl(rdi, rcx); masm.push(rdi); - masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL); + masm.jccb(Assembler::lessEqual, ECX_GOOD_LABEL); masm.movl(rsi, rcx); // rsi holds min, rcx is unused } @@ -3756,7 +3759,7 @@ encode %{ Label LSkip2; // Check if the strings start at same location masm.cmpptr(rbx,rax); - masm.jcc(Assembler::notEqual, LSkip2); + masm.jccb(Assembler::notEqual, LSkip2); // Check if the length difference is zero (from stack) masm.cmpl(Address(rsp, 0), 0x0); @@ -3766,9 +3769,52 @@ encode %{ masm.bind(LSkip2); } - // Shift rax, and rbx, to the end of the arrays, negate min - masm.lea(rax, Address(rax, rsi, Address::times_2, 2)); - masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2)); + // Advance to next character + masm.addptr(rax, 2); + masm.addptr(rbx, 2); + + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; + // Setup to compare 16-byte vectors + masm.movl(rdi, rsi); + masm.andl(rsi, 0xfffffff8); // rsi holds the vector count + masm.andl(rdi, 0x00000007); // rdi holds the tail count + masm.testl(rsi, rsi); + masm.jccb(Assembler::zero, COMPARE_TAIL); + + masm.lea(rax, Address(rax, rsi, Address::times_2)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); + masm.negl(rsi); + + masm.bind(COMPARE_VECTORS); + masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2)); + masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL); + masm.addl(rsi, 8); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + masm.jmpb(COMPARE_TAIL); + + // Mismatched characters in the vectors + masm.bind(VECTOR_NOT_EQUAL); + masm.lea(rax, Address(rax, rsi, Address::times_2)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); + masm.movl(rdi, 8); + + // Compare tail (< 8 chars), or rescan last vectors to + // find 1st mismatched characters + masm.bind(COMPARE_TAIL); + masm.testl(rdi, rdi); + masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL); + masm.movl(rsi, rdi); + // Fallthru to tail compare + } + + //Shift rax, and rbx, to the end of the arrays, negate min + masm.lea(rax, Address(rax, rsi, Address::times_2, 0)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0)); masm.negl(rsi); // Compare the rest of the characters @@ -3776,93 +3822,329 @@ encode %{ masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0)); masm.subl(rcx, rdi); - masm.jcc(Assembler::notZero, POP_LABEL); + masm.jccb(Assembler::notZero, POP_LABEL); masm.incrementl(rsi); masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); // Strings are equal up to min length. Return the length difference. masm.bind(LENGTH_DIFF_LABEL); masm.pop(rcx); - masm.jmp(DONE_LABEL); + masm.jmpb(DONE_LABEL); // Discard the stored length difference masm.bind(POP_LABEL); masm.addptr(rsp, 4); - + // That's it masm.bind(DONE_LABEL); %} - enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{ - Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP; + enc_class enc_String_Equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, + eBXRegI tmp3, eCXRegI tmp4, eAXRegI result) %{ + Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR; MacroAssembler masm(&cbuf); - Register ary1Reg = as_Register($ary1$$reg); - Register ary2Reg = as_Register($ary2$$reg); - Register tmp1Reg = as_Register($tmp1$$reg); - Register tmp2Reg = as_Register($tmp2$$reg); - Register resultReg = as_Register($result$$reg); + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + + int value_offset = java_lang_String::value_offset_in_bytes(); + int offset_offset = java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String::count_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // does source == target string? + masm.cmpptr(rdi, rsi); + masm.jcc(Assembler::equal, RET_TRUE); + + // get and compare counts + masm.movl(rcx, Address(rdi, count_offset)); + masm.movl(rax, Address(rsi, count_offset)); + masm.cmpl(rcx, rax); + masm.jcc(Assembler::notEqual, RET_FALSE); + masm.testl(rax, rax); + masm.jcc(Assembler::zero, RET_TRUE); + + // get source string offset and value + masm.movptr(rbx, Address(rsi, value_offset)); + masm.movl(rax, Address(rsi, offset_offset)); + masm.leal(rsi, Address(rbx, rax, Address::times_2, base_offset)); + + // get compare string offset and value + masm.movptr(rbx, Address(rdi, value_offset)); + masm.movl(rax, Address(rdi, offset_offset)); + masm.leal(rdi, Address(rbx, rax, Address::times_2, base_offset)); + + // Set byte count + masm.shll(rcx, 1); + masm.movl(rax, rcx); + + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + // Compare 16-byte vectors + masm.andl(rcx, 0xfffffff0); // vector count (in bytes) + masm.andl(rax, 0x0000000e); // tail count (in bytes) + masm.testl(rcx, rcx); + masm.jccb(Assembler::zero, COMPARE_TAIL); + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); + masm.negl(rcx); + + masm.bind(COMPARE_WIDE_VECTORS); + masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1)); + masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + masm.jccb(Assembler::notZero, RET_FALSE); + masm.addl(rcx, 16); + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + masm.bind(COMPARE_TAIL); + masm.movl(rcx, rax); + // Fallthru to tail compare + } + + // Compare 4-byte vectors + masm.andl(rcx, 0xfffffffc); // vector count (in bytes) + masm.andl(rax, 0x00000002); // tail char (in bytes) + masm.testl(rcx, rcx); + masm.jccb(Assembler::zero, COMPARE_CHAR); + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); + masm.negl(rcx); + + masm.bind(COMPARE_VECTORS); + masm.movl(rbx, Address(rdi, rcx, Address::times_1)); + masm.cmpl(rbx, Address(rsi, rcx, Address::times_1)); + masm.jccb(Assembler::notEqual, RET_FALSE); + masm.addl(rcx, 4); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + + // Compare trailing char (final 2 bytes), if any + masm.bind(COMPARE_CHAR); + masm.testl(rax, rax); + masm.jccb(Assembler::zero, RET_TRUE); + masm.load_unsigned_short(rbx, Address(rdi, 0)); + masm.load_unsigned_short(rcx, Address(rsi, 0)); + masm.cmpl(rbx, rcx); + masm.jccb(Assembler::notEqual, RET_FALSE); + + masm.bind(RET_TRUE); + masm.movl(rax, 1); // return true + masm.jmpb(DONE); + + masm.bind(RET_FALSE); + masm.xorl(rax, rax); // return false + + masm.bind(DONE); + %} + + enc_class enc_String_IndexOf(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2, + eCXRegI tmp3, eDXRegI tmp4, eBXRegI result) %{ + // SSE4.2 version + Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR, + SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE; + MacroAssembler masm(&cbuf); + + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + int value_offset = java_lang_String::value_offset_in_bytes(); + int offset_offset = java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String::count_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // Get counts for string and substr + masm.movl(rdx, Address(rsi, count_offset)); + masm.movl(rax, Address(rdi, count_offset)); + // Check for substr count > string count + masm.cmpl(rax, rdx); + masm.jcc(Assembler::greater, RET_NEG_ONE); + + // Start the indexOf operation + // Get start addr of string + masm.movptr(rbx, Address(rsi, value_offset)); + masm.movl(rcx, Address(rsi, offset_offset)); + masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset)); + masm.push(rsi); + + // Get start addr of substr + masm.movptr(rbx, Address(rdi, value_offset)); + masm.movl(rcx, Address(rdi, offset_offset)); + masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset)); + masm.push(rdi); + masm.push(rax); + masm.jmpb(PREP_FOR_SCAN); + + // Substr count saved at sp + // Substr saved at sp+4 + // String saved at sp+8 + + // Prep to load substr for scan + masm.bind(LOAD_SUBSTR); + masm.movptr(rdi, Address(rsp, 4)); + masm.movl(rax, Address(rsp, 0)); + + // Load substr + masm.bind(PREP_FOR_SCAN); + masm.movdqu(tmp1Reg, Address(rdi, 0)); + masm.addl(rdx, 8); // prime the loop + masm.subptr(rsi, 16); + + // Scan string for substr in 16-byte vectors + masm.bind(SCAN_TO_SUBSTR); + masm.subl(rdx, 8); + masm.addptr(rsi, 16); + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); + masm.jcc(Assembler::above, SCAN_TO_SUBSTR); // CF == 0 && ZF == 0 + masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0 + + // Fallthru: found a potential substr + + // Make sure string is still long enough + masm.subl(rdx, rcx); + masm.cmpl(rdx, rax); + masm.jccb(Assembler::negative, RET_NOT_FOUND); + // Compute start addr of substr + masm.lea(rsi, Address(rsi, rcx, Address::times_2)); + masm.movptr(rbx, rsi); + + // Compare potential substr + masm.addl(rdx, 8); // prime the loop + masm.addl(rax, 8); + masm.subptr(rsi, 16); + masm.subptr(rdi, 16); + + // Scan 16-byte vectors of string and substr + masm.bind(SCAN_SUBSTR); + masm.subl(rax, 8); + masm.subl(rdx, 8); + masm.addptr(rsi, 16); + masm.addptr(rdi, 16); + masm.movdqu(tmp1Reg, Address(rdi, 0)); + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); + masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0 + masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0 + + // Compute substr offset + masm.movptr(rsi, Address(rsp, 8)); + masm.subptr(rbx, rsi); + masm.shrl(rbx, 1); + masm.jmpb(CLEANUP); + + masm.bind(RET_NEG_ONE); + masm.movl(rbx, -1); + masm.jmpb(DONE); + + masm.bind(RET_NOT_FOUND); + masm.movl(rbx, -1); + + masm.bind(CLEANUP); + masm.addptr(rsp, 12); + + masm.bind(DONE); + %} + + enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, + eBXRegI tmp3, eDXRegI tmp4, eAXRegI result) %{ + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; + MacroAssembler masm(&cbuf); + + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + Register ary1Reg = as_Register($ary1$$reg); + Register ary2Reg = as_Register($ary2$$reg); + Register tmp3Reg = as_Register($tmp3$$reg); + Register tmp4Reg = as_Register($tmp4$$reg); + Register resultReg = as_Register($result$$reg); int length_offset = arrayOopDesc::length_offset_in_bytes(); int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); // Check the input args - masm.cmpl(ary1Reg, ary2Reg); + masm.cmpptr(ary1Reg, ary2Reg); masm.jcc(Assembler::equal, TRUE_LABEL); - masm.testl(ary1Reg, ary1Reg); + masm.testptr(ary1Reg, ary1Reg); masm.jcc(Assembler::zero, FALSE_LABEL); - masm.testl(ary2Reg, ary2Reg); + masm.testptr(ary2Reg, ary2Reg); masm.jcc(Assembler::zero, FALSE_LABEL); // Check the lengths - masm.movl(tmp2Reg, Address(ary1Reg, length_offset)); + masm.movl(tmp4Reg, Address(ary1Reg, length_offset)); masm.movl(resultReg, Address(ary2Reg, length_offset)); - masm.cmpl(tmp2Reg, resultReg); + masm.cmpl(tmp4Reg, resultReg); masm.jcc(Assembler::notEqual, FALSE_LABEL); masm.testl(resultReg, resultReg); masm.jcc(Assembler::zero, TRUE_LABEL); - // Get the number of 4 byte vectors to compare - masm.shrl(resultReg, 1); + // Load array addrs + masm.lea(ary1Reg, Address(ary1Reg, base_offset)); + masm.lea(ary2Reg, Address(ary2Reg, base_offset)); - // Check for odd-length arrays - masm.andl(tmp2Reg, 1); - masm.testl(tmp2Reg, tmp2Reg); - masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); + // Set byte count + masm.shll(tmp4Reg, 1); + masm.movl(resultReg, tmp4Reg); - // Compare 2-byte "tail" at end of arrays - masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); - masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); - masm.cmpl(tmp1Reg, tmp2Reg); - masm.jcc(Assembler::notEqual, FALSE_LABEL); + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + // Compare 16-byte vectors + masm.andl(tmp4Reg, 0xfffffff0); // vector count (in bytes) + masm.andl(resultReg, 0x0000000e); // tail count (in bytes) + masm.testl(tmp4Reg, tmp4Reg); + masm.jccb(Assembler::zero, COMPARE_TAIL); + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.negl(tmp4Reg); + + masm.bind(COMPARE_WIDE_VECTORS); + masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + + masm.jccb(Assembler::notZero, FALSE_LABEL); + masm.addl(tmp4Reg, 16); + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + masm.bind(COMPARE_TAIL); + masm.movl(tmp4Reg, resultReg); + // Fallthru to tail compare + } + + // Compare 4-byte vectors + masm.andl(tmp4Reg, 0xfffffffc); // vector count (in bytes) + masm.andl(resultReg, 0x00000002); // tail char (in bytes) + masm.testl(tmp4Reg, tmp4Reg); + masm.jccb(Assembler::zero, COMPARE_CHAR); + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.negl(tmp4Reg); + + masm.bind(COMPARE_VECTORS); + masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.jccb(Assembler::notEqual, FALSE_LABEL); + masm.addl(tmp4Reg, 4); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + + // Compare trailing char (final 2 bytes), if any + masm.bind(COMPARE_CHAR); masm.testl(resultReg, resultReg); - masm.jcc(Assembler::zero, TRUE_LABEL); - - // Setup compare loop - masm.bind(COMPARE_LOOP_HDR); - // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays - masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); - masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); - masm.negl(resultReg); - - // 4-byte-wide compare loop - masm.bind(COMPARE_LOOP); - masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0)); - masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0)); - masm.cmpl(ary1Reg, ary2Reg); - masm.jcc(Assembler::notEqual, FALSE_LABEL); - masm.increment(resultReg); - masm.jcc(Assembler::notZero, COMPARE_LOOP); + masm.jccb(Assembler::zero, TRUE_LABEL); + masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0)); + masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0)); + masm.cmpl(tmp3Reg, tmp4Reg); + masm.jccb(Assembler::notEqual, FALSE_LABEL); masm.bind(TRUE_LABEL); masm.movl(resultReg, 1); // return true - masm.jmp(DONE_LABEL); + masm.jmpb(DONE); masm.bind(FALSE_LABEL); masm.xorl(resultReg, resultReg); // return false // That's it - masm.bind(DONE_LABEL); + masm.bind(DONE); %} enc_class enc_pop_rdx() %{ @@ -12074,11 +12356,8 @@ instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ ins_pipe( fpu_reg_reg ); %} - - // ======================================================================= // fast clearing of an array - instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); @@ -12092,24 +12371,48 @@ instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlag ins_pipe( pipe_slow ); %} -instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ +instruct string_compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, + eAXRegI tmp3, eBXRegI tmp4, eCXRegI result, eFlagsReg cr) %{ match(Set result (StrComp str1 str2)); - effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr); + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX" %} - ins_encode( enc_String_Compare() ); + ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); + ins_pipe( pipe_slow ); +%} + +// fast string equals +instruct string_equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, + eBXRegI tmp3, eCXRegI tmp4, eAXRegI result, eFlagsReg cr) %{ + match(Set result (StrEquals str1 str2)); + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); + + format %{ "String Equals $str1,$str2 -> $result // KILL EBX, ECX" %} + ins_encode( enc_String_Equals(tmp1, tmp2, str1, str2, tmp3, tmp4, result) ); + ins_pipe( pipe_slow ); +%} + +instruct string_indexof(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2, + eCXRegI tmp3, eDXRegI tmp4, eBXRegI result, eFlagsReg cr) %{ + predicate(UseSSE42Intrinsics); + match(Set result (StrIndexOf str1 str2)); + effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr); + + format %{ "String IndexOf $str1,$str2 -> $result // KILL EAX, ECX, EDX" %} + ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); ins_pipe( pipe_slow ); %} // fast array equals -instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ +instruct array_equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, eBXRegI tmp3, + eDXRegI tmp4, eAXRegI result, eFlagsReg cr) %{ match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr); + effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); - format %{ "Array Equals $ary1,$ary2 -> $result // KILL EAX, EBX" %} - ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); + format %{ "Array Equals $ary1,$ary2 -> $result // KILL EBX, EDX" %} + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) ); ins_pipe( pipe_slow ); %} diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad index ae23fef1114..892b0863128 100644 --- a/hotspot/src/cpu/x86/vm/x86_64.ad +++ b/hotspot/src/cpu/x86/vm/x86_64.ad @@ -3694,13 +3694,16 @@ encode %{ } %} - enc_class enc_String_Compare() - %{ + enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, + rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{ Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, CONT_LABEL, WHILE_HEAD_LABEL; MacroAssembler masm(&cbuf); + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + // Get the first character position in both strings // [8] char array, [12] offset, [16] count int value_offset = java_lang_String::value_offset_in_bytes(); @@ -3718,6 +3721,7 @@ encode %{ // Compute the minimum of the string lengths(rsi) and the // difference of the string lengths (stack) + // do the conditional move stuff masm.movl(rdi, Address(rdi, count_offset)); masm.movl(rsi, Address(rsi, count_offset)); masm.movl(rcx, rdi); @@ -3745,7 +3749,7 @@ encode %{ Label LSkip2; // Check if the strings start at same location masm.cmpptr(rbx, rax); - masm.jcc(Assembler::notEqual, LSkip2); + masm.jccb(Assembler::notEqual, LSkip2); // Check if the length difference is zero (from stack) masm.cmpl(Address(rsp, 0), 0x0); @@ -3755,9 +3759,52 @@ encode %{ masm.bind(LSkip2); } + // Advance to next character + masm.addptr(rax, 2); + masm.addptr(rbx, 2); + + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; + // Setup to compare 16-byte vectors + masm.movl(rdi, rsi); + masm.andl(rsi, 0xfffffff8); // rsi holds the vector count + masm.andl(rdi, 0x00000007); // rdi holds the tail count + masm.testl(rsi, rsi); + masm.jccb(Assembler::zero, COMPARE_TAIL); + + masm.lea(rax, Address(rax, rsi, Address::times_2)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); + masm.negptr(rsi); + + masm.bind(COMPARE_VECTORS); + masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2)); + masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL); + masm.addptr(rsi, 8); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + masm.jmpb(COMPARE_TAIL); + + // Mismatched characters in the vectors + masm.bind(VECTOR_NOT_EQUAL); + masm.lea(rax, Address(rax, rsi, Address::times_2)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); + masm.movl(rdi, 8); + + // Compare tail (< 8 chars), or rescan last vectors to + // find 1st mismatched characters + masm.bind(COMPARE_TAIL); + masm.testl(rdi, rdi); + masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL); + masm.movl(rsi, rdi); + // Fallthru to tail compare + } + // Shift RAX and RBX to the end of the arrays, negate min - masm.lea(rax, Address(rax, rsi, Address::times_2, 2)); - masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2)); + masm.lea(rax, Address(rax, rsi, Address::times_2, 0)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0)); masm.negptr(rsi); // Compare the rest of the characters @@ -3765,93 +3812,329 @@ encode %{ masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0)); masm.subl(rcx, rdi); - masm.jcc(Assembler::notZero, POP_LABEL); + masm.jccb(Assembler::notZero, POP_LABEL); masm.increment(rsi); masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); // Strings are equal up to min length. Return the length difference. masm.bind(LENGTH_DIFF_LABEL); masm.pop(rcx); - masm.jmp(DONE_LABEL); + masm.jmpb(DONE_LABEL); // Discard the stored length difference masm.bind(POP_LABEL); masm.addptr(rsp, 8); - + // That's it masm.bind(DONE_LABEL); %} - enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, rbx_RegI tmp2, rcx_RegI result) %{ - Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP; + enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2, + rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result) %{ + // SSE4.2 version + Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR, + SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE; MacroAssembler masm(&cbuf); - Register ary1Reg = as_Register($ary1$$reg); - Register ary2Reg = as_Register($ary2$$reg); - Register tmp1Reg = as_Register($tmp1$$reg); - Register tmp2Reg = as_Register($tmp2$$reg); - Register resultReg = as_Register($result$$reg); + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + int value_offset = java_lang_String::value_offset_in_bytes(); + int offset_offset = java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String::count_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // Get counts for string and substr + masm.movl(rdx, Address(rsi, count_offset)); + masm.movl(rax, Address(rdi, count_offset)); + // Check for substr count > string count + masm.cmpl(rax, rdx); + masm.jcc(Assembler::greater, RET_NEG_ONE); + + // Start the indexOf operation + // Get start addr of string + masm.load_heap_oop(rbx, Address(rsi, value_offset)); + masm.movl(rcx, Address(rsi, offset_offset)); + masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset)); + masm.push(rsi); + + // Get start addr of substr + masm.load_heap_oop(rbx, Address(rdi, value_offset)); + masm.movl(rcx, Address(rdi, offset_offset)); + masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset)); + masm.push(rdi); + masm.push(rax); + masm.jmpb(PREP_FOR_SCAN); + + // Substr count saved at sp + // Substr saved at sp+8 + // String saved at sp+16 + + // Prep to load substr for scan + masm.bind(LOAD_SUBSTR); + masm.movptr(rdi, Address(rsp, 8)); + masm.movl(rax, Address(rsp, 0)); + + // Load substr + masm.bind(PREP_FOR_SCAN); + masm.movdqu(tmp1Reg, Address(rdi, 0)); + masm.addq(rdx, 8); // prime the loop + masm.subptr(rsi, 16); + + // Scan string for substr in 16-byte vectors + masm.bind(SCAN_TO_SUBSTR); + masm.subq(rdx, 8); + masm.addptr(rsi, 16); + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); + masm.jcc(Assembler::above, SCAN_TO_SUBSTR); + masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); + + // Fallthru: found a potential substr + + //Make sure string is still long enough + masm.subl(rdx, rcx); + masm.cmpl(rdx, rax); + masm.jccb(Assembler::negative, RET_NOT_FOUND); + // Compute start addr of substr + masm.lea(rsi, Address(rsi, rcx, Address::times_2)); + masm.movptr(rbx, rsi); + + // Compare potential substr + masm.addq(rdx, 8); // prime the loop + masm.addq(rax, 8); + masm.subptr(rsi, 16); + masm.subptr(rdi, 16); + + // Scan 16-byte vectors of string and substr + masm.bind(SCAN_SUBSTR); + masm.subq(rax, 8); + masm.subq(rdx, 8); + masm.addptr(rsi, 16); + masm.addptr(rdi, 16); + masm.movdqu(tmp1Reg, Address(rdi, 0)); + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); + masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0 + masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0 + + // Compute substr offset + masm.movptr(rsi, Address(rsp, 16)); + masm.subptr(rbx, rsi); + masm.shrl(rbx, 1); + masm.jmpb(CLEANUP); + + masm.bind(RET_NEG_ONE); + masm.movl(rbx, -1); + masm.jmpb(DONE); + + masm.bind(RET_NOT_FOUND); + masm.movl(rbx, -1); + + masm.bind(CLEANUP); + masm.addptr(rsp, 24); + + masm.bind(DONE); + %} + + enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, + rbx_RegI tmp3, rcx_RegI tmp2, rax_RegI result) %{ + Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR; + MacroAssembler masm(&cbuf); + + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + + int value_offset = java_lang_String::value_offset_in_bytes(); + int offset_offset = java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String::count_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // does source == target string? + masm.cmpptr(rdi, rsi); + masm.jcc(Assembler::equal, RET_TRUE); + + // get and compare counts + masm.movl(rcx, Address(rdi, count_offset)); + masm.movl(rax, Address(rsi, count_offset)); + masm.cmpl(rcx, rax); + masm.jcc(Assembler::notEqual, RET_FALSE); + masm.testl(rax, rax); + masm.jcc(Assembler::zero, RET_TRUE); + + // get source string offset and value + masm.load_heap_oop(rbx, Address(rsi, value_offset)); + masm.movl(rax, Address(rsi, offset_offset)); + masm.lea(rsi, Address(rbx, rax, Address::times_2, base_offset)); + + // get compare string offset and value + masm.load_heap_oop(rbx, Address(rdi, value_offset)); + masm.movl(rax, Address(rdi, offset_offset)); + masm.lea(rdi, Address(rbx, rax, Address::times_2, base_offset)); + + // Set byte count + masm.shll(rcx, 1); + masm.movl(rax, rcx); + + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + // Compare 16-byte vectors + masm.andl(rcx, 0xfffffff0); // vector count (in bytes) + masm.andl(rax, 0x0000000e); // tail count (in bytes) + masm.testl(rcx, rcx); + masm.jccb(Assembler::zero, COMPARE_TAIL); + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); + masm.negptr(rcx); + + masm.bind(COMPARE_WIDE_VECTORS); + masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1)); + masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + masm.jccb(Assembler::notZero, RET_FALSE); + masm.addptr(rcx, 16); + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + masm.bind(COMPARE_TAIL); + masm.movl(rcx, rax); + // Fallthru to tail compare + } + + // Compare 4-byte vectors + masm.andl(rcx, 0xfffffffc); // vector count (in bytes) + masm.andl(rax, 0x00000002); // tail char (in bytes) + masm.testl(rcx, rcx); + masm.jccb(Assembler::zero, COMPARE_CHAR); + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); + masm.negptr(rcx); + + masm.bind(COMPARE_VECTORS); + masm.movl(rbx, Address(rdi, rcx, Address::times_1)); + masm.cmpl(rbx, Address(rsi, rcx, Address::times_1)); + masm.jccb(Assembler::notEqual, RET_FALSE); + masm.addptr(rcx, 4); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + + // Compare trailing char (final 2 bytes), if any + masm.bind(COMPARE_CHAR); + masm.testl(rax, rax); + masm.jccb(Assembler::zero, RET_TRUE); + masm.load_unsigned_short(rbx, Address(rdi, 0)); + masm.load_unsigned_short(rcx, Address(rsi, 0)); + masm.cmpl(rbx, rcx); + masm.jccb(Assembler::notEqual, RET_FALSE); + + masm.bind(RET_TRUE); + masm.movl(rax, 1); // return true + masm.jmpb(DONE); + + masm.bind(RET_FALSE); + masm.xorl(rax, rax); // return false + + masm.bind(DONE); + %} + + enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, + rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{ + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; + MacroAssembler masm(&cbuf); + + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + Register ary1Reg = as_Register($ary1$$reg); + Register ary2Reg = as_Register($ary2$$reg); + Register tmp3Reg = as_Register($tmp3$$reg); + Register tmp4Reg = as_Register($tmp4$$reg); + Register resultReg = as_Register($result$$reg); int length_offset = arrayOopDesc::length_offset_in_bytes(); int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); // Check the input args - masm.cmpq(ary1Reg, ary2Reg); + masm.cmpq(ary1Reg, ary2Reg); masm.jcc(Assembler::equal, TRUE_LABEL); - masm.testq(ary1Reg, ary1Reg); + masm.testq(ary1Reg, ary1Reg); masm.jcc(Assembler::zero, FALSE_LABEL); - masm.testq(ary2Reg, ary2Reg); + masm.testq(ary2Reg, ary2Reg); masm.jcc(Assembler::zero, FALSE_LABEL); // Check the lengths - masm.movl(tmp2Reg, Address(ary1Reg, length_offset)); + masm.movl(tmp4Reg, Address(ary1Reg, length_offset)); masm.movl(resultReg, Address(ary2Reg, length_offset)); - masm.cmpl(tmp2Reg, resultReg); + masm.cmpl(tmp4Reg, resultReg); masm.jcc(Assembler::notEqual, FALSE_LABEL); masm.testl(resultReg, resultReg); masm.jcc(Assembler::zero, TRUE_LABEL); - // Get the number of 4 byte vectors to compare - masm.shrl(resultReg, 1); + //load array address + masm.lea(ary1Reg, Address(ary1Reg, base_offset)); + masm.lea(ary2Reg, Address(ary2Reg, base_offset)); - // Check for odd-length arrays - masm.andl(tmp2Reg, 1); - masm.testl(tmp2Reg, tmp2Reg); - masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); + //set byte count + masm.shll(tmp4Reg, 1); + masm.movl(resultReg,tmp4Reg); - // Compare 2-byte "tail" at end of arrays - masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); - masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); - masm.cmpl(tmp1Reg, tmp2Reg); - masm.jcc(Assembler::notEqual, FALSE_LABEL); + if (UseSSE42Intrinsics){ + // With SSE4.2, use double quad vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + // Compare 16-byte vectors + masm.andl(tmp4Reg, 0xfffffff0); // vector count (in bytes) + masm.andl(resultReg, 0x0000000e); // tail count (in bytes) + masm.testl(tmp4Reg, tmp4Reg); + masm.jccb(Assembler::zero, COMPARE_TAIL); + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.negptr(tmp4Reg); + + masm.bind(COMPARE_WIDE_VECTORS); + masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + + masm.jccb(Assembler::notZero, FALSE_LABEL); + masm.addptr(tmp4Reg, 16); + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + masm.bind(COMPARE_TAIL); + masm.movl(tmp4Reg, resultReg); + // Fallthru to tail compare + } + + // Compare 4-byte vectors + masm.andl(tmp4Reg, 0xfffffffc); // vector count (in bytes) + masm.andl(resultReg, 0x00000002); // tail char (in bytes) + masm.testl(tmp4Reg, tmp4Reg); //if tmp2 == 0, only compare char + masm.jccb(Assembler::zero, COMPARE_CHAR); + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.negptr(tmp4Reg); + + masm.bind(COMPARE_VECTORS); + masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.jccb(Assembler::notEqual, FALSE_LABEL); + masm.addptr(tmp4Reg, 4); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + + // Compare trailing char (final 2 bytes), if any + masm.bind(COMPARE_CHAR); masm.testl(resultReg, resultReg); - masm.jcc(Assembler::zero, TRUE_LABEL); - - // Setup compare loop - masm.bind(COMPARE_LOOP_HDR); - // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays - masm.leaq(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); - masm.leaq(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); - masm.negq(resultReg); - - // 4-byte-wide compare loop - masm.bind(COMPARE_LOOP); - masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0)); - masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0)); - masm.cmpl(ary1Reg, ary2Reg); - masm.jcc(Assembler::notEqual, FALSE_LABEL); - masm.incrementq(resultReg); - masm.jcc(Assembler::notZero, COMPARE_LOOP); + masm.jccb(Assembler::zero, TRUE_LABEL); + masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0)); + masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0)); + masm.cmpl(tmp3Reg, tmp4Reg); + masm.jccb(Assembler::notEqual, FALSE_LABEL); masm.bind(TRUE_LABEL); masm.movl(resultReg, 1); // return true - masm.jmp(DONE_LABEL); + masm.jmpb(DONE); masm.bind(FALSE_LABEL); masm.xorl(resultReg, resultReg); // return false // That's it - masm.bind(DONE_LABEL); + masm.bind(DONE); %} enc_class enc_rethrow() @@ -5087,7 +5370,7 @@ operand regF() %} // Double register operands -operand regD() +operand regD() %{ constraint(ALLOC_IN_RC(double_reg)); match(RegD); @@ -11540,27 +11823,52 @@ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, ins_pipe(pipe_slow); %} -instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1, - rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) +instruct string_compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, + rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr) %{ match(Set result (StrComp str1 str2)); - effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr); + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); format %{ "String Compare $str1, $str2 -> $result // XXX KILL RAX, RBX" %} - ins_encode( enc_String_Compare() ); + ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); + ins_pipe( pipe_slow ); +%} + +instruct string_indexof(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2, + rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result, rFlagsReg cr) +%{ + predicate(UseSSE42Intrinsics); + match(Set result (StrIndexOf str1 str2)); + effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr); + + format %{ "String IndexOf $str1,$str2 -> $result // KILL RAX, RCX, RDX" %} + ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); + ins_pipe( pipe_slow ); +%} + +// fast string equals +instruct string_equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, rbx_RegI tmp3, + rcx_RegI tmp4, rax_RegI result, rFlagsReg cr) +%{ + match(Set result (StrEquals str1 str2)); + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); + + format %{ "String Equals $str1,$str2 -> $result // KILL RBX, RCX" %} + ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); ins_pipe( pipe_slow ); %} // fast array equals -instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, - rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) %{ +instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3, + rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr) +%{ match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr); + effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); - format %{ "Array Equals $ary1,$ary2 -> $result // KILL RAX, RBX" %} - ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); + format %{ "Array Equals $ary1,$ary2 -> $result // KILL RAX, RBX" %} + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) ); ins_pipe( pipe_slow ); %} diff --git a/hotspot/src/share/vm/adlc/formssel.cpp b/hotspot/src/share/vm/adlc/formssel.cpp index c573511a66e..1f80e2d0e16 100644 --- a/hotspot/src/share/vm/adlc/formssel.cpp +++ b/hotspot/src/share/vm/adlc/formssel.cpp @@ -574,9 +574,13 @@ bool InstructForm::needs_anti_dependence_check(FormDict &globals) const { // TEMPORARY // if( is_simple_chain_rule(globals) ) return false; - // String-compare uses many memorys edges, but writes none + // String.(compareTo/equals/indexOf) and Arrays.equals use many memorys edges, + // but writes none if( _matrule && _matrule->_rChild && - strcmp(_matrule->_rChild->_opType,"StrComp")==0 ) + ( strcmp(_matrule->_rChild->_opType,"StrComp" )==0 || + strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 || + strcmp(_matrule->_rChild->_opType,"StrIndexOf" )==0 || + strcmp(_matrule->_rChild->_opType,"AryEq" )==0 )) return true; // Check if instruction has a USE of a memory operand class, but no defs @@ -815,8 +819,10 @@ uint InstructForm::oper_input_base(FormDict &globals) { return AdlcVMDeps::Parms; // Skip the machine-state edges if( _matrule->_rChild && - strcmp(_matrule->_rChild->_opType,"StrComp")==0 ) { - // String compare takes 1 control and 4 memory edges. + ( strcmp(_matrule->_rChild->_opType,"StrComp" )==0 || + strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 || + strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 )) { + // String.(compareTo/equals/indexOf) take 1 control and 4 memory edges. return 5; } diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp index cbf24f4bc89..fc0601aee3a 100644 --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp @@ -288,6 +288,7 @@ template(stringCacheEnabled_name, "stringCacheEnabled") \ template(bitCount_name, "bitCount") \ template(profile_name, "profile") \ + template(equals_name, "equals") \ \ /* non-intrinsic name/signature pairs: */ \ template(register_method_name, "register") \ @@ -579,7 +580,6 @@ do_signature(copyOfRange_signature, "([Ljava/lang/Object;IILjava/lang/Class;)[Ljava/lang/Object;") \ \ do_intrinsic(_equalsC, java_util_Arrays, equals_name, equalsC_signature, F_S) \ - do_name( equals_name, "equals") \ do_signature(equalsC_signature, "([C[C)Z") \ \ do_intrinsic(_invoke, java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \ @@ -589,6 +589,7 @@ do_name( compareTo_name, "compareTo") \ do_intrinsic(_indexOf, java_lang_String, indexOf_name, string_int_signature, F_R) \ do_name( indexOf_name, "indexOf") \ + do_intrinsic(_equals, java_lang_String, equals_name, object_boolean_signature, F_R) \ \ do_class(java_nio_Buffer, "java/nio/Buffer") \ do_intrinsic(_checkIndex, java_nio_Buffer, checkIndex_name, int_int_signature, F_R) \ diff --git a/hotspot/src/share/vm/opto/classes.hpp b/hotspot/src/share/vm/opto/classes.hpp index 87adb737cb7..b854447d75d 100644 --- a/hotspot/src/share/vm/opto/classes.hpp +++ b/hotspot/src/share/vm/opto/classes.hpp @@ -218,6 +218,8 @@ macro(StoreL) macro(StoreP) macro(StoreN) macro(StrComp) +macro(StrEquals) +macro(StrIndexOf) macro(SubD) macro(SubF) macro(SubI) diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp index df648455117..92d5371153d 100644 --- a/hotspot/src/share/vm/opto/gcm.cpp +++ b/hotspot/src/share/vm/opto/gcm.cpp @@ -438,6 +438,12 @@ Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) { #endif assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp), "String compare is only known 'load' that does not conflict with any stores"); + assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals), + "String equals is a 'load' that does not conflict with any stores"); + assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf), + "String indexOf is a 'load' that does not conflict with any stores"); + assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq), + "Arrays equals is a 'load' that do not conflict with any stores"); if (!C->alias_type(load_alias_idx)->is_rewritable()) { // It is impossible to spoil this load by putting stores before it, diff --git a/hotspot/src/share/vm/opto/lcm.cpp b/hotspot/src/share/vm/opto/lcm.cpp index 4c83f0af66d..31de55a5435 100644 --- a/hotspot/src/share/vm/opto/lcm.cpp +++ b/hotspot/src/share/vm/opto/lcm.cpp @@ -137,6 +137,8 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowe if( mach->in(2) != val ) continue; break; // Found a memory op? case Op_StrComp: + case Op_StrEquals: + case Op_StrIndexOf: case Op_AryEq: // Not a legit memory op for implicit null check regardless of // embedded loads diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp index b5b7136a375..a57333fd5a0 100644 --- a/hotspot/src/share/vm/opto/library_call.cpp +++ b/hotspot/src/share/vm/opto/library_call.cpp @@ -136,6 +136,7 @@ class LibraryCallKit : public GraphKit { bool inline_string_compareTo(); bool inline_string_indexOf(); Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i); + bool inline_string_equals(); Node* pop_math_arg(); bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName); bool inline_math_native(vmIntrinsics::ID id); @@ -261,6 +262,7 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { switch (id) { case vmIntrinsics::_indexOf: case vmIntrinsics::_compareTo: + case vmIntrinsics::_equals: case vmIntrinsics::_equalsC: break; // InlineNatives does not control String.compareTo default: @@ -275,6 +277,9 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { case vmIntrinsics::_indexOf: if (!SpecialStringIndexOf) return NULL; break; + case vmIntrinsics::_equals: + if (!SpecialStringEquals) return NULL; + break; case vmIntrinsics::_equalsC: if (!SpecialArraysEquals) return NULL; break; @@ -442,6 +447,8 @@ bool LibraryCallKit::try_to_inline() { return inline_string_compareTo(); case vmIntrinsics::_indexOf: return inline_string_indexOf(); + case vmIntrinsics::_equals: + return inline_string_equals(); case vmIntrinsics::_getObject: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false); @@ -793,6 +800,8 @@ Node* LibraryCallKit::generate_current_thread(Node* &tls_output) { //------------------------------inline_string_compareTo------------------------ bool LibraryCallKit::inline_string_compareTo() { + if (!Matcher::has_match_rule(Op_StrComp)) return false; + const int value_offset = java_lang_String::value_offset_in_bytes(); const int count_offset = java_lang_String::count_offset_in_bytes(); const int offset_offset = java_lang_String::offset_offset_in_bytes(); @@ -830,6 +839,82 @@ bool LibraryCallKit::inline_string_compareTo() { return true; } +//------------------------------inline_string_equals------------------------ +bool LibraryCallKit::inline_string_equals() { + + if (!Matcher::has_match_rule(Op_StrEquals)) return false; + + const int value_offset = java_lang_String::value_offset_in_bytes(); + const int count_offset = java_lang_String::count_offset_in_bytes(); + const int offset_offset = java_lang_String::offset_offset_in_bytes(); + + _sp += 2; + Node* argument = pop(); // pop non-receiver first: it was pushed second + Node* receiver = pop(); + + // Null check on self without removing any arguments. The argument + // null check technically happens in the wrong place, which can lead to + // invalid stack traces when string compare is inlined into a method + // which handles NullPointerExceptions. + _sp += 2; + receiver = do_null_check(receiver, T_OBJECT); + //should not do null check for argument for String.equals(), because spec + //allows to specify NULL as argument. + _sp -= 2; + + if (stopped()) { + return true; + } + + // get String klass for instanceOf + ciInstanceKlass* klass = env()->String_klass(); + + // two paths (plus control) merge + RegionNode* region = new (C, 3) RegionNode(3); + Node* phi = new (C, 3) PhiNode(region, TypeInt::BOOL); + + Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass))); + Node* cmp = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1))); + Node* bol = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq)); + + IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN); + + Node* if_true = _gvn.transform(new (C, 1) IfTrueNode(iff)); + set_control(if_true); + + const TypeInstPtr* string_type = + TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0); + + // instanceOf == true + Node* equals = + _gvn.transform(new (C, 7) StrEqualsNode( + control(), + memory(TypeAryPtr::CHARS), + memory(string_type->add_offset(value_offset)), + memory(string_type->add_offset(count_offset)), + memory(string_type->add_offset(offset_offset)), + receiver, + argument)); + + phi->init_req(1, _gvn.transform(equals)); + region->init_req(1, if_true); + + //instanceOf == false, fallthrough + Node* if_false = _gvn.transform(new (C, 1) IfFalseNode(iff)); + set_control(if_false); + + phi->init_req(2, _gvn.transform(intcon(0))); + region->init_req(2, if_false); + + // post merge + set_control(_gvn.transform(region)); + record_for_igvn(region); + + push(_gvn.transform(phi)); + + return true; +} + //------------------------------inline_array_equals---------------------------- bool LibraryCallKit::inline_array_equals() { @@ -994,80 +1079,115 @@ Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_ar return result; } - //------------------------------inline_string_indexOf------------------------ bool LibraryCallKit::inline_string_indexOf() { - _sp += 2; - Node *argument = pop(); // pop non-receiver first: it was pushed second - Node *receiver = pop(); - - // don't intrinsify if argument isn't a constant string. - if (!argument->is_Con()) { - return false; - } - const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr(); - if (str_type == NULL) { - return false; - } - ciInstanceKlass* klass = env()->String_klass(); - ciObject* str_const = str_type->const_oop(); - if (str_const == NULL || str_const->klass() != klass) { - return false; - } - ciInstance* str = str_const->as_instance(); - assert(str != NULL, "must be instance"); - const int value_offset = java_lang_String::value_offset_in_bytes(); const int count_offset = java_lang_String::count_offset_in_bytes(); const int offset_offset = java_lang_String::offset_offset_in_bytes(); - ciObject* v = str->field_value_by_offset(value_offset).as_object(); - int o = str->field_value_by_offset(offset_offset).as_int(); - int c = str->field_value_by_offset(count_offset).as_int(); - ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array - - // constant strings have no offset and count == length which - // simplifies the resulting code somewhat so lets optimize for that. - if (o != 0 || c != pat->length()) { - return false; - } - - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. _sp += 2; - receiver = do_null_check(receiver, T_OBJECT); - // No null check on the argument is needed since it's a constant String oop. - _sp -= 2; - if (stopped()) { - return true; - } + Node *argument = pop(); // pop non-receiver first: it was pushed second + Node *receiver = pop(); - // The null string as a pattern always returns 0 (match at beginning of string) - if (c == 0) { - push(intcon(0)); - return true; - } + Node* result; + if (Matcher::has_match_rule(Op_StrIndexOf) && + UseSSE42Intrinsics) { + // Generate SSE4.2 version of indexOf + // We currently only have match rules that use SSE4.2 - jchar lastChar = pat->char_at(o + (c - 1)); - int cache = 0; - int i; - for (i = 0; i < c - 1; i++) { - assert(i < pat->length(), "out of range"); - cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1))); - } + // Null check on self without removing any arguments. The argument + // null check technically happens in the wrong place, which can lead to + // invalid stack traces when string compare is inlined into a method + // which handles NullPointerExceptions. + _sp += 2; + receiver = do_null_check(receiver, T_OBJECT); + argument = do_null_check(argument, T_OBJECT); + _sp -= 2; - int md2 = c; - for (i = 0; i < c - 1; i++) { - assert(i < pat->length(), "out of range"); - if (pat->char_at(o + i) == lastChar) { - md2 = (c - 1) - i; + if (stopped()) { + return true; } + + ciInstanceKlass* klass = env()->String_klass(); + const TypeInstPtr* string_type = + TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0); + + result = + _gvn.transform(new (C, 7) + StrIndexOfNode(control(), + memory(TypeAryPtr::CHARS), + memory(string_type->add_offset(value_offset)), + memory(string_type->add_offset(count_offset)), + memory(string_type->add_offset(offset_offset)), + receiver, + argument)); + } else { //Use LibraryCallKit::string_indexOf + // don't intrinsify is argument isn't a constant string. + if (!argument->is_Con()) { + return false; + } + const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr(); + if (str_type == NULL) { + return false; + } + ciInstanceKlass* klass = env()->String_klass(); + ciObject* str_const = str_type->const_oop(); + if (str_const == NULL || str_const->klass() != klass) { + return false; + } + ciInstance* str = str_const->as_instance(); + assert(str != NULL, "must be instance"); + + ciObject* v = str->field_value_by_offset(value_offset).as_object(); + int o = str->field_value_by_offset(offset_offset).as_int(); + int c = str->field_value_by_offset(count_offset).as_int(); + ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array + + // constant strings have no offset and count == length which + // simplifies the resulting code somewhat so lets optimize for that. + if (o != 0 || c != pat->length()) { + return false; + } + + // Null check on self without removing any arguments. The argument + // null check technically happens in the wrong place, which can lead to + // invalid stack traces when string compare is inlined into a method + // which handles NullPointerExceptions. + _sp += 2; + receiver = do_null_check(receiver, T_OBJECT); + // No null check on the argument is needed since it's a constant String oop. + _sp -= 2; + if (stopped()) { + return true; + } + + // The null string as a pattern always returns 0 (match at beginning of string) + if (c == 0) { + push(intcon(0)); + return true; + } + + // Generate default indexOf + jchar lastChar = pat->char_at(o + (c - 1)); + int cache = 0; + int i; + for (i = 0; i < c - 1; i++) { + assert(i < pat->length(), "out of range"); + cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1))); + } + + int md2 = c; + for (i = 0; i < c - 1; i++) { + assert(i < pat->length(), "out of range"); + if (pat->char_at(o + i) == lastChar) { + md2 = (c - 1) - i; + } + } + + result = string_indexOf(receiver, pat, o, cache, md2); } - Node* result = string_indexOf(receiver, pat, o, cache, md2); push(result); return true; } diff --git a/hotspot/src/share/vm/opto/loopnode.cpp b/hotspot/src/share/vm/opto/loopnode.cpp index bb372e0d3f6..a36d0a534fc 100644 --- a/hotspot/src/share/vm/opto/loopnode.cpp +++ b/hotspot/src/share/vm/opto/loopnode.cpp @@ -2668,6 +2668,8 @@ void PhaseIdealLoop::build_loop_late_post( Node *n, const PhaseIdealLoop *verify case Op_LoadD_unaligned: case Op_LoadL_unaligned: case Op_StrComp: // Does a bunch of load-like effects + case Op_StrEquals: + case Op_StrIndexOf: case Op_AryEq: pinned = false; } diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp index 100f79fd9e1..e230480b22a 100644 --- a/hotspot/src/share/vm/opto/matcher.cpp +++ b/hotspot/src/share/vm/opto/matcher.cpp @@ -746,6 +746,8 @@ static void match_alias_type(Compile* C, Node* n, Node* m) { if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) { switch (n->Opcode()) { case Op_StrComp: + case Op_StrEquals: + case Op_StrIndexOf: case Op_AryEq: case Op_MemBarVolatile: case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type? @@ -1788,6 +1790,8 @@ void Matcher::find_shared( Node *n ) { mstack.push(n->in(0), Pre_Visit); // Visit Control input continue; // while (mstack.is_nonempty()) case Op_StrComp: + case Op_StrEquals: + case Op_StrIndexOf: case Op_AryEq: set_shared(n); // Force result into register (it will be anyways) break; diff --git a/hotspot/src/share/vm/opto/memnode.cpp b/hotspot/src/share/vm/opto/memnode.cpp index 570e813e2fa..b2af60d27dc 100644 --- a/hotspot/src/share/vm/opto/memnode.cpp +++ b/hotspot/src/share/vm/opto/memnode.cpp @@ -2481,6 +2481,31 @@ Node *StrCompNode::Ideal(PhaseGVN *phase, bool can_reshape){ return remove_dead_region(phase, can_reshape) ? this : NULL; } +// Do we match on this edge? No memory edges +uint StrEqualsNode::match_edge(uint idx) const { + return idx == 5 || idx == 6; +} + +//------------------------------Ideal------------------------------------------ +// Return a node which is more "ideal" than the current node. Strip out +// control copies +Node *StrEqualsNode::Ideal(PhaseGVN *phase, bool can_reshape){ + return remove_dead_region(phase, can_reshape) ? this : NULL; +} + +//============================================================================= +// Do we match on this edge? No memory edges +uint StrIndexOfNode::match_edge(uint idx) const { + return idx == 5 || idx == 6; +} + +//------------------------------Ideal------------------------------------------ +// Return a node which is more "ideal" than the current node. Strip out +// control copies +Node *StrIndexOfNode::Ideal(PhaseGVN *phase, bool can_reshape){ + return remove_dead_region(phase, can_reshape) ? this : NULL; +} + //------------------------------Ideal------------------------------------------ // Return a node which is more "ideal" than the current node. Strip out // control copies @@ -2488,7 +2513,6 @@ Node *AryEqNode::Ideal(PhaseGVN *phase, bool can_reshape){ return remove_dead_region(phase, can_reshape) ? this : NULL; } - //============================================================================= MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent) : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)), diff --git a/hotspot/src/share/vm/opto/memnode.hpp b/hotspot/src/share/vm/opto/memnode.hpp index e318f3079f6..1d4f499da13 100644 --- a/hotspot/src/share/vm/opto/memnode.hpp +++ b/hotspot/src/share/vm/opto/memnode.hpp @@ -765,6 +765,54 @@ public: virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); }; +//------------------------------StrEquals------------------------------------- +class StrEqualsNode: public Node { +public: + StrEqualsNode(Node *control, + Node* char_array_mem, + Node* value_mem, + Node* count_mem, + Node* offset_mem, + Node* s1, Node* s2): Node(control, + char_array_mem, + value_mem, + count_mem, + offset_mem, + s1, s2) {}; + virtual int Opcode() const; + virtual bool depends_only_on_test() const { return false; } + virtual const Type* bottom_type() const { return TypeInt::BOOL; } + // a StrEqualsNode (conservatively) aliases with everything: + virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; } + virtual uint match_edge(uint idx) const; + virtual uint ideal_reg() const { return Op_RegI; } + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); +}; + +//------------------------------StrIndexOf------------------------------------- +class StrIndexOfNode: public Node { +public: + StrIndexOfNode(Node *control, + Node* char_array_mem, + Node* value_mem, + Node* count_mem, + Node* offset_mem, + Node* s1, Node* s2): Node(control, + char_array_mem, + value_mem, + count_mem, + offset_mem, + s1, s2) {}; + virtual int Opcode() const; + virtual bool depends_only_on_test() const { return false; } + virtual const Type* bottom_type() const { return TypeInt::INT; } + // a StrIndexOfNode (conservatively) aliases with everything: + virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; } + virtual uint match_edge(uint idx) const; + virtual uint ideal_reg() const { return Op_RegI; } + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); +}; + //------------------------------AryEq--------------------------------------- class AryEqNode: public Node { public: diff --git a/hotspot/src/share/vm/runtime/arguments.cpp b/hotspot/src/share/vm/runtime/arguments.cpp index 05a84d581e9..567e6073e28 100644 --- a/hotspot/src/share/vm/runtime/arguments.cpp +++ b/hotspot/src/share/vm/runtime/arguments.cpp @@ -1366,9 +1366,6 @@ void Arguments::set_aggressive_opts_flags() { if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) { FLAG_SET_DEFAULT(DoEscapeAnalysis, true); } - if (AggressiveOpts && FLAG_IS_DEFAULT(SpecialArraysEquals)) { - FLAG_SET_DEFAULT(SpecialArraysEquals, true); - } if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) { FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500); } diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 9af4074d76f..fc85089e624 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -491,9 +491,15 @@ class CommandLineFlags { develop(bool, SpecialStringIndexOf, true, \ "special version of string indexOf") \ \ - product(bool, SpecialArraysEquals, false, \ + develop(bool, SpecialStringEquals, true, \ + "special version of string equals") \ + \ + develop(bool, SpecialArraysEquals, true, \ "special version of Arrays.equals(char[],char[])") \ \ + product(bool, UseSSE42Intrinsics, false, \ + "SSE4.2 versions of intrinsics") \ + \ develop(bool, TraceCallFixup, false, \ "traces all call fixups") \ \ From b857081608c1f9cf158cf253143fb704acb106a5 Mon Sep 17 00:00:00 2001 From: Tom Rodriguez Date: Tue, 31 Mar 2009 15:09:45 -0700 Subject: [PATCH 2/3] 6824463: deopt blob is testing wrong register on 64-bit x86 Reviewed-by: jrose, phh, kvn --- hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp index 0de47d37fe3..57fa4a480ca 100644 --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp @@ -2691,7 +2691,7 @@ void SharedRuntime::generate_deopt_blob() { __ mov(rdi, rax); Label noException; - __ cmpl(r12, Deoptimization::Unpack_exception); // Was exception pending? + __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? __ jcc(Assembler::notEqual, noException); __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); // QQQ this is useless it was NULL above From 4d34a77121f9d9869b08416bb3c63afd84a365a7 Mon Sep 17 00:00:00 2001 From: Tom Rodriguez Date: Wed, 1 Apr 2009 11:45:01 -0700 Subject: [PATCH 3/3] 6823454: Oop-typed loadP yields invalid pointer (0x1) on SPECjbb2005 at OSRed method entry Reviewed-by: kvn, jrose --- hotspot/src/share/vm/opto/parse1.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hotspot/src/share/vm/opto/parse1.cpp b/hotspot/src/share/vm/opto/parse1.cpp index 12b75fb327f..da9537eb3c4 100644 --- a/hotspot/src/share/vm/opto/parse1.cpp +++ b/hotspot/src/share/vm/opto/parse1.cpp @@ -95,7 +95,7 @@ Node *Parse::fetch_interpreter_state(int index, switch( bt ) { // Signature is flattened case T_INT: l = new (C, 3) LoadINode( 0, mem, adr, TypeRawPtr::BOTTOM ); break; case T_FLOAT: l = new (C, 3) LoadFNode( 0, mem, adr, TypeRawPtr::BOTTOM ); break; - case T_ADDRESS: + case T_ADDRESS: l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM ); break; case T_OBJECT: l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM ); break; case T_LONG: case T_DOUBLE: {