diff --git a/.hgtags-top-repo b/.hgtags-top-repo index af2cc8250c7..a531df300ad 100644 --- a/.hgtags-top-repo +++ b/.hgtags-top-repo @@ -142,3 +142,4 @@ a4f28069d44a379cda99dd1d921d19f819726d22 jdk8-b15 7010bd24cdd07bc7daef80702f39124854dec36c jdk8-b18 237bc29afbfc6f56a4fe4a6008e2befb59c44bac jdk8-b19 5a5eaf6374bcbe23530899579fed17a05b7705f3 jdk8-b20 +cc771d92284f71765eca14d6d08703c4af254c04 jdk8-b21 diff --git a/hotspot/.hgtags b/hotspot/.hgtags index b55f2017c48..214b0ee6025 100644 --- a/hotspot/.hgtags +++ b/hotspot/.hgtags @@ -209,3 +209,5 @@ a2fef924d8e6f37dac2a887315e3502876cc8e24 hs23-b08 4bcf61041217f8677dcec18e90e9196acc945bba hs23-b09 9232e0ecbc2cec54dcc8f93004fb00c214446460 jdk8-b19 fe2c8764998112b7fefcd7d41599714813ae4327 jdk8-b20 +9952d1c439d64c5fd4ad1236a63a62bd5a49d4c3 jdk8-b21 +513351373923f74a7c91755748b95c9771e59f96 hs23-b10 diff --git a/hotspot/make/bsd/makefiles/adlc.make b/hotspot/make/bsd/makefiles/adlc.make index 69797ab734e..7686c488623 100644 --- a/hotspot/make/bsd/makefiles/adlc.make +++ b/hotspot/make/bsd/makefiles/adlc.make @@ -39,9 +39,16 @@ OS = $(Platform_os_family) SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad -SOURCES.AD = \ +ifeq ("${Platform_arch_model}", "${Platform_arch}") + SOURCES.AD = \ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +else + SOURCES.AD = \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +endif EXEC = $(OUTDIR)/adlc diff --git a/hotspot/make/hotspot_version b/hotspot/make/hotspot_version index a72f4c22d43..75a0f4c2b14 100644 --- a/hotspot/make/hotspot_version +++ b/hotspot/make/hotspot_version @@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2011 HS_MAJOR_VER=23 
HS_MINOR_VER=0 -HS_BUILD_NUMBER=09 +HS_BUILD_NUMBER=10 JDK_MAJOR_VER=1 JDK_MINOR_VER=8 diff --git a/hotspot/make/linux/makefiles/adlc.make b/hotspot/make/linux/makefiles/adlc.make index 0c15c1c6589..33a28eef86b 100644 --- a/hotspot/make/linux/makefiles/adlc.make +++ b/hotspot/make/linux/makefiles/adlc.make @@ -39,9 +39,16 @@ OS = $(Platform_os_family) SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad -SOURCES.AD = \ +ifeq ("${Platform_arch_model}", "${Platform_arch}") + SOURCES.AD = \ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +else + SOURCES.AD = \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +endif EXEC = $(OUTDIR)/adlc diff --git a/hotspot/make/solaris/makefiles/adlc.make b/hotspot/make/solaris/makefiles/adlc.make index 4bcecf607cd..b14a1879477 100644 --- a/hotspot/make/solaris/makefiles/adlc.make +++ b/hotspot/make/solaris/makefiles/adlc.make @@ -40,9 +40,16 @@ OS = $(Platform_os_family) SOURCE.AD = $(OUTDIR)/$(OS)_$(Platform_arch_model).ad -SOURCES.AD = \ +ifeq ("${Platform_arch_model}", "${Platform_arch}") + SOURCES.AD = \ $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +else + SOURCES.AD = \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch_model).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/cpu/$(ARCH)/vm/$(Platform_arch).ad) \ + $(call altsrc-replace,$(HS_COMMON_SRC)/os_cpu/$(OS)_$(ARCH)/vm/$(OS)_$(Platform_arch_model).ad) +endif EXEC = $(OUTDIR)/adlc diff --git a/hotspot/make/windows/makefiles/adlc.make b/hotspot/make/windows/makefiles/adlc.make index 
d03e73373ea..de607ec52d4 100644 --- a/hotspot/make/windows/makefiles/adlc.make +++ b/hotspot/make/windows/makefiles/adlc.make @@ -53,6 +53,17 @@ CPP_INCLUDE_DIRS=\ /I "$(WorkSpace)\src\os\windows\vm" \ /I "$(WorkSpace)\src\cpu\$(Platform_arch)\vm" +!if "$(Platform_arch_model)" == "$(Platform_arch)" +SOURCES_AD=\ + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ + $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad +!else +SOURCES_AD=\ + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ + $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch).ad \ + $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad +!endif + # NOTE! If you add any files here, you must also update GENERATED_NAMES_IN_DIR # and ProjectCreatorIDEOptions in projectcreator.make. GENERATED_NAMES=\ @@ -105,7 +116,6 @@ $(GENERATED_NAMES_IN_DIR): $(Platform_arch_model).ad adlc.exe $(ADLC) $(ADLCFLAGS) $(Platform_arch_model).ad mv $(GENERATED_NAMES) $(AdlcOutDir)/ -$(Platform_arch_model).ad: $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad +$(Platform_arch_model).ad: $(SOURCES_AD) rm -f $(Platform_arch_model).ad - cat $(WorkSpace)/src/cpu/$(Platform_arch)/vm/$(Platform_arch_model).ad \ - $(WorkSpace)/src/os_cpu/windows_$(Platform_arch)/vm/windows_$(Platform_arch_model).ad >$(Platform_arch_model).ad + cat $(SOURCES_AD) >$(Platform_arch_model).ad diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp index 8fe11550f28..71c8e074532 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp @@ -3036,10 +3036,8 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, Label* L_failure, Label* L_slow_path, RegisterOrConstant super_check_offset) { - int sc_offset = 
(klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); bool must_load_sco = (super_check_offset.constant_or_zero() == -1); bool need_slow_path = (must_load_sco || @@ -3159,10 +3157,8 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, assert(label_nulls <= 1, "at most one NULL in the batch"); // a couple of useful fields in sub_klass: - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_supers_offset_in_bytes()); - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); // Do a linear scan of the secondary super-klass chain. // This code is rarely used, so simplicity is a virtue here. @@ -3336,7 +3332,7 @@ void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label); load_klass(obj_reg, temp_reg); - ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); or3(G2_thread, temp_reg, temp_reg); xor3(mark_reg, temp_reg, temp_reg); andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg); @@ -3413,7 +3409,7 @@ void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. 
load_klass(obj_reg, temp_reg); - ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); or3(G2_thread, temp_reg, temp_reg); casn(mark_addr.base(), mark_reg, temp_reg); // If the biasing toward our thread failed, this means that @@ -3443,7 +3439,7 @@ void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, // FIXME: due to a lack of registers we currently blow away the age // bits in this situation. Should attempt to preserve them. load_klass(obj_reg, temp_reg); - ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg); + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); casn(mark_addr.base(), mark_reg, temp_reg); // Fall through to the normal CAS-based lock, because no matter what // the result of the above CAS, some thread must have succeeded in diff --git a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp index a54b68c4513..837488c23e9 100644 --- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp @@ -302,7 +302,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) { assert(_obj != noreg, "must be a valid register"); assert(_oop_index >= 0, "must have oop index"); __ load_heap_oop(_obj, java_lang_Class::klass_offset_in_bytes(), G3); - __ ld_ptr(G3, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc), G3); + __ ld_ptr(G3, in_bytes(instanceKlass::init_thread_offset()), G3); __ cmp_and_brx_short(G2_thread, G3, Assembler::notEqual, Assembler::pn, call_patch); // load_klass patches may execute the patched code before it's @@ -471,7 +471,7 @@ void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) { __ load_klass(src_reg, tmp_reg); - Address ref_type_adr(tmp_reg, 
instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc)); + Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset()); __ ld(ref_type_adr, tmp_reg); // _reference_type field is of type ReferenceType (enum) diff --git a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp index 97e86fd789f..96953eebf5a 100644 --- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp @@ -2202,8 +2202,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { __ load_klass(dst, tmp); } - int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + int lh_offset = in_bytes(Klass::layout_helper_offset()); __ lduw(tmp, lh_offset, tmp2); @@ -2238,12 +2237,10 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { __ mov(length, len); __ load_klass(dst, tmp); - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + - objArrayKlass::element_klass_offset_in_bytes()); + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); __ ld_ptr(tmp, ek_offset, super_k); - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); __ lduw(super_k, sco_offset, chk_off); __ call_VM_leaf(tmp, copyfunc_addr); @@ -2455,8 +2452,8 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { op->obj()->as_register() == O0 && op->klass()->as_register() == G5, "must be"); if (op->init_check()) { - __ ld(op->klass()->as_register(), - instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), + __ ldub(op->klass()->as_register(), + in_bytes(instanceKlass::init_state_offset()), op->tmp1()->as_register()); add_debug_info_for_null_check_here(op->stub()->info()); __ cmp(op->tmp1()->as_register(), instanceKlass::fully_initialized); @@ -2627,7 +2624,7 @@ 
void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L } else { bool need_slow_path = true; if (k->is_loaded()) { - if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()) + if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset())) need_slow_path = false; // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg, @@ -2731,7 +2728,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { __ load_klass(value, klass_RInfo); // get instance klass - __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)), k_RInfo); + __ ld_ptr(Address(k_RInfo, objArrayKlass::element_klass_offset()), k_RInfo); // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, success_target, failure_target, NULL); diff --git a/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp index 43722254879..47f82cf878d 100644 --- a/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp @@ -181,7 +181,7 @@ void C1_MacroAssembler::try_allocate( void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { assert_different_registers(obj, klass, len, t1, t2); if (UseBiasedLocking && !len->is_valid()) { - ld_ptr(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes(), t1); + ld_ptr(klass, in_bytes(Klass::prototype_header_offset()), t1); } else { set((intx)markOopDesc::prototype(), t1); } @@ -252,7 +252,7 @@ void C1_MacroAssembler::initialize_object( #ifdef ASSERT { Label ok; - ld(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), t1); + ld(klass, in_bytes(Klass::layout_helper_offset()), t1); if (var_size_in_bytes != noreg) { 
cmp_and_brx_short(t1, var_size_in_bytes, Assembler::equal, Assembler::pt, ok); } else { diff --git a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp index 64f15e0b64a..d0ddc37c867 100644 --- a/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp @@ -398,14 +398,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { if (id == fast_new_instance_init_check_id) { // make sure the klass is initialized - __ ld(G5_klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_t1); + __ ldub(G5_klass, in_bytes(instanceKlass::init_state_offset()), G3_t1); __ cmp_and_br_short(G3_t1, instanceKlass::fully_initialized, Assembler::notEqual, Assembler::pn, slow_path); } #ifdef ASSERT // assert object can be fast path allocated { Label ok, not_ok; - __ ld(G5_klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), G1_obj_size); + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); // make sure it's an instance (LH > 0) __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok); __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size); @@ -425,7 +425,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ bind(retry_tlab); // get the instance size - __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size); + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path); @@ -437,7 +437,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ bind(try_eden); // get the instance size - __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size); + __ ld(G5_klass, in_bytes(Klass::layout_helper_offset()), G1_obj_size); __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path); __ 
incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2); @@ -471,8 +471,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { Register G4_length = G4; // Incoming Register O0_obj = O0; // Outgoing - Address klass_lh(G5_klass, ((klassOopDesc::header_size() * HeapWordSize) - + Klass::layout_helper_offset_in_bytes())); + Address klass_lh(G5_klass, Klass::layout_helper_offset()); assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); assert(Klass::_lh_header_size_mask == 0xFF, "bytewise"); // Use this offset to pick out an individual byte of the layout_helper: @@ -592,7 +591,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { Label register_finalizer; Register t = O1; __ load_klass(O0, t); - __ ld(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), t); + __ ld(t, in_bytes(Klass::access_flags_offset()), t); __ set(JVM_ACC_HAS_FINALIZER, G3); __ andcc(G3, t, G0); __ br(Assembler::notZero, false, Assembler::pt, register_finalizer); diff --git a/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp b/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp index 2c2a93f9959..f402d622f35 100644 --- a/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/cppInterpreter_sparc.cpp @@ -766,7 +766,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // get native function entry point(O0 is a good temp until the very end) ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc::native_function_offset())), O0); // for static methods insert the mirror argument - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc:: constants_offset())), O1); __ ld_ptr(Address(O1, 0, constantPoolOopDesc::pool_holder_offset_in_bytes()), O1); @@ -1173,7 +1173,7 @@ void 
CppInterpreterGenerator::generate_compute_interpreter_state(const Register __ btst(JVM_ACC_SYNCHRONIZED, O1); __ br( Assembler::zero, false, Assembler::pt, done); - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ delayed()->btst(JVM_ACC_STATIC, O1); __ ld_ptr(XXX_STATE(_locals), O1); __ br( Assembler::zero, true, Assembler::pt, got_obj); diff --git a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp index d2a94d17edb..5bdf88e53ba 100644 --- a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp @@ -1098,7 +1098,7 @@ void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHan Address G3_amh_argument ( G3_method_handle, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes()); Address G3_amh_conversion(G3_method_handle, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes()); - const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int java_mirror_offset = in_bytes(Klass::java_mirror_offset()); if (have_entry(ek)) { __ nop(); // empty stubs make SG sick diff --git a/hotspot/src/cpu/sparc/vm/sparc.ad b/hotspot/src/cpu/sparc/vm/sparc.ad index fe5f992e889..3063b2d9714 100644 --- a/hotspot/src/cpu/sparc/vm/sparc.ad +++ b/hotspot/src/cpu/sparc/vm/sparc.ad @@ -6773,6 +6773,16 @@ instruct unnecessary_membar_volatile() %{ ins_pipe(empty); %} +instruct membar_storestore() %{ + match(MemBarStoreStore); + ins_cost(0); + + size(0); + format %{ "!MEMBAR-storestore (empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + //----------Register Move Instructions----------------------------------------- instruct roundDouble_nop(regD dst) %{ match(Set dst (RoundDouble dst)); @@ -9273,6 +9283,7 @@ instruct cmpD_reg(iRegI dst, regD src1, regD 
src2, flagsRegF0 fcc0) %{ // (compare 'operand indIndex' and 'instruct addP_reg_reg' above) instruct jumpXtnd(iRegX switch_val, o7RegI table) %{ match(Jump switch_val); + effect(TEMP table); ins_cost(350); @@ -10263,24 +10274,24 @@ instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, i // ============================================================================ // inlined locking and unlocking -instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{ +instruct cmpFastLock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{ match(Set pcc (FastLock object box)); - effect(KILL scratch, TEMP scratch2); + effect(TEMP scratch2, USE_KILL box, KILL scratch); ins_cost(100); - format %{ "FASTLOCK $object, $box; KILL $scratch, $scratch2, $box" %} + format %{ "FASTLOCK $object,$box\t! kills $box,$scratch,$scratch2" %} ins_encode( Fast_Lock(object, box, scratch, scratch2) ); ins_pipe(long_memory_op); %} -instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, o7RegP scratch ) %{ +instruct cmpFastUnlock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{ match(Set pcc (FastUnlock object box)); - effect(KILL scratch, TEMP scratch2); + effect(TEMP scratch2, USE_KILL box, KILL scratch); ins_cost(100); - format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2, $box" %} + format %{ "FASTUNLOCK $object,$box\t! 
kills $box,$scratch,$scratch2" %} ins_encode( Fast_Unlock(object, box, scratch, scratch2) ); ins_pipe(long_memory_op); %} diff --git a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp index 9415c7b6c4c..0e076dbbd5b 100644 --- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp @@ -3046,8 +3046,7 @@ class StubGenerator: public StubCodeGenerator { // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 // - int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + int lh_offset = in_bytes(Klass::layout_helper_offset()); // Load 32-bits signed value. Use br() instruction with it to check icc. __ lduw(G3_src_klass, lh_offset, G5_lh); @@ -3194,15 +3193,13 @@ class StubGenerator: public StubCodeGenerator { G4_dst_klass, G3_src_klass); // Generate the type check. - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); __ lduw(G4_dst_klass, sco_offset, sco_temp); generate_type_check(G3_src_klass, sco_temp, G4_dst_klass, O5_temp, L_plain_copy); // Fetch destination element klass from the objArrayKlass header. - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + - objArrayKlass::element_klass_offset_in_bytes()); + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); // the checkcast_copy loop needs two extra arguments: __ ld_ptr(G4_dst_klass, ek_offset, O4); // dest elem klass @@ -3414,6 +3411,9 @@ class StubGenerator: public StubCodeGenerator { generate_throw_exception("WrongMethodTypeException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException), G5_method_type, G3_method_handle); + + // Build this early so it's available for the interpreter. 
+ StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); } @@ -3427,7 +3427,6 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError)); StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError)); StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call)); - StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); diff --git a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp index 8ec7d0690b6..ceb6a5994b9 100644 --- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp @@ -366,7 +366,7 @@ void InterpreterGenerator::lock_method(void) { // get synchronization object to O0 { Label done; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ btst(JVM_ACC_STATIC, O0); __ br( Assembler::zero, true, Assembler::pt, done); __ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case @@ -396,7 +396,6 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe 
Register Rscratch, Register Rscratch2) { const int page_size = os::vm_page_size(); - Address saved_exception_pc(G2_thread, JavaThread::saved_exception_pc_offset()); Label after_frame_check; assert_different_registers(Rframe_size, Rscratch, Rscratch2); @@ -436,11 +435,19 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe // the bottom of the stack __ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check); - // Save the return address as the exception pc - __ st_ptr(O7, saved_exception_pc); - // the stack will overflow, throw an exception - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + + // Note that SP is restored to sender's sp (in the delay slot). This + // is necessary if the sender's frame is an extended compiled frame + // (see gen_c2i_adapter()) and safer anyway in case of JSR292 + // adaptations. + + // Note also that the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + AddressLiteral stub(StubRoutines::throw_StackOverflowError_entry()); + __ jump_to(stub, Rscratch); + __ delayed()->mov(O5_savedSP, SP); // if you get to here, then there is enough stack space __ bind( after_frame_check ); @@ -984,7 +991,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // get native function entry point(O0 is a good temp until the very end) __ delayed()->ld_ptr(Lmethod, in_bytes(methodOopDesc::native_function_offset()), O0); // for static methods insert the mirror argument - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ ld_ptr(Lmethod, methodOopDesc:: constants_offset(), O1); __ ld_ptr(O1, constantPoolOopDesc::pool_holder_offset_in_bytes(), O1); diff --git a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp index 850dbe62a25..af6829b3d71 100644 --- a/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/templateTable_sparc.cpp @@ -888,7 +888,7 @@ void TemplateTable::aastore() { // do fast instanceof cache test - __ ld_ptr(O4, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes(), O4); + __ ld_ptr(O4, in_bytes(objArrayKlass::element_klass_offset()), O4); assert(Otos_i == O0, "just checking"); @@ -2031,7 +2031,7 @@ void TemplateTable::_return(TosState state) { __ access_local_ptr(G3_scratch, Otos_i); __ load_klass(Otos_i, O2); __ set(JVM_ACC_HAS_FINALIZER, G3); - __ ld(O2, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc), O2); + __ ld(O2, in_bytes(Klass::access_flags_offset()), O2); __ andcc(G3, O2, G0); Label skip_register_finalizer; __ br(Assembler::zero, false, Assembler::pn, skip_register_finalizer); @@ -3350,13 +3350,13 @@ void TemplateTable::_new() { __ ld_ptr(Rscratch, Roffset, RinstanceKlass); // 
make sure klass is fully initialized: - __ ld(RinstanceKlass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_scratch); + __ ldub(RinstanceKlass, in_bytes(instanceKlass::init_state_offset()), G3_scratch); __ cmp(G3_scratch, instanceKlass::fully_initialized); __ br(Assembler::notEqual, false, Assembler::pn, slow_case); - __ delayed()->ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset); + __ delayed()->ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset); // get instance_size in instanceKlass (already aligned) - //__ ld(RinstanceKlass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), Roffset); + //__ ld(RinstanceKlass, in_bytes(Klass::layout_helper_offset()), Roffset); // make sure klass does not have has_finalizer, or is abstract, or interface or java/lang/Class __ btst(Klass::_lh_instance_slow_path_bit, Roffset); @@ -3483,7 +3483,7 @@ void TemplateTable::_new() { __ bind(initialize_header); if (UseBiasedLocking) { - __ ld_ptr(RinstanceKlass, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), G4_scratch); + __ ld_ptr(RinstanceKlass, in_bytes(Klass::prototype_header_offset()), G4_scratch); } else { __ set((intptr_t)markOopDesc::prototype(), G4_scratch); } diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp index a58455cbf95..aa5cd185420 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -533,6 +533,19 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0x0F: // movx..., etc. 
switch (0xFF & *ip++) { + case 0x3A: // pcmpestri + tail_size = 1; + case 0x38: // ptest, pmovzxbw + ip++; // skip opcode + debug_only(has_disp32 = true); // has both kinds of operands! + break; + + case 0x70: // pshufd r, r/a, #8 + debug_only(has_disp32 = true); // has both kinds of operands! + case 0x73: // psrldq r, #8 + tail_size = 1; + break; + case 0x12: // movlps case 0x28: // movaps case 0x2E: // ucomiss @@ -543,9 +556,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0x57: // xorps case 0x6E: // movd case 0x7E: // movd - case 0xAE: // ldmxcsr a - // 64bit side says it these have both operands but that doesn't - // appear to be true + case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush debug_only(has_disp32 = true); break; @@ -565,6 +576,12 @@ address Assembler::locate_operand(address inst, WhichOperand which) { // fall out of the switch to decode the address break; + case 0xC4: // pinsrw r, a, #8 + debug_only(has_disp32 = true); + case 0xC5: // pextrw r, r, #8 + tail_size = 1; // the imm8 + break; + case 0xAC: // shrd r, a, #8 debug_only(has_disp32 = true); tail_size = 1; // the imm8 @@ -625,11 +642,44 @@ address Assembler::locate_operand(address inst, WhichOperand which) { tail_size = 1; // the imm8 break; - case 0xE8: // call rdisp32 - case 0xE9: // jmp rdisp32 - if (which == end_pc_operand) return ip + 4; - assert(which == call32_operand, "call has no disp32 or imm"); - return ip; + case 0xC4: // VEX_3bytes + case 0xC5: // VEX_2bytes + assert((UseAVX > 0), "shouldn't have VEX prefix"); + assert(ip == inst+1, "no prefixes allowed"); + // C4 and C5 are also used as opcodes for PINSRW and PEXTRW instructions + // but they have prefix 0x0F and processed when 0x0F processed above. + // + // In 32-bit mode the VEX first byte C4 and C5 alias onto LDS and LES + // instructions (these instructions are not supported in 64-bit mode). 
+ // To distinguish them bits [7:6] are set in the VEX second byte since + // ModRM byte can not be of the form 11xxxxxx in 32-bit mode. To set + // those VEX bits REX and vvvv bits are inverted. + // + // Fortunately C2 doesn't generate these instructions so we don't need + // to check for them in product version. + + // Check second byte + NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions")); + + // First byte + if ((0xFF & *inst) == VEX_3bytes) { + ip++; // third byte + is_64bit = ((VEX_W & *ip) == VEX_W); + } + ip++; // opcode + // To find the end of instruction (which == end_pc_operand). + switch (0xFF & *ip) { + case 0x61: // pcmpestri r, r/a, #8 + case 0x70: // pshufd r, r/a, #8 + case 0x73: // psrldq r, #8 + tail_size = 1; // the imm8 + break; + default: + break; + } + ip++; // skip opcode + debug_only(has_disp32 = true); // has both kinds of operands! + break; case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1 case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl @@ -643,6 +693,12 @@ address Assembler::locate_operand(address inst, WhichOperand which) { debug_only(has_disp32 = true); break; + case 0xE8: // call rdisp32 + case 0xE9: // jmp rdisp32 + if (which == end_pc_operand) return ip + 4; + assert(which == call32_operand, "call has no disp32 or imm"); + return ip; + case 0xF0: // Lock assert(os::is_MP(), "only on MP"); goto again_after_prefix; @@ -918,9 +974,7 @@ void Assembler::addr_nop_8() { void Assembler::addsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x58); emit_byte(0xC0 | encode); } @@ -928,18 +982,14 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) { void Assembler::addsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark 
im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x58); emit_operand(dst, src); } void Assembler::addss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x58); emit_byte(0xC0 | encode); } @@ -947,13 +997,19 @@ void Assembler::addss(XMMRegister dst, XMMRegister src) { void Assembler::addss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x58); emit_operand(dst, src); } +void Assembler::andl(Address dst, int32_t imm32) { + InstructionMark im(this); + prefix(dst); + emit_byte(0x81); + emit_operand(rsp, dst, 4); + emit_long(imm32); +} + void Assembler::andl(Register dst, int32_t imm32) { prefix(dst); emit_arith(0x81, 0xE0, dst, imm32); @@ -974,13 +1030,33 @@ void Assembler::andl(Register dst, Register src) { void Assembler::andpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0x54); emit_operand(dst, src); } +void Assembler::andpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x54); + emit_byte(0xC0 | encode); +} + +void Assembler::andps(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_NONE); + emit_byte(0x54); + emit_operand(dst, src); +} + +void Assembler::andps(XMMRegister dst, XMMRegister src) { + 
NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); + emit_byte(0x54); + emit_byte(0xC0 | encode); +} + void Assembler::bsfl(Register dst, Register src) { int encode = prefix_and_encode(dst->encoding(), src->encoding()); emit_byte(0x0F); @@ -1025,19 +1101,7 @@ void Assembler::call(Label& L, relocInfo::relocType rtype) { } void Assembler::call(Register dst) { - // This was originally using a 32bit register encoding - // and surely we want 64bit! - // this is a 32bit encoding but in 64bit mode the default - // operand size is 64bit so there is no need for the - // wide prefix. So prefix only happens if we use the - // new registers. Much like push/pop. - int x = offset(); - // this may be true but dbx disassembles it as if it - // were 32bits... - // int encode = prefix_and_encode(dst->encoding()); - // if (offset() != x) assert(dst->encoding() >= 8, "what?"); - int encode = prefixq_and_encode(dst->encoding()); - + int encode = prefix_and_encode(dst->encoding()); emit_byte(0xFF); emit_byte(0xD0 | encode); } @@ -1157,87 +1221,119 @@ void Assembler::comisd(XMMRegister dst, Address src) { // NOTE: dbx seems to decode this as comiss even though the // 0x66 is there. 
Strangly ucomisd comes out correct NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - comiss(dst, src); -} - -void Assembler::comiss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0x2F); emit_operand(dst, src); } +void Assembler::comisd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); + emit_byte(0x2F); + emit_byte(0xC0 | encode); +} + +void Assembler::comiss(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_NONE); + emit_byte(0x2F); + emit_operand(dst, src); +} + +void Assembler::comiss(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); + emit_byte(0x2F); + emit_byte(0xC0 | encode); +} + void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); emit_byte(0xE6); emit_byte(0xC0 | encode); } void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); emit_byte(0x5B); emit_byte(0xC0 | encode); } void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); 
emit_byte(0x5A); emit_byte(0xC0 | encode); } +void Assembler::cvtsd2ss(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F2); + emit_byte(0x5A); + emit_operand(dst, src); +} + void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F2); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F3); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5A); emit_byte(0xC0 | encode); } +void Assembler::cvtss2sd(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_F3); + emit_byte(0x5A); + emit_operand(dst, src); +} + + void Assembler::cvttsd2sil(Register 
dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); emit_byte(0x2C); emit_byte(0xC0 | encode); } void Assembler::cvttss2sil(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); emit_byte(0x2C); emit_byte(0xC0 | encode); } @@ -1253,18 +1349,14 @@ void Assembler::decl(Address dst) { void Assembler::divsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5E); emit_operand(dst, src); } void Assembler::divsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5E); emit_byte(0xC0 | encode); } @@ -1272,18 +1364,14 @@ void Assembler::divsd(XMMRegister dst, XMMRegister src) { void Assembler::divss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5E); emit_operand(dst, src); } void Assembler::divss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5E); emit_byte(0xC0 | encode); } @@ -1377,8 +1465,14 @@ void 
Assembler::jccb(Condition cc, Label& L) { if (L.is_bound()) { const int short_size = 2; address entry = target(L); - assert(is8bit((intptr_t)entry - ((intptr_t)_code_pos + short_size)), - "Dispacement too large for a short jmp"); +#ifdef ASSERT + intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); + intptr_t delta = short_branch_delta(); + if (delta != 0) { + dist += (dist < 0 ? (-delta) :delta); + } + assert(is8bit(dist), "Dispacement too large for a short jmp"); +#endif intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; // 0111 tttn #8-bit disp emit_byte(0x70 | cc); @@ -1444,9 +1538,15 @@ void Assembler::jmpb(Label& L) { if (L.is_bound()) { const int short_size = 2; address entry = target(L); - assert(is8bit((entry - _code_pos) + short_size), - "Dispacement too large for a short jmp"); assert(entry != NULL, "jmp most probably wrong"); +#ifdef ASSERT + intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); + intptr_t delta = short_branch_delta(); + if (delta != 0) { + dist += (dist < 0 ? 
(-delta) :delta); + } + assert(is8bit(dist), "Dispacement too large for a short jmp"); +#endif intptr_t offs = entry - _code_pos; emit_byte(0xEB); emit_byte((offs - short_size) & 0xFF); @@ -1509,49 +1609,16 @@ void Assembler::mov(Register dst, Register src) { void Assembler::movapd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - int dstenc = dst->encoding(); - int srcenc = src->encoding(); - emit_byte(0x66); - if (dstenc < 8) { - if (srcenc >= 8) { - prefix(REX_B); - srcenc -= 8; - } - } else { - if (srcenc < 8) { - prefix(REX_R); - } else { - prefix(REX_RB); - srcenc -= 8; - } - dstenc -= 8; - } - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x28); - emit_byte(0xC0 | dstenc << 3 | srcenc); + emit_byte(0xC0 | encode); } void Assembler::movaps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int dstenc = dst->encoding(); - int srcenc = src->encoding(); - if (dstenc < 8) { - if (srcenc >= 8) { - prefix(REX_B); - srcenc -= 8; - } - } else { - if (srcenc < 8) { - prefix(REX_R); - } else { - prefix(REX_RB); - srcenc -= 8; - } - dstenc -= 8; - } - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); emit_byte(0x28); - emit_byte(0xC0 | dstenc << 3 | srcenc); + emit_byte(0xC0 | encode); } void Assembler::movb(Register dst, Address src) { @@ -1582,19 +1649,15 @@ void Assembler::movb(Address dst, Register src) { void Assembler::movdl(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x6E); emit_byte(0xC0 | encode); } void Assembler::movdl(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); // swap src/dst to get correct prefix - int encode = 
prefix_and_encode(src->encoding(), dst->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66); emit_byte(0x7E); emit_byte(0xC0 | encode); } @@ -1602,58 +1665,29 @@ void Assembler::movdl(Register dst, XMMRegister src) { void Assembler::movdl(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0x6E); emit_operand(dst, src); } - -void Assembler::movdqa(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x6F); - emit_operand(dst, src); -} - void Assembler::movdqa(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x6F); emit_byte(0xC0 | encode); } -void Assembler::movdqa(Address dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - prefix(dst, src); - emit_byte(0x0F); - emit_byte(0x7F); - emit_operand(src, dst); -} - void Assembler::movdqu(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x6F); emit_operand(dst, src); } void Assembler::movdqu(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3); emit_byte(0x6F); emit_byte(0xC0 | encode); } @@ -1661,9 +1695,7 @@ void Assembler::movdqu(XMMRegister 
dst, XMMRegister src) { void Assembler::movdqu(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x7F); emit_operand(src, dst); } @@ -1710,9 +1742,7 @@ void Assembler::movl(Address dst, Register src) { void Assembler::movlpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0x12); emit_operand(dst, src); } @@ -1740,9 +1770,7 @@ void Assembler::movq( Address dst, MMXRegister src ) { void Assembler::movq(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x7E); emit_operand(dst, src); } @@ -1750,9 +1778,7 @@ void Assembler::movq(XMMRegister dst, Address src) { void Assembler::movq(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0xD6); emit_operand(src, dst); } @@ -1775,9 +1801,7 @@ void Assembler::movsbl(Register dst, Register src) { // movsxb void Assembler::movsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x10); emit_byte(0xC0 | encode); } @@ -1785,9 +1809,7 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) { void Assembler::movsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - 
emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F2); emit_byte(0x10); emit_operand(dst, src); } @@ -1795,18 +1817,14 @@ void Assembler::movsd(XMMRegister dst, Address src) { void Assembler::movsd(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F2); emit_byte(0x11); emit_operand(src, dst); } void Assembler::movss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x10); emit_byte(0xC0 | encode); } @@ -1814,9 +1832,7 @@ void Assembler::movss(XMMRegister dst, XMMRegister src) { void Assembler::movss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x10); emit_operand(dst, src); } @@ -1824,9 +1840,7 @@ void Assembler::movss(XMMRegister dst, Address src) { void Assembler::movss(Address dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(dst, src); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F3); emit_byte(0x11); emit_operand(src, dst); } @@ -1919,18 +1933,14 @@ void Assembler::mull(Register src) { void Assembler::mulsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x59); emit_operand(dst, src); } void Assembler::mulsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - 
emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x59); emit_byte(0xC0 | encode); } @@ -1938,18 +1948,14 @@ void Assembler::mulsd(XMMRegister dst, XMMRegister src) { void Assembler::mulss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x59); emit_operand(dst, src); } void Assembler::mulss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x59); emit_byte(0xC0 | encode); } @@ -2237,14 +2243,26 @@ void Assembler::orl(Register dst, Register src) { emit_arith(0x0B, 0xC0, dst, src); } +void Assembler::packuswb(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0x67); + emit_operand(dst, src); +} + +void Assembler::packuswb(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x67); + emit_byte(0xC0 | encode); +} + void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x3A); + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); emit_byte(0x61); emit_operand(dst, src); emit_byte(imm8); @@ -2252,16 +2270,27 @@ void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_2(), ""); - - 
emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0x3A); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A); emit_byte(0x61); emit_byte(0xC0 | encode); emit_byte(imm8); } +void Assembler::pmovzxbw(XMMRegister dst, Address src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0x30); + emit_operand(dst, src); +} + +void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); + emit_byte(0x30); + emit_byte(0xC0 | encode); +} + // generic void Assembler::pop(Register dst) { int encode = prefix_and_encode(dst->encoding()); @@ -2360,22 +2389,24 @@ void Assembler::prefix(Prefix p) { void Assembler::por(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); emit_byte(0xEB); emit_byte(0xC0 | encode); } +void Assembler::por(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0xEB); + emit_operand(dst, src); +} + void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); emit_byte(0x70); emit_byte(0xC0 | encode); emit_byte(mode & 0xFF); @@ -2385,11 +2416,9 @@ void Assembler::pshufd(XMMRegister dst, XMMRegister 
src, int mode) { void Assembler::pshufd(XMMRegister dst, Address src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_66); emit_byte(0x70); emit_operand(dst, src); emit_byte(mode & 0xFF); @@ -2398,10 +2427,7 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) { void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2); emit_byte(0x70); emit_byte(0xC0 | encode); emit_byte(mode & 0xFF); @@ -2410,11 +2436,9 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); // QQ new - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_F2); emit_byte(0x70); emit_operand(dst, src); emit_byte(mode & 0xFF); @@ -2425,11 +2449,8 @@ void Assembler::psrlq(XMMRegister dst, int shift) { // HMM Table D-1 says sse2 or mmx. // Do not confuse it with psrldq SSE2 instruction which // shifts 128 bit value in xmm register by number of bytes. 
- NOT_LP64(assert(VM_Version::supports_sse(), "")); - - int encode = prefixq_and_encode(xmm2->encoding(), dst->encoding()); - emit_byte(0x66); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66); emit_byte(0x73); emit_byte(0xC0 | encode); emit_byte(shift); @@ -2438,10 +2459,7 @@ void Assembler::psrlq(XMMRegister dst, int shift) { void Assembler::psrldq(XMMRegister dst, int shift) { // Shift 128 bit value in xmm register by number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); - - int encode = prefixq_and_encode(xmm3->encoding(), dst->encoding()); - emit_byte(0x66); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66); emit_byte(0x73); emit_byte(0xC0 | encode); emit_byte(shift); @@ -2449,36 +2467,52 @@ void Assembler::psrldq(XMMRegister dst, int shift) { void Assembler::ptest(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); - + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); - emit_byte(0x38); + simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); emit_byte(0x17); emit_operand(dst, src); } void Assembler::ptest(XMMRegister dst, XMMRegister src) { assert(VM_Version::supports_sse4_1(), ""); - - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); - emit_byte(0x38); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38); emit_byte(0x17); emit_byte(0xC0 | encode); } +void Assembler::punpcklbw(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0x60); + emit_operand(dst, src); +} + void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); emit_byte(0x60); emit_byte(0xC0 | encode); } +void Assembler::punpckldq(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); + InstructionMark im(this); + simd_prefix(dst, dst, src, VEX_SIMD_66); + emit_byte(0x62); + emit_operand(dst, src); +} + +void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x62); + emit_byte(0xC0 | encode); +} + void Assembler::push(int32_t imm32) { // in 64bits we push 64bits onto the stack but only // take a 32bit immediate @@ -2508,20 +2542,16 @@ void Assembler::pushl(Address src) { void Assembler::pxor(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); + assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0xEF); emit_operand(dst, src); } void Assembler::pxor(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionMark im(this); - emit_byte(0x66); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); emit_byte(0xEF); emit_byte(0xC0 | encode); } @@ -2683,12 +2713,8 @@ void Assembler::smovl() { } void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { - // HMM Table D-1 says sse2 - // NOT_LP64(assert(VM_Version::supports_sse(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), 
src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x51); emit_byte(0xC0 | encode); } @@ -2696,30 +2722,22 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { void Assembler::sqrtsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F2); emit_byte(0x51); emit_operand(dst, src); } void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { - // HMM Table D-1 says sse2 - // NOT_LP64(assert(VM_Version::supports_sse(), "")); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse(), "")); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x51); emit_byte(0xC0 | encode); } void Assembler::sqrtss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); + NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x51); emit_operand(dst, src); } @@ -2765,9 +2783,7 @@ void Assembler::subl(Register dst, Register src) { void Assembler::subsd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2); emit_byte(0x5C); emit_byte(0xC0 | encode); } @@ -2775,18 +2791,14 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) { void Assembler::subsd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0xF2); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, 
src, VEX_SIMD_F2); emit_byte(0x5C); emit_operand(dst, src); } void Assembler::subss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5C); emit_byte(0xC0 | encode); } @@ -2794,9 +2806,7 @@ void Assembler::subss(XMMRegister dst, XMMRegister src) { void Assembler::subss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - emit_byte(0xF3); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_F3); emit_byte(0x5C); emit_operand(dst, src); } @@ -2836,30 +2846,30 @@ void Assembler::testl(Register dst, Address src) { void Assembler::ucomisd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - ucomiss(dst, src); + InstructionMark im(this); + simd_prefix(dst, src, VEX_SIMD_66); + emit_byte(0x2E); + emit_operand(dst, src); } void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - ucomiss(dst, src); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66); + emit_byte(0x2E); + emit_byte(0xC0 | encode); } void Assembler::ucomiss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - InstructionMark im(this); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, src, VEX_SIMD_NONE); emit_byte(0x2E); emit_operand(dst, src); } void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE); emit_byte(0x2E); emit_byte(0xC0 | encode); } @@ -2905,16 +2915,15 @@ void Assembler::xorl(Register dst, Register src) { void 
Assembler::xorpd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0x66); - xorps(dst, src); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66); + emit_byte(0x57); + emit_byte(0xC0 | encode); } void Assembler::xorpd(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); - emit_byte(0x66); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_66); emit_byte(0x57); emit_operand(dst, src); } @@ -2922,8 +2931,7 @@ void Assembler::xorpd(XMMRegister dst, Address src) { void Assembler::xorps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - int encode = prefix_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE); emit_byte(0x57); emit_byte(0xC0 | encode); } @@ -2931,12 +2939,166 @@ void Assembler::xorps(XMMRegister dst, XMMRegister src) { void Assembler::xorps(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); - prefix(src, dst); - emit_byte(0x0F); + simd_prefix(dst, dst, src, VEX_SIMD_NONE); emit_byte(0x57); emit_operand(dst, src); } +// AVX 3-operands non destructive source instructions (encoded with VEX prefix) + +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x58); + emit_operand(dst, src); +} + +void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x58); + emit_byte(0xC0 | encode); +} + +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F3); + 
emit_byte(0x58); + emit_operand(dst, src); +} + +void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x58); + emit_byte(0xC0 | encode); +} + +void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector + emit_byte(0x54); + emit_operand(dst, src); +} + +void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector + emit_byte(0x54); + emit_operand(dst, src); +} + +void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x5E); + emit_operand(dst, src); +} + +void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x5E); + emit_byte(0xC0 | encode); +} + +void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x5E); + emit_operand(dst, src); +} + +void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x5E); + emit_byte(0xC0 | encode); +} + +void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x59); + emit_operand(dst, src); +} + +void 
Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x59); + emit_byte(0xC0 | encode); +} + +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) { + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x59); + emit_operand(dst, src); +} + +void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x59); + emit_byte(0xC0 | encode); +} + + +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x5C); + emit_operand(dst, src); +} + +void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2); + emit_byte(0x5C); + emit_byte(0xC0 | encode); +} + +void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x5C); + emit_operand(dst, src); +} + +void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { + assert(VM_Version::supports_avx(), ""); + int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3); + emit_byte(0x5C); + emit_byte(0xC0 | encode); +} + +void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark im(this); + vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector + emit_byte(0x57); + emit_operand(dst, src); +} + +void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) { + assert(VM_Version::supports_avx(), ""); + InstructionMark 
im(this); + vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector + emit_byte(0x57); + emit_operand(dst, src); +} + + #ifndef _LP64 // 32bit only pieces of the assembler @@ -3394,12 +3556,114 @@ void Assembler::fyl2x() { emit_byte(0xF1); } +// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. +static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; +// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. +static int simd_opc[4] = { 0, 0, 0x38, 0x3A }; + +// Generate SSE legacy REX prefix and SIMD opcode based on VEX encoding. +void Assembler::rex_prefix(Address adr, XMMRegister xreg, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { + if (pre > 0) { + emit_byte(simd_pre[pre]); + } + if (rex_w) { + prefixq(adr, xreg); + } else { + prefix(adr, xreg); + } + if (opc > 0) { + emit_byte(0x0F); + int opc2 = simd_opc[opc]; + if (opc2 > 0) { + emit_byte(opc2); + } + } +} + +int Assembler::rex_prefix_and_encode(int dst_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool rex_w) { + if (pre > 0) { + emit_byte(simd_pre[pre]); + } + int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : + prefix_and_encode(dst_enc, src_enc); + if (opc > 0) { + emit_byte(0x0F); + int opc2 = simd_opc[opc]; + if (opc2 > 0) { + emit_byte(opc2); + } + } + return encode; +} + + +void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) { + if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) { + prefix(VEX_3bytes); + + int byte1 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0); + byte1 = (~byte1) & 0xE0; + byte1 |= opc; + a_byte(byte1); + + int byte2 = ((~nds_enc) & 0xf) << 3; + byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre; + emit_byte(byte2); + } else { + prefix(VEX_2bytes); + + int byte1 = vex_r ? VEX_R : 0; + byte1 = (~byte1) & 0x80; + byte1 |= ((~nds_enc) & 0xf) << 3; + byte1 |= (vector256 ? 
4 : 0) | pre; + emit_byte(byte1); + } +} + +void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){ + bool vex_r = (xreg_enc >= 8); + bool vex_b = adr.base_needs_rex(); + bool vex_x = adr.index_needs_rex(); + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); +} + +int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) { + bool vex_r = (dst_enc >= 8); + bool vex_b = (src_enc >= 8); + bool vex_x = false; + vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256); + return (((dst_enc & 7) << 3) | (src_enc & 7)); +} + + +void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { + if (UseAVX > 0) { + int xreg_enc = xreg->encoding(); + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256); + } else { + assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding"); + rex_prefix(adr, xreg, pre, opc, rex_w); + } +} + +int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) { + int dst_enc = dst->encoding(); + int src_enc = src->encoding(); + if (UseAVX > 0) { + int nds_enc = nds->is_valid() ? nds->encoding() : 0; + return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256); + } else { + assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding"); + return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w); + } +} #ifndef _LP64 void Assembler::incl(Register dst) { // Don't use it directly. Use MacroAssembler::incrementl() instead. 
- emit_byte(0x40 | dst->encoding()); + emit_byte(0x40 | dst->encoding()); } void Assembler::lea(Register dst, Address src) { @@ -3756,6 +4020,38 @@ void Assembler::prefix(Address adr, XMMRegister reg) { } } +void Assembler::prefixq(Address adr, XMMRegister src) { + if (src->encoding() < 8) { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + prefix(REX_WXB); + } else { + prefix(REX_WB); + } + } else { + if (adr.index_needs_rex()) { + prefix(REX_WX); + } else { + prefix(REX_W); + } + } + } else { + if (adr.base_needs_rex()) { + if (adr.index_needs_rex()) { + prefix(REX_WRXB); + } else { + prefix(REX_WRB); + } + } else { + if (adr.index_needs_rex()) { + prefix(REX_WRX); + } else { + prefix(REX_WR); + } + } + } +} + void Assembler::adcq(Register dst, int32_t imm32) { (void) prefixq_and_encode(dst->encoding()); emit_arith(0x81, 0xD0, dst, imm32); @@ -3918,36 +4214,44 @@ void Assembler::cmpxchgq(Register reg, Address adr) { void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionMark im(this); + simd_prefix_q(dst, dst, src, VEX_SIMD_F2); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3); emit_byte(0x2A); emit_byte(0xC0 | encode); } +void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionMark im(this); + 
simd_prefix_q(dst, dst, src, VEX_SIMD_F3); + emit_byte(0x2A); + emit_operand(dst, src); +} + void Assembler::cvttsd2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); - emit_byte(0xF2); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2); emit_byte(0x2C); emit_byte(0xC0 | encode); } void Assembler::cvttss2siq(Register dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); - emit_byte(0xF3); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3); emit_byte(0x2C); emit_byte(0xC0 | encode); } @@ -4107,21 +4411,17 @@ void Assembler::lzcntq(Register dst, Register src) { void Assembler::movdq(XMMRegister dst, Register src) { // table D-1 says MMX/SSE2 - NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); - emit_byte(0x66); - int encode = prefixq_and_encode(dst->encoding(), src->encoding()); - emit_byte(0x0F); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66); emit_byte(0x6E); emit_byte(0xC0 | encode); } void Assembler::movdq(Register dst, XMMRegister src) { // table D-1 says MMX/SSE2 - NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); - emit_byte(0x66); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); // swap src/dst to get correct prefix - int encode = prefixq_and_encode(src->encoding(), dst->encoding()); - emit_byte(0x0F); + int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66); emit_byte(0x7E); emit_byte(0xC0 | encode); } @@ -4632,7 +4932,7 @@ int MacroAssembler::biased_locking_enter(Register lock_reg, null_check_offset = offset(); } movl(tmp_reg, klass_addr); - xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + 
xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); if (need_tmp_reg) { pop(tmp_reg); @@ -4719,7 +5019,7 @@ int MacroAssembler::biased_locking_enter(Register lock_reg, } get_thread(tmp_reg); movl(swap_reg, klass_addr); - orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); movl(swap_reg, saved_mark_addr); if (os::is_MP()) { lock(); @@ -4757,7 +5057,7 @@ int MacroAssembler::biased_locking_enter(Register lock_reg, push(tmp_reg); } movl(tmp_reg, klass_addr); - movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); if (os::is_MP()) { lock(); } @@ -5680,6 +5980,24 @@ void MacroAssembler::addptr(Address dst, Register src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } +void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::addsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::addsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + addss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + addss(dst, Address(rscratch1, 0)); + } +} + void MacroAssembler::align(int modulus) { if (offset() % modulus != 0) { nop(modulus - (offset() % modulus)); @@ -5687,11 +6005,24 @@ void MacroAssembler::align(int modulus) { } void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { + // Used in sign-masking with aligned address. 
+ assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); if (reachable(src)) { - andpd(dst, as_Address(src)); + Assembler::andpd(dst, as_Address(src)); } else { lea(rscratch1, src); - andpd(dst, Address(rscratch1, 0)); + Assembler::andpd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { + // Used in sign-masking with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::andps(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::andps(dst, Address(rscratch1, 0)); } } @@ -6270,19 +6601,19 @@ void MacroAssembler::cmpxchgptr(Register reg, Address adr) { void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - comisd(dst, as_Address(src)); + Assembler::comisd(dst, as_Address(src)); } else { lea(rscratch1, src); - comisd(dst, Address(rscratch1, 0)); + Assembler::comisd(dst, Address(rscratch1, 0)); } } void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - comiss(dst, as_Address(src)); + Assembler::comiss(dst, as_Address(src)); } else { lea(rscratch1, src); - comiss(dst, Address(rscratch1, 0)); + Assembler::comiss(dst, Address(rscratch1, 0)); } } @@ -6366,6 +6697,24 @@ void MacroAssembler::division_with_shift (Register reg, int shift_value) { sarl(reg, shift_value); } +void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::divsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::divsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::divss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::divss(dst, Address(rscratch1, 0)); + } +} + // !defined(COMPILER2) is because of stupid core builds #if 
!defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) void MacroAssembler::empty_FPU_stack() { @@ -6805,12 +7154,39 @@ void MacroAssembler::movptr(Address dst, Register src) { LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); } -void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { +void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { if (reachable(src)) { - movss(dst, as_Address(src)); + Assembler::movsd(dst, as_Address(src)); } else { lea(rscratch1, src); - movss(dst, Address(rscratch1, 0)); + Assembler::movsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::movss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::movss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::mulsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::mulsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::mulss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::mulss(dst, Address(rscratch1, 0)); } } @@ -6992,6 +7368,193 @@ void MacroAssembler::testl(Register dst, AddressLiteral src) { testl(dst, as_Address(src)); } +void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::sqrtsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::sqrtsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::sqrtss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::sqrtss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::subsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + 
Assembler::subsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::subss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::subss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::ucomisd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::ucomisd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::ucomiss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::ucomiss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::xorpd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::xorpd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. 
+ assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::xorps(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::xorps(dst, Address(rscratch1, 0)); + } +} + +// AVX 3-operands instructions + +void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vaddsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vaddsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vaddss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vaddss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vandpd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vandpd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vandps(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vandps(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vdivsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vdivsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vdivss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vdivss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vmulsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vmulsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, 
AddressLiteral src) { + if (reachable(src)) { + vmulss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vmulss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vsubsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vsubsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vsubss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vsubss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vxorpd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vxorpd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vxorps(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vxorps(dst, nds, Address(rscratch1, 0)); + } +} + + ////////////////////////////////////////////////////////////////////////////////// #ifndef SERIALGC @@ -7430,6 +7993,16 @@ void MacroAssembler::incr_allocated_bytes(Register thread, Register var_size_in_bytes, int con_size_in_bytes, Register t1) { + if (!thread->is_valid()) { +#ifdef _LP64 + thread = r15_thread; +#else + assert(t1->is_valid(), "need temp reg"); + thread = t1; + get_thread(thread); +#endif + } + #ifdef _LP64 if (var_size_in_bytes->is_valid()) { addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); @@ -7437,12 +8010,6 @@ void MacroAssembler::incr_allocated_bytes(Register thread, addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); } #else - if (!thread->is_valid()) { - assert(t1->is_valid(), "need temp reg"); - thread = t1; - get_thread(thread); - } - if (var_size_in_bytes->is_valid()) { 
addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); } else { @@ -7685,10 +8252,8 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } assert(label_nulls <= 1, "at most one NULL in the batch"); - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); Address super_check_offset_addr(super_klass, sco_offset); // Hacked jcc, which "knows" that L_fallthrough, at least, is in @@ -7786,10 +8351,8 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, assert(label_nulls <= 1, "at most one NULL in the batch"); // a couple of useful fields in sub_klass: - int ss_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_supers_offset_in_bytes()); - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); Address secondary_supers_addr(sub_klass, ss_offset); Address super_cache_addr( sub_klass, sc_offset); @@ -7876,32 +8439,6 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, } -void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { - ucomisd(dst, as_Address(src)); -} - -void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { - ucomiss(dst, as_Address(src)); -} - -void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - xorpd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - xorpd(dst, Address(rscratch1, 0)); - } -} - -void 
MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - xorps(dst, as_Address(src)); - } else { - lea(rscratch1, src); - xorps(dst, Address(rscratch1, 0)); - } -} - void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { if (VM_Version::supports_cmov()) { cmovl(cc, dst, src); @@ -8487,20 +9024,20 @@ void MacroAssembler::load_prototype_header(Register dst, Register src) { if (Universe::narrow_oop_shift() != 0) { assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); if (LogMinObjAlignmentInBytes == Address::times_8) { - movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); } else { // OK to use shift since we don't need to preserve flags. shlq(dst, LogMinObjAlignmentInBytes); - movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movq(dst, Address(r12_heapbase, dst, Address::times_1, Klass::prototype_header_offset())); } } else { - movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movq(dst, Address(dst, Klass::prototype_header_offset())); } } else #endif { movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); - movptr(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movptr(dst, Address(dst, Klass::prototype_header_offset())); } } @@ -8761,6 +9298,7 @@ void MacroAssembler::string_indexofC8(Register str1, Register str2, Register cnt1, Register cnt2, int int_cnt2, Register result, XMMRegister vec, Register tmp) { + ShortBranchVerifier sbv(this); assert(UseSSE42Intrinsics, "SSE4.2 is required"); // This method uses pcmpestri inxtruction with bound registers @@ -8890,9 +9428,9 @@ void 
MacroAssembler::string_indexofC8(Register str1, Register str2, pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); } // Need to reload strings pointers if not matched whole vector - jccb(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 + jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 addptr(cnt2, 8); - jccb(Assembler::negative, SCAN_SUBSTR); + jcc(Assembler::negative, SCAN_SUBSTR); // Fall through if found full substring } // (int_cnt2 > 8) @@ -8911,6 +9449,7 @@ void MacroAssembler::string_indexof(Register str1, Register str2, Register cnt1, Register cnt2, int int_cnt2, Register result, XMMRegister vec, Register tmp) { + ShortBranchVerifier sbv(this); assert(UseSSE42Intrinsics, "SSE4.2 is required"); // // int_cnt2 is length of small (< 8 chars) constant substring @@ -9172,6 +9711,7 @@ void MacroAssembler::string_indexof(Register str1, Register str2, void MacroAssembler::string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, XMMRegister vec1) { + ShortBranchVerifier sbv(this); Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; // Compute the minimum of the string lengths and the @@ -9308,6 +9848,7 @@ void MacroAssembler::string_compare(Register str1, Register str2, void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, Register result, Register chr, XMMRegister vec1, XMMRegister vec2) { + ShortBranchVerifier sbv(this); Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; int length_offset = arrayOopDesc::length_offset_in_bytes(); @@ -9427,6 +9968,7 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist void MacroAssembler::generate_fill(BasicType t, bool aligned, Register to, Register value, Register count, Register rtmp, XMMRegister xtmp) { + ShortBranchVerifier sbv(this); assert_different_registers(to, value, count, rtmp); Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; Label 
L_fill_2_bytes, L_fill_4_bytes; diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index d5c35dfdc06..1a2d4afa6e8 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -503,7 +503,31 @@ class Assembler : public AbstractAssembler { REX_WR = 0x4C, REX_WRB = 0x4D, REX_WRX = 0x4E, - REX_WRXB = 0x4F + REX_WRXB = 0x4F, + + VEX_3bytes = 0xC4, + VEX_2bytes = 0xC5 + }; + + enum VexPrefix { + VEX_B = 0x20, + VEX_X = 0x40, + VEX_R = 0x80, + VEX_W = 0x80 + }; + + enum VexSimdPrefix { + VEX_SIMD_NONE = 0x0, + VEX_SIMD_66 = 0x1, + VEX_SIMD_F3 = 0x2, + VEX_SIMD_F2 = 0x3 + }; + + enum VexOpcode { + VEX_OPCODE_NONE = 0x0, + VEX_OPCODE_0F = 0x1, + VEX_OPCODE_0F_38 = 0x2, + VEX_OPCODE_0F_3A = 0x3 }; enum WhichOperand { @@ -546,12 +570,99 @@ private: void prefixq(Address adr); void prefix(Address adr, Register reg, bool byteinst = false); - void prefixq(Address adr, Register reg); - void prefix(Address adr, XMMRegister reg); + void prefixq(Address adr, Register reg); + void prefixq(Address adr, XMMRegister reg); void prefetch_prefix(Address src); + void rex_prefix(Address adr, XMMRegister xreg, + VexSimdPrefix pre, VexOpcode opc, bool rex_w); + int rex_prefix_and_encode(int dst_enc, int src_enc, + VexSimdPrefix pre, VexOpcode opc, bool rex_w); + + void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, + int nds_enc, VexSimdPrefix pre, VexOpcode opc, + bool vector256); + + void vex_prefix(Address adr, int nds_enc, int xreg_enc, + VexSimdPrefix pre, VexOpcode opc, + bool vex_w, bool vector256); + + void vex_prefix(XMMRegister dst, XMMRegister nds, Address src, + VexSimdPrefix pre, bool vector256 = false) { + vex_prefix(src, nds->encoding(), dst->encoding(), + pre, VEX_OPCODE_0F, false, vector256); + } + + int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, + VexSimdPrefix pre, VexOpcode opc, + bool vex_w, bool vector256); + + int vex_prefix_and_encode(XMMRegister dst, 
XMMRegister nds, XMMRegister src, + VexSimdPrefix pre, bool vector256 = false) { + return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), + pre, VEX_OPCODE_0F, false, vector256); + } + + void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, + bool rex_w = false, bool vector256 = false); + + void simd_prefix(XMMRegister dst, Address src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { + simd_prefix(dst, xnoreg, src, pre, opc); + } + void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) { + simd_prefix(src, dst, pre); + } + void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src, + VexSimdPrefix pre) { + bool rex_w = true; + simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w); + } + + + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F, + bool rex_w = false, bool vector256 = false); + + int simd_prefix_and_encode(XMMRegister dst, XMMRegister src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { + return simd_prefix_and_encode(dst, xnoreg, src, pre, opc); + } + + // Move/convert 32-bit integer value. + int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src, + VexSimdPrefix pre) { + // It is OK to cast from Register to XMMRegister to pass argument here + // since only encoding is used in simd_prefix_and_encode() and number of + // Gen and Xmm registers are the same. + return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre); + } + int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) { + return simd_prefix_and_encode(dst, xnoreg, src, pre); + } + int simd_prefix_and_encode(Register dst, XMMRegister src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { + return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc); + } + + // Move/convert 64-bit integer value. 
+ int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src, + VexSimdPrefix pre) { + bool rex_w = true; + return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w); + } + int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) { + return simd_prefix_and_encode_q(dst, xnoreg, src, pre); + } + int simd_prefix_and_encode_q(Register dst, XMMRegister src, + VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) { + bool rex_w = true; + return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w); + } + // Helper functions for groups of instructions void emit_arith_b(int op1, int op2, Register dst, int imm8); @@ -764,6 +875,7 @@ private: void addss(XMMRegister dst, Address src); void addss(XMMRegister dst, XMMRegister src); + void andl(Address dst, int32_t imm32); void andl(Register dst, int32_t imm32); void andl(Register dst, Address src); void andl(Register dst, Register src); @@ -774,9 +886,11 @@ private: void andq(Register dst, Register src); // Bitwise Logical AND of Packed Double-Precision Floating-Point Values - void andpd(XMMRegister dst, Address src); void andpd(XMMRegister dst, XMMRegister src); + // Bitwise Logical AND of Packed Single-Precision Floating-Point Values + void andps(XMMRegister dst, XMMRegister src); + void bsfl(Register dst, Register src); void bsrl(Register dst, Register src); @@ -837,9 +951,11 @@ private: // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS void comisd(XMMRegister dst, Address src); + void comisd(XMMRegister dst, XMMRegister src); // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS void comiss(XMMRegister dst, Address src); + void comiss(XMMRegister dst, XMMRegister src); // Identify processor type and features void cpuid() { @@ -849,14 +965,19 @@ private: // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value void 
cvtsd2ss(XMMRegister dst, XMMRegister src); + void cvtsd2ss(XMMRegister dst, Address src); // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value void cvtsi2sdl(XMMRegister dst, Register src); + void cvtsi2sdl(XMMRegister dst, Address src); void cvtsi2sdq(XMMRegister dst, Register src); + void cvtsi2sdq(XMMRegister dst, Address src); // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value void cvtsi2ssl(XMMRegister dst, Register src); + void cvtsi2ssl(XMMRegister dst, Address src); void cvtsi2ssq(XMMRegister dst, Register src); + void cvtsi2ssq(XMMRegister dst, Address src); // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value void cvtdq2pd(XMMRegister dst, XMMRegister src); @@ -866,6 +987,7 @@ private: // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value void cvtss2sd(XMMRegister dst, XMMRegister src); + void cvtss2sd(XMMRegister dst, Address src); // Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer void cvttsd2sil(Register dst, Address src); @@ -1140,8 +1262,6 @@ private: void movdq(Register dst, XMMRegister src); // Move Aligned Double Quadword - void movdqa(Address dst, XMMRegister src); - void movdqa(XMMRegister dst, Address src); void movdqa(XMMRegister dst, XMMRegister src); // Move Unaligned Double Quadword @@ -1261,10 +1381,18 @@ private: void orq(Register dst, Address src); void orq(Register dst, Register src); + // Pack with unsigned saturation + void packuswb(XMMRegister dst, XMMRegister src); + void packuswb(XMMRegister dst, Address src); + // SSE4.2 string instructions void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); void pcmpestri(XMMRegister xmm1, Address src, int imm8); + // SSE4.1 packed move + void pmovzxbw(XMMRegister dst, XMMRegister src); + void pmovzxbw(XMMRegister dst, Address src); + #ifndef _LP64 // no 32bit push/pop on amd64 void popl(Address dst); 
#endif @@ -1292,6 +1420,7 @@ private: // POR - Bitwise logical OR void por(XMMRegister dst, XMMRegister src); + void por(XMMRegister dst, Address src); // Shuffle Packed Doublewords void pshufd(XMMRegister dst, XMMRegister src, int mode); @@ -1313,6 +1442,11 @@ private: // Interleave Low Bytes void punpcklbw(XMMRegister dst, XMMRegister src); + void punpcklbw(XMMRegister dst, Address src); + + // Interleave Low Doublewords + void punpckldq(XMMRegister dst, XMMRegister src); + void punpckldq(XMMRegister dst, Address src); #ifndef _LP64 // no 32bit push/pop on amd64 void pushl(Address src); @@ -1429,6 +1563,13 @@ private: void xchgq(Register reg, Address adr); void xchgq(Register dst, Register src); + // Get Value of Extended Control Register + void xgetbv() { + emit_byte(0x0F); + emit_byte(0x01); + emit_byte(0xD0); + } + void xorl(Register dst, int32_t imm32); void xorl(Register dst, Address src); void xorl(Register dst, Register src); @@ -1437,14 +1578,44 @@ private: void xorq(Register dst, Register src); // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values - void xorpd(XMMRegister dst, Address src); void xorpd(XMMRegister dst, XMMRegister src); // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values - void xorps(XMMRegister dst, Address src); void xorps(XMMRegister dst, XMMRegister src); void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0 + + // AVX 3-operands instructions (encoded with VEX prefix) + void vaddsd(XMMRegister dst, XMMRegister nds, Address src); + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vaddss(XMMRegister dst, XMMRegister nds, Address src); + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vandpd(XMMRegister dst, XMMRegister nds, Address src); + void vandps(XMMRegister dst, XMMRegister nds, Address src); + void vdivsd(XMMRegister dst, XMMRegister nds, Address src); + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src); + 
void vdivss(XMMRegister dst, XMMRegister nds, Address src); + void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vmulsd(XMMRegister dst, XMMRegister nds, Address src); + void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vmulss(XMMRegister dst, XMMRegister nds, Address src); + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vsubsd(XMMRegister dst, XMMRegister nds, Address src); + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vsubss(XMMRegister dst, XMMRegister nds, Address src); + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src); + void vxorpd(XMMRegister dst, XMMRegister nds, Address src); + void vxorps(XMMRegister dst, XMMRegister nds, Address src); + + + protected: + // Next instructions require address alignment 16 bytes SSE mode. + // They should be called only from corresponding MacroAssembler instructions. + void andpd(XMMRegister dst, Address src); + void andps(XMMRegister dst, Address src); + void xorpd(XMMRegister dst, Address src); + void xorps(XMMRegister dst, Address src); + }; @@ -2175,9 +2346,15 @@ class MacroAssembler: public Assembler { void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } void andpd(XMMRegister dst, AddressLiteral src); + void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } + void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } + void andps(XMMRegister dst, AddressLiteral src); + + void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } void comiss(XMMRegister dst, AddressLiteral src); + void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); } void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } void comisd(XMMRegister dst, AddressLiteral src); @@ -2211,62 +2388,62 @@ private: void movss(XMMRegister dst, Address src) { 
Assembler::movss(dst, src); } void movss(XMMRegister dst, AddressLiteral src); - void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } + void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } void movlpd(XMMRegister dst, AddressLiteral src); public: void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } - void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); } + void addsd(XMMRegister dst, AddressLiteral src); void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } - void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); } + void addss(XMMRegister dst, AddressLiteral src); void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } - void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); } + void divsd(XMMRegister dst, AddressLiteral src); void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } - void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); } + void divss(XMMRegister dst, AddressLiteral src); void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } - void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); } + void movsd(XMMRegister dst, AddressLiteral src); void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } - void 
mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); } + void mulsd(XMMRegister dst, AddressLiteral src); void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } - void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); } + void mulss(XMMRegister dst, AddressLiteral src); void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } - void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); } + void sqrtsd(XMMRegister dst, AddressLiteral src); void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } - void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); } + void sqrtss(XMMRegister dst, AddressLiteral src); void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } - void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); } + void subsd(XMMRegister dst, AddressLiteral src); void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } - void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); } + void subss(XMMRegister dst, AddressLiteral src); void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } - void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } + void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } void ucomiss(XMMRegister dst, AddressLiteral src); void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } - 
void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } + void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } void ucomisd(XMMRegister dst, AddressLiteral src); // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values @@ -2279,6 +2456,53 @@ public: void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } void xorps(XMMRegister dst, AddressLiteral src); + // AVX 3-operands instructions + + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } + void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); } + void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); } + void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } + void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vandpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandpd(dst, nds, src); } + void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vandps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vandps(dst, nds, src); } + void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } + void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } + void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } + void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } + void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); 
} + void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } + void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } + void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } + void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } + void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } + void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } + void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } + void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); } + void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); } + void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + // Data void cmov32( Condition cc, Register dst, Address src); diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp index 125bf3ffff0..bf299c6da8c 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp @@ -86,6 +86,7 @@ inline void Assembler::prefix(Address adr, Register reg, bool byteinst) {} inline void Assembler::prefixq(Address adr, Register reg) {} inline void Assembler::prefix(Address adr, XMMRegister reg) {} +inline void Assembler::prefixq(Address adr, XMMRegister reg) {} #else inline void Assembler::emit_long64(jlong x) { *(jlong*) 
_code_pos = x; diff --git a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp index f276df9e537..fe5495dda94 100644 --- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp @@ -320,7 +320,7 @@ void PatchingStub::emit_code(LIR_Assembler* ce) { // begin_initialized_entry_offset has to fit in a byte. Also, we know it's not null. __ load_heap_oop_not_null(tmp2, Address(_obj, java_lang_Class::klass_offset_in_bytes())); __ get_thread(tmp); - __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc))); + __ cmpptr(tmp, Address(tmp2, instanceKlass::init_thread_offset())); __ pop(tmp2); __ pop(tmp); __ jcc(Assembler::notEqual, call_patch); @@ -519,7 +519,7 @@ void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) { __ load_klass(tmp_reg, src_reg); - Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc)); + Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset()); __ cmpl(ref_type_adr, REF_NONE); __ jcc(Assembler::equal, _continuation); diff --git a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp index dbdd7087fa7..3c24feb724b 100644 --- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp @@ -1557,8 +1557,8 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { if (op->init_check()) { - __ cmpl(Address(op->klass()->as_register(), - instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), + __ cmpb(Address(op->klass()->as_register(), + instanceKlass::init_state_offset()), instanceKlass::fully_initialized); add_debug_info_for_null_check_here(op->stub()->info()); __ jcc(Assembler::notEqual, *op->stub()->entry()); @@ -1730,7 +1730,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L #else __ 
cmpoop(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding()); #endif // _LP64 - if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) { + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { __ jcc(Assembler::notEqual, *failure_target); // successful cast, fall through to profile or jump } else { @@ -1842,7 +1842,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { __ load_klass(klass_RInfo, value); // get instance klass (it's already uncompressed) - __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); + __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset())); // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); // call out-of-line instance of __ check_klass_subtype_slow_path(...): @@ -3289,8 +3289,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { __ load_klass(tmp, dst); } - int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + int lh_offset = in_bytes(Klass::layout_helper_offset()); Address klass_lh_addr(tmp, lh_offset); jint objArray_lh = Klass::array_layout_helper(T_OBJECT); __ cmpl(klass_lh_addr, objArray_lh); @@ -3307,9 +3306,9 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { #ifndef _LP64 __ movptr(tmp, dst_klass_addr); - __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); + __ movptr(tmp, Address(tmp, objArrayKlass::element_klass_offset())); __ push(tmp); - __ movl(tmp, Address(tmp, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); + __ movl(tmp, Address(tmp, Klass::super_check_offset_offset())); __ push(tmp); __ push(length); __ lea(tmp, Address(dst, dst_pos, scale, 
arrayOopDesc::base_offset_in_bytes(basic_type))); @@ -3333,15 +3332,15 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { // Allocate abi space for args but be sure to keep stack aligned __ subptr(rsp, 6*wordSize); __ load_klass(c_rarg3, dst); - __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); + __ movptr(c_rarg3, Address(c_rarg3, objArrayKlass::element_klass_offset())); store_parameter(c_rarg3, 4); - __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); + __ movl(c_rarg3, Address(c_rarg3, Klass::super_check_offset_offset())); __ call(RuntimeAddress(copyfunc_addr)); __ addptr(rsp, 6*wordSize); #else __ load_klass(c_rarg4, dst); - __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); - __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc))); + __ movptr(c_rarg4, Address(c_rarg4, objArrayKlass::element_klass_offset())); + __ movl(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset())); __ call(RuntimeAddress(copyfunc_addr)); #endif diff --git a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp index d386a99a096..ad507571479 100644 --- a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp @@ -150,7 +150,7 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register assert_different_registers(obj, klass, len); if (UseBiasedLocking && !len->is_valid()) { assert_different_registers(obj, klass, len, t1, t2); - movptr(t1, Address(klass, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + movptr(t1, Address(klass, Klass::prototype_header_offset())); movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1); } else { // This assumes that all prototype bits fit in an int32_t diff --git 
a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp index 9d132217058..5f2cf3886ca 100644 --- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp @@ -1011,7 +1011,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { if (id == fast_new_instance_init_check_id) { // make sure the klass is initialized - __ cmpl(Address(klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized); + __ cmpb(Address(klass, instanceKlass::init_state_offset()), instanceKlass::fully_initialized); __ jcc(Assembler::notEqual, slow_path); } @@ -1019,7 +1019,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { // assert object can be fast path allocated { Label ok, not_ok; - __ movl(obj_size, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); + __ movl(obj_size, Address(klass, Klass::layout_helper_offset())); __ cmpl(obj_size, 0); // make sure it's an instance (LH > 0) __ jcc(Assembler::lessEqual, not_ok); __ testl(obj_size, Klass::_lh_instance_slow_path_bit); @@ -1040,7 +1040,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ bind(retry_tlab); // get the instance size (size is postive so movl is fine for 64bit) - __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); + __ movl(obj_size, Address(klass, Klass::layout_helper_offset())); __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path); @@ -1052,7 +1052,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ bind(try_eden); // get the instance size (size is postive so movl is fine for 64bit) - __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); + __ movl(obj_size, Address(klass, Klass::layout_helper_offset())); __ eden_allocate(obj, obj_size, 0, t1, slow_path); __ 
incr_allocated_bytes(thread, obj_size, 0); @@ -1119,7 +1119,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { { Label ok; Register t0 = obj; - __ movl(t0, Address(klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); + __ movl(t0, Address(klass, Klass::layout_helper_offset())); __ sarl(t0, Klass::_lh_array_tag_shift); int tag = ((id == new_type_array_id) ? Klass::_lh_array_tag_type_value @@ -1153,7 +1153,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) // since size is positive movl does right thing on 64bit - __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes())); + __ movl(t1, Address(klass, Klass::layout_helper_offset())); // since size is postive movl does right thing on 64bit __ movl(arr_size, length); assert(t1 == rcx, "fixed register usage"); @@ -1167,7 +1167,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ tlab_allocate(obj, arr_size, 0, t1, t2, slow_path); // preserves arr_size __ initialize_header(obj, klass, length, t1, t2); - __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte))); + __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); __ andptr(t1, Klass::_lh_header_size_mask); @@ -1180,7 +1180,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ bind(try_eden); // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) // since size is positive movl does right thing on 64bit - __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + 
Klass::layout_helper_offset_in_bytes())); + __ movl(t1, Address(klass, Klass::layout_helper_offset())); // since size is postive movl does right thing on 64bit __ movl(arr_size, length); assert(t1 == rcx, "fixed register usage"); @@ -1195,7 +1195,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { __ incr_allocated_bytes(thread, arr_size, 0); __ initialize_header(obj, klass, length, t1, t2); - __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte))); + __ movb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); __ andptr(t1, Klass::_lh_header_size_mask); @@ -1267,7 +1267,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { Label register_finalizer; Register t = rsi; __ load_klass(t, rax); - __ movl(t, Address(t, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); + __ movl(t, Address(t, Klass::access_flags_offset())); __ testl(t, JVM_ACC_HAS_FINALIZER); __ jcc(Assembler::notZero, register_finalizer); __ ret(0); diff --git a/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp b/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp index 226c6cbc6e6..b9a5c22934f 100644 --- a/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp +++ b/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp @@ -511,7 +511,7 @@ void CppInterpreterGenerator::generate_compute_interpreter_state(const Register // get synchronization object Label done; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(rax, access_flags); __ testl(rax, JVM_ACC_STATIC); __ movptr(rax, Address(locals, 0)); // get receiver (assume this is frequent case) @@ -763,7 +763,7 @@ 
void InterpreterGenerator::lock_method(void) { #endif // ASSERT // get synchronization object { Label done; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(rax, access_flags); __ movptr(rdi, STATE(_locals)); // prepare to get receiver (assume common case) __ testl(rax, JVM_ACC_STATIC); @@ -1180,7 +1180,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // pass mirror handle if static call { Label L; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(t, Address(method, methodOopDesc::access_flags_offset())); __ testl(t, JVM_ACC_STATIC); __ jcc(Assembler::zero, L); diff --git a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp index 254d087df96..7d987e58829 100644 --- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp +++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp @@ -1160,7 +1160,7 @@ void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHan Address rcx_amh_conversion( rcx_recv, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes() ); Address vmarg; // __ argument_address(vmargslot) - const int java_mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int java_mirror_offset = in_bytes(Klass::java_mirror_offset()); if (have_entry(ek)) { __ nop(); // empty stubs make SG sick diff --git a/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp b/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp index 7ec07737f61..1cf509992ca 100644 --- a/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp +++ b/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp @@ -237,9 +237,21 @@ int NativeMovRegMem::instruction_start() const { int off = 0; u_char instr_0 = ubyte_at(off); + // See comment in 
Assembler::locate_operand() about VEX prefixes. + if (instr_0 == instruction_VEX_prefix_2bytes) { + assert((UseAVX > 0), "shouldn't have VEX prefix"); + NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); + return 2; + } + if (instr_0 == instruction_VEX_prefix_3bytes) { + assert((UseAVX > 0), "shouldn't have VEX prefix"); + NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); + return 3; + } + // First check to see if we have a (prefixed or not) xor - if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 - instr_0 <= instruction_prefix_wide_hi) { // 0x4f + if (instr_0 >= instruction_prefix_wide_lo && // 0x40 + instr_0 <= instruction_prefix_wide_hi) { // 0x4f off++; instr_0 = ubyte_at(off); } @@ -256,13 +268,13 @@ int NativeMovRegMem::instruction_start() const { instr_0 = ubyte_at(off); } - if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3 + if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3 instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2 off++; instr_0 = ubyte_at(off); } - if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 + if ( instr_0 >= instruction_prefix_wide_lo && // 0x40 instr_0 <= instruction_prefix_wide_hi) { // 0x4f off++; instr_0 = ubyte_at(off); diff --git a/hotspot/src/cpu/x86/vm/nativeInst_x86.hpp b/hotspot/src/cpu/x86/vm/nativeInst_x86.hpp index fc7a1ab0753..470e971fe48 100644 --- a/hotspot/src/cpu/x86/vm/nativeInst_x86.hpp +++ b/hotspot/src/cpu/x86/vm/nativeInst_x86.hpp @@ -287,6 +287,9 @@ class NativeMovRegMem: public NativeInstruction { instruction_code_xmm_store = 0x11, instruction_code_xmm_lpd = 0x12, + instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes, + instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes, + instruction_size = 4, instruction_offset = 0, data_offset = 2, diff --git a/hotspot/src/cpu/x86/vm/register_definitions_x86.cpp b/hotspot/src/cpu/x86/vm/register_definitions_x86.cpp index f1fd229b96b..7165872c239 100644 --- 
a/hotspot/src/cpu/x86/vm/register_definitions_x86.cpp +++ b/hotspot/src/cpu/x86/vm/register_definitions_x86.cpp @@ -53,6 +53,7 @@ REGISTER_DEFINITION(Register, r14); REGISTER_DEFINITION(Register, r15); #endif // AMD64 +REGISTER_DEFINITION(XMMRegister, xnoreg); REGISTER_DEFINITION(XMMRegister, xmm0 ); REGISTER_DEFINITION(XMMRegister, xmm1 ); REGISTER_DEFINITION(XMMRegister, xmm2 ); @@ -115,6 +116,7 @@ REGISTER_DEFINITION(Register, r12_heapbase); REGISTER_DEFINITION(Register, r15_thread); #endif // AMD64 +REGISTER_DEFINITION(MMXRegister, mnoreg ); REGISTER_DEFINITION(MMXRegister, mmx0 ); REGISTER_DEFINITION(MMXRegister, mmx1 ); REGISTER_DEFINITION(MMXRegister, mmx2 ); diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp index fb85fffd77b..4d4e66f600b 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp @@ -1374,8 +1374,7 @@ class StubGenerator: public StubCodeGenerator { // L_success, L_failure, NULL); assert_different_registers(sub_klass, temp); - int sc_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::secondary_super_cache_offset_in_bytes()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); // if the pointers are equal, we are done (e.g., String[] elements) __ cmpptr(sub_klass, super_klass_addr); @@ -1787,8 +1786,7 @@ class StubGenerator: public StubCodeGenerator { // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 // - int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + int lh_offset = in_bytes(Klass::layout_helper_offset()); Address src_klass_lh_addr(rcx_src_klass, lh_offset); // Handle objArrays completely differently... 
@@ -1914,10 +1912,8 @@ class StubGenerator: public StubCodeGenerator { // live at this point: rcx_src_klass, dst[_pos], src[_pos] { // Handy offsets: - int ek_offset = (klassOopDesc::header_size() * HeapWordSize + - objArrayKlass::element_klass_offset_in_bytes()); - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); Register rsi_dst_klass = rsi; Register rdi_temp = rdi; @@ -2323,6 +2319,9 @@ class StubGenerator: public StubCodeGenerator { generate_throw_exception("WrongMethodTypeException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException), rax, rcx); + + // Build this early so it's available for the interpreter + StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); } @@ -2334,7 +2333,6 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError)); StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError)); StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call)); - StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError)); 
//------------------------------------------------------------------------------------------------------------------------ // entry points that are platform specific diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp index 86104223fd4..a6fcc782094 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -2261,8 +2261,7 @@ class StubGenerator: public StubCodeGenerator { // The ckoff and ckval must be mutually consistent, // even though caller generates both. { Label L; - int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); __ cmpl(ckoff, Address(ckval, sco_offset)); __ jcc(Assembler::equal, L); __ stop("super_check_offset inconsistent"); @@ -2572,8 +2571,7 @@ class StubGenerator: public StubCodeGenerator { // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 // - const int lh_offset = klassOopDesc::header_size() * HeapWordSize + - Klass::layout_helper_offset_in_bytes(); + const int lh_offset = in_bytes(Klass::layout_helper_offset()); // Handle objArrays completely differently... const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); @@ -2722,15 +2720,13 @@ class StubGenerator: public StubCodeGenerator { assert_clean_int(count, sco_temp); // Generate the type check. - const int sco_offset = (klassOopDesc::header_size() * HeapWordSize + - Klass::super_check_offset_offset_in_bytes()); + const int sco_offset = in_bytes(Klass::super_check_offset_offset()); __ movl(sco_temp, Address(r11_dst_klass, sco_offset)); assert_clean_int(sco_temp, rax); generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy); // Fetch destination element klass from the objArrayKlass header. 
- int ek_offset = (klassOopDesc::header_size() * HeapWordSize + - objArrayKlass::element_klass_offset_in_bytes()); + int ek_offset = in_bytes(objArrayKlass::element_klass_offset()); __ movptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); __ movl( sco_temp, Address(r11_dst_klass, sco_offset)); assert_clean_int(sco_temp, rax); @@ -3072,6 +3068,13 @@ class StubGenerator: public StubCodeGenerator { generate_throw_exception("WrongMethodTypeException throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException), rax, rcx); + + // Build this early so it's available for the interpreter. + StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_StackOverflowError)); } void generate_all() { @@ -3098,12 +3101,6 @@ class StubGenerator: public StubCodeGenerator { SharedRuntime:: throw_NullPointerException_at_call)); - StubRoutines::_throw_StackOverflowError_entry = - generate_throw_exception("StackOverflowError throw_exception", - CAST_FROM_FN_PTR(address, - SharedRuntime:: - throw_StackOverflowError)); - // entry points that are platform specific StubRoutines::x86::_f2i_fixup = generate_f2i_fixup(); StubRoutines::x86::_f2l_fixup = generate_f2l_fixup(); diff --git a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp index 6f8e35afdf4..29533832ef6 100644 --- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp @@ -522,9 +522,18 @@ void InterpreterGenerator::generate_stack_overflow_check(void) { __ pop(rsi); // get saved bcp / (c++ prev state ). - __ pop(rax); // get return address - __ jump(ExternalAddress(Interpreter::throw_StackOverflowError_entry())); + // Restore sender's sp as SP. 
This is necessary if the sender's + // frame is an extended compiled frame (see gen_c2i_adapter()) + // and safer anyway in case of JSR292 adaptations. + __ pop(rax); // return address must be moved if SP is changed + __ mov(rsp, rsi); + __ push(rax); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry())); // all done with frame size check __ bind(after_frame_check_pop); __ pop(rsi); @@ -552,7 +561,7 @@ void InterpreterGenerator::lock_method(void) { #endif // ASSERT // get synchronization object { Label done; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(rax, access_flags); __ testl(rax, JVM_ACC_STATIC); __ movptr(rax, Address(rdi, Interpreter::local_offset_in_bytes(0))); // get receiver (assume this is frequent case) @@ -1012,7 +1021,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // pass mirror handle if static call { Label L; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(t, Address(method, methodOopDesc::access_flags_offset())); __ testl(t, JVM_ACC_STATIC); __ jcc(Assembler::zero, L); diff --git a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp index 40c7d63e165..110d8ebdf3c 100644 --- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp @@ -467,8 +467,18 @@ void InterpreterGenerator::generate_stack_overflow_check(void) { __ cmpptr(rsp, rax); __ jcc(Assembler::above, after_frame_check); - __ pop(rax); 
// get return address - __ jump(ExternalAddress(Interpreter::throw_StackOverflowError_entry())); + // Restore sender's sp as SP. This is necessary if the sender's + // frame is an extended compiled frame (see gen_c2i_adapter()) + // and safer anyway in case of JSR292 adaptations. + + __ pop(rax); // return address must be moved if SP is changed + __ mov(rsp, r13); + __ push(rax); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry())); // all done with frame size check __ bind(after_frame_check); @@ -505,8 +515,7 @@ void InterpreterGenerator::lock_method(void) { // get synchronization object { - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + - Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); Label done; __ movl(rax, access_flags); __ testl(rax, JVM_ACC_STATIC); @@ -1006,8 +1015,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // pass mirror handle if static call { Label L; - const int mirror_offset = klassOopDesc::klass_part_offset_in_bytes() + - Klass::java_mirror_offset_in_bytes(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); __ movl(t, Address(method, methodOopDesc::access_flags_offset())); __ testl(t, JVM_ACC_STATIC); __ jcc(Assembler::zero, L); diff --git a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp index 65e65ef5b51..1cbc67e6060 100644 --- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp @@ -980,7 +980,7 @@ void TemplateTable::aastore() { __ load_klass(rbx, rax); // Move superklass into EAX __ load_klass(rax, rdx); - __ movptr(rax, Address(rax, sizeof(oopDesc) + 
objArrayKlass::element_klass_offset_in_bytes())); + __ movptr(rax, Address(rax, objArrayKlass::element_klass_offset())); // Compress array+index*wordSize+12 into a single register. Frees ECX. __ lea(rdx, element_address); @@ -2033,7 +2033,7 @@ void TemplateTable::_return(TosState state) { assert(state == vtos, "only valid state"); __ movptr(rax, aaddress(0)); __ load_klass(rdi, rax); - __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); + __ movl(rdi, Address(rdi, Klass::access_flags_offset())); __ testl(rdi, JVM_ACC_HAS_FINALIZER); Label skip_register_finalizer; __ jcc(Assembler::zero, skip_register_finalizer); @@ -3188,11 +3188,11 @@ void TemplateTable::_new() { // make sure klass is initialized & doesn't have finalizer // make sure klass is fully initialized - __ cmpl(Address(rcx, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc)), instanceKlass::fully_initialized); + __ cmpb(Address(rcx, instanceKlass::init_state_offset()), instanceKlass::fully_initialized); __ jcc(Assembler::notEqual, slow_case); // get instance_size in instanceKlass (scaled to a count of bytes) - __ movl(rdx, Address(rcx, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); + __ movl(rdx, Address(rcx, Klass::layout_helper_offset())); // test to see if it has a finalizer or is malformed in some way __ testl(rdx, Klass::_lh_instance_slow_path_bit); __ jcc(Assembler::notZero, slow_case); @@ -3293,7 +3293,7 @@ void TemplateTable::_new() { __ bind(initialize_header); if (UseBiasedLocking) { __ pop(rcx); // get saved klass back in the register. 
- __ movptr(rbx, Address(rcx, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + __ movptr(rbx, Address(rcx, Klass::prototype_header_offset())); __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx); } else { __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), diff --git a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp index 818fb44e0a5..0e5ac274f36 100644 --- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp @@ -1004,8 +1004,7 @@ void TemplateTable::aastore() { // Move superklass into rax __ load_klass(rax, rdx); __ movptr(rax, Address(rax, - sizeof(oopDesc) + - objArrayKlass::element_klass_offset_in_bytes())); + objArrayKlass::element_klass_offset())); // Compress array + index*oopSize + 12 into a single register. Frees rcx. __ lea(rdx, element_address); @@ -2067,7 +2066,7 @@ void TemplateTable::_return(TosState state) { assert(state == vtos, "only valid state"); __ movptr(c_rarg1, aaddress(0)); __ load_klass(rdi, c_rarg1); - __ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc))); + __ movl(rdi, Address(rdi, Klass::access_flags_offset())); __ testl(rdi, JVM_ACC_HAS_FINALIZER); Label skip_register_finalizer; __ jcc(Assembler::zero, skip_register_finalizer); @@ -3235,16 +3234,15 @@ void TemplateTable::_new() { // make sure klass is initialized & doesn't have finalizer // make sure klass is fully initialized - __ cmpl(Address(rsi, - instanceKlass::init_state_offset_in_bytes() + - sizeof(oopDesc)), + __ cmpb(Address(rsi, + instanceKlass::init_state_offset()), instanceKlass::fully_initialized); __ jcc(Assembler::notEqual, slow_case); // get instance_size in instanceKlass (scaled to a count of bytes) __ movl(rdx, Address(rsi, - Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc))); + Klass::layout_helper_offset())); // test to see if it has a finalizer or is malformed in some 
way __ testl(rdx, Klass::_lh_instance_slow_path_bit); __ jcc(Assembler::notZero, slow_case); @@ -3337,7 +3335,7 @@ void TemplateTable::_new() { // initialize object header only. __ bind(initialize_header); if (UseBiasedLocking) { - __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); + __ movptr(rscratch1, Address(rsi, Klass::prototype_header_offset())); __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rscratch1); } else { __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp index 7a8ee727b85..2155d767980 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp @@ -50,7 +50,7 @@ const char* VM_Version::_features_str = ""; VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; static BufferBlob* stub_blob; -static const int stub_size = 400; +static const int stub_size = 550; extern "C" { typedef void (*getPsrInfo_stub_t)(void*); @@ -73,7 +73,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; - Label ext_cpuid1, ext_cpuid5, done; + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done; StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); # define __ _masm-> @@ -229,14 +229,51 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movl(Address(rsi, 8), rcx); __ movl(Address(rsi,12), rdx); + // + // Check if OS has enabled XGETBV instruction to access XCR0 + // (OSXSAVE feature flag) and CPU supports AVX + // + __ andl(rcx, 0x18000000); + __ cmpl(rcx, 0x18000000); + __ jccb(Assembler::notEqual, sef_cpuid); + + // + // XCR0, XFEATURE_ENABLED_MASK register + // + __ xorl(rcx, rcx); // zero for XCR0 register + __ xgetbv(); + __ lea(rsi, Address(rbp, 
in_bytes(VM_Version::xem_xcr0_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rdx); + + // + // cpuid(0x7) Structured Extended Features + // + __ bind(sef_cpuid); + __ movl(rax, 7); + __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? + __ jccb(Assembler::greater, ext_cpuid); + + __ xorl(rcx, rcx); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + + // + // Extended cpuid(0x80000000) + // + __ bind(ext_cpuid); __ movl(rax, 0x80000000); __ cpuid(); __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? __ jcc(Assembler::belowEqual, done); __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? __ jccb(Assembler::belowEqual, ext_cpuid1); - __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? + __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? __ jccb(Assembler::belowEqual, ext_cpuid5); + __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? + __ jccb(Assembler::belowEqual, ext_cpuid7); // // Extended cpuid(0x80000008) // @@ -248,6 +285,18 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movl(Address(rsi, 8), rcx); __ movl(Address(rsi,12), rdx); + // + // Extended cpuid(0x80000007) + // + __ bind(ext_cpuid7); + __ movl(rax, 0x80000007); + __ cpuid(); + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); + __ movl(Address(rsi, 0), rax); + __ movl(Address(rsi, 4), rbx); + __ movl(Address(rsi, 8), rcx); + __ movl(Address(rsi,12), rdx); + // // Extended cpuid(0x80000005) // @@ -359,13 +408,19 @@ void VM_Version::get_processor_features() { if (UseSSE < 1) _cpuFeatures &= ~CPU_SSE; + if (UseAVX < 2) + _cpuFeatures &= ~CPU_AVX2; + + if (UseAVX < 1) + _cpuFeatures &= ~CPU_AVX; + if (logical_processors_per_package() == 1) { // HT processor could be installed on a system which doesn't support HT. 
_cpuFeatures &= ~CPU_HT; } char buf[256]; - jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", cores_per_cpu(), threads_per_core(), cpu_family(), _model, _stepping, (supports_cmov() ? ", cmov" : ""), @@ -379,27 +434,39 @@ void VM_Version::get_processor_features() { (supports_sse4_1() ? ", sse4.1" : ""), (supports_sse4_2() ? ", sse4.2" : ""), (supports_popcnt() ? ", popcnt" : ""), + (supports_avx() ? ", avx" : ""), + (supports_avx2() ? ", avx2" : ""), (supports_mmx_ext() ? ", mmxext" : ""), (supports_3dnow_prefetch() ? ", 3dnowpref" : ""), (supports_lzcnt() ? ", lzcnt": ""), (supports_sse4a() ? ", sse4a": ""), - (supports_ht() ? ", ht": "")); + (supports_ht() ? ", ht": ""), + (supports_tsc() ? ", tsc": ""), + (supports_tscinv_bit() ? ", tscinvbit": ""), + (supports_tscinv() ? ", tscinv": "")); _features_str = strdup(buf); // UseSSE is set to the smaller of what hardware supports and what // the command line requires. I.e., you cannot set UseSSE to 2 on // older Pentiums which do not support it. 
- if( UseSSE > 4 ) UseSSE=4; - if( UseSSE < 0 ) UseSSE=0; - if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support + if (UseSSE > 4) UseSSE=4; + if (UseSSE < 0) UseSSE=0; + if (!supports_sse4_1()) // Drop to 3 if no SSE4 support UseSSE = MIN2((intx)3,UseSSE); - if( !supports_sse3() ) // Drop to 2 if no SSE3 support + if (!supports_sse3()) // Drop to 2 if no SSE3 support UseSSE = MIN2((intx)2,UseSSE); - if( !supports_sse2() ) // Drop to 1 if no SSE2 support + if (!supports_sse2()) // Drop to 1 if no SSE2 support UseSSE = MIN2((intx)1,UseSSE); - if( !supports_sse () ) // Drop to 0 if no SSE support + if (!supports_sse ()) // Drop to 0 if no SSE support UseSSE = 0; + if (UseAVX > 2) UseAVX=2; + if (UseAVX < 0) UseAVX=0; + if (!supports_avx2()) // Drop to 1 if no AVX2 support + UseAVX = MIN2((intx)1,UseAVX); + if (!supports_avx ()) // Drop to 0 if no AVX support + UseAVX = 0; + // On new cpus instructions which update whole XMM register should be used // to prevent partial register stall due to dependencies on high half. 
// @@ -534,6 +601,9 @@ void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { UsePopCountInstruction = true; } + } else if (UsePopCountInstruction) { + warning("POPCNT instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UsePopCountInstruction, false); } #ifdef COMPILER2 @@ -605,7 +675,11 @@ void VM_Version::get_processor_features() { if (PrintMiscellaneous && Verbose) { tty->print_cr("Logical CPUs per core: %u", logical_processors_per_package()); - tty->print_cr("UseSSE=%d",UseSSE); + tty->print("UseSSE=%d",UseSSE); + if (UseAVX > 0) { + tty->print(" UseAVX=%d",UseAVX); + } + tty->cr(); tty->print("Allocation"); if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) { tty->print_cr(": no prefetching"); diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp index 47d81e01be6..27f3bde57d5 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -78,7 +78,10 @@ public: sse4_2 : 1, : 2, popcnt : 1, - : 8; + : 3, + osxsave : 1, + avx : 1, + : 3; } bits; }; @@ -168,6 +171,15 @@ public: } bits; }; + union ExtCpuid7Edx { + uint32_t value; + struct { + uint32_t : 8, + tsc_invariance : 1, + : 23; + } bits; + }; + union ExtCpuid8Ecx { uint32_t value; struct { @@ -176,32 +188,75 @@ public: } bits; }; -protected: - static int _cpu; - static int _model; - static int _stepping; - static int _cpuFeatures; // features returned by the "cpuid" instruction - // 0 if this instruction is not available - static const char* _features_str; + union SefCpuid7Eax { + uint32_t value; + }; - enum { - CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) - CPU_CMOV = (1 << 1), - CPU_FXSR = (1 << 2), - CPU_HT = (1 << 3), - CPU_MMX = (1 << 4), - CPU_3DNOW_PREFETCH = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions - // may not necessarily support other 3dnow instructions - CPU_SSE = (1 << 6), - CPU_SSE2 = (1 << 7), - CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX) - CPU_SSSE3 = (1 << 9), - CPU_SSE4A = (1 << 10), - CPU_SSE4_1 = (1 << 11), - CPU_SSE4_2 = (1 << 12), - CPU_POPCNT = (1 << 13), - CPU_LZCNT = (1 << 14) - } cpuFeatureFlags; + union SefCpuid7Ebx { + uint32_t value; + struct { + uint32_t fsgsbase : 1, + : 2, + bmi1 : 1, + : 1, + avx2 : 1, + : 2, + bmi2 : 1, + : 23; + } bits; + }; + + union XemXcr0Eax { + uint32_t value; + struct { + uint32_t x87 : 1, + sse : 1, + ymm : 1, + : 29; + } bits; + }; + +protected: + static int _cpu; + static int _model; + static int _stepping; + static int _cpuFeatures; // features returned by the "cpuid" instruction + // 0 if this instruction is not available + static const char* _features_str; + + enum { + CPU_CX8 = (1 << 0), // next bits are from cpuid 1 (EDX) + CPU_CMOV = (1 << 1), + CPU_FXSR = (1 << 2), + CPU_HT = (1 << 3), + CPU_MMX = (1 << 4), + CPU_3DNOW_PREFETCH = (1 << 5), // 
Processor supports 3dnow prefetch and prefetchw instructions + // may not necessarily support other 3dnow instructions + CPU_SSE = (1 << 6), + CPU_SSE2 = (1 << 7), + CPU_SSE3 = (1 << 8), // SSE3 comes from cpuid 1 (ECX) + CPU_SSSE3 = (1 << 9), + CPU_SSE4A = (1 << 10), + CPU_SSE4_1 = (1 << 11), + CPU_SSE4_2 = (1 << 12), + CPU_POPCNT = (1 << 13), + CPU_LZCNT = (1 << 14), + CPU_TSC = (1 << 15), + CPU_TSCINV = (1 << 16), + CPU_AVX = (1 << 17), + CPU_AVX2 = (1 << 18) + } cpuFeatureFlags; + + enum { + // AMD + CPU_FAMILY_AMD_11H = 17, + // Intel + CPU_FAMILY_INTEL_CORE = 6, + CPU_MODEL_NEHALEM_EP = 26, + CPU_MODEL_WESTMERE_EP = 44, +// CPU_MODEL_IVYBRIDGE_EP = ??, TODO - get real value + CPU_MODEL_SANDYBRIDGE_EP = 45 + } cpuExtendedFamily; // cpuid information block. All info derived from executing cpuid with // various function numbers is stored here. Intel and AMD info is @@ -228,6 +283,12 @@ protected: uint32_t dcp_cpuid4_ecx; // unused currently uint32_t dcp_cpuid4_edx; // unused currently + // cpuid function 7 (structured extended features) + SefCpuid7Eax sef_cpuid7_eax; + SefCpuid7Ebx sef_cpuid7_ebx; + uint32_t sef_cpuid7_ecx; // unused currently + uint32_t sef_cpuid7_edx; // unused currently + // cpuid function 0xB (processor topology) // ecx = 0 uint32_t tpl_cpuidB0_eax; @@ -270,11 +331,21 @@ protected: ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD) ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD) + // cpuid function 0x80000007 + uint32_t ext_cpuid7_eax; // reserved + uint32_t ext_cpuid7_ebx; // reserved + uint32_t ext_cpuid7_ecx; // reserved + ExtCpuid7Edx ext_cpuid7_edx; // tscinv + // cpuid function 0x80000008 uint32_t ext_cpuid8_eax; // unused currently uint32_t ext_cpuid8_ebx; // reserved ExtCpuid8Ecx ext_cpuid8_ecx; uint32_t ext_cpuid8_edx; // reserved + + // extended control register XCR0 (the XFEATURE_ENABLED_MASK register) + XemXcr0Eax xem_xcr0_eax; + uint32_t xem_xcr0_edx; // reserved }; // The actual cpuid info block @@ -286,19 
+357,23 @@ protected: result += _cpuid_info.std_cpuid1_eax.bits.ext_family; return result; } + static uint32_t extended_cpu_model() { uint32_t result = _cpuid_info.std_cpuid1_eax.bits.model; result |= _cpuid_info.std_cpuid1_eax.bits.ext_model << 4; return result; } + static uint32_t cpu_stepping() { uint32_t result = _cpuid_info.std_cpuid1_eax.bits.stepping; return result; } + static uint logical_processor_count() { uint result = threads_per_core(); return result; } + static uint32_t feature_flags() { uint32_t result = 0; if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) @@ -328,6 +403,18 @@ protected: result |= CPU_SSE4_2; if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) result |= CPU_POPCNT; + if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && + _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && + _cpuid_info.xem_xcr0_eax.bits.sse != 0 && + _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { + result |= CPU_AVX; + if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) + result |= CPU_AVX2; + } + if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) + result |= CPU_TSC; + if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) + result |= CPU_TSCINV; // AMD features. 
if (is_amd()) { @@ -350,12 +437,15 @@ public: static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); } static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); } static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); } + static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); } static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); } static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); } + static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); } static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); } static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); } static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); } static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); } + static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); } // Initialization static void initialize(); @@ -382,7 +472,6 @@ public: // static int cpu_family() { return _cpu;} static bool is_P6() { return cpu_family() >= 6; } - static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA' static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG' @@ -447,14 +536,51 @@ public: static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; } - // + static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; } + static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; } + static bool supports_tsc() { return (_cpuFeatures & CPU_TSC) != 
0; } + + // Intel features + static bool is_intel_family_core() { return is_intel() && + extended_cpu_family() == CPU_FAMILY_INTEL_CORE; } + + static bool is_intel_tsc_synched_at_init() { + if (is_intel_family_core()) { + uint32_t ext_model = extended_cpu_model(); + if (ext_model == CPU_MODEL_NEHALEM_EP || + ext_model == CPU_MODEL_WESTMERE_EP || +// TODO ext_model == CPU_MODEL_IVYBRIDGE_EP || + ext_model == CPU_MODEL_SANDYBRIDGE_EP) { + // 2-socket invtsc support. EX versions with 4 sockets are not + // guaranteed to synchronize tscs at initialization via a double + // handshake. The tscs can be explicitly set in software. Code + // that uses tsc values must be prepared for them to arbitrarily + // jump backward or forward. + return true; + } + } + return false; + } + // AMD features - // static bool supports_3dnow_prefetch() { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; } static bool supports_mmx_ext() { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; } static bool supports_lzcnt() { return (_cpuFeatures & CPU_LZCNT) != 0; } static bool supports_sse4a() { return (_cpuFeatures & CPU_SSE4A) != 0; } + static bool is_amd_Barcelona() { return is_amd() && + extended_cpu_family() == CPU_FAMILY_AMD_11H; } + + // Intel and AMD newer cores support fast timestamps well + static bool supports_tscinv_bit() { + return (_cpuFeatures & CPU_TSCINV) != 0; + } + static bool supports_tscinv() { + return supports_tscinv_bit() && + ( (is_amd() && !is_amd_Barcelona()) || + is_intel_tsc_synched_at_init() ); + } + // Intel Core and newer cpus have fast IDIV instruction (excluding Atom). static bool has_fast_idiv() { return is_intel() && cpu_family() == 6 && supports_sse3() && _model != 0x1C; } diff --git a/hotspot/src/cpu/x86/vm/x86.ad b/hotspot/src/cpu/x86/vm/x86.ad new file mode 100644 index 00000000000..5f165a9ff12 --- /dev/null +++ b/hotspot/src/cpu/x86/vm/x86.ad @@ -0,0 +1,777 @@ +// +// Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. 
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// X86 Common Architecture Description File + +source %{ + // Float masks come from different places depending on platform. 
+#ifdef _LP64 + static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } + static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } + static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } + static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } +#else + static address float_signmask() { return (address)float_signmask_pool; } + static address float_signflip() { return (address)float_signflip_pool; } + static address double_signmask() { return (address)double_signmask_pool; } + static address double_signflip() { return (address)double_signflip_pool; } +#endif +%} + +// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) + +instruct addF_reg(regF dst, regF src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (AddF dst src)); + + format %{ "addss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ addss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct addF_mem(regF dst, memory src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (AddF dst (LoadF src))); + + format %{ "addss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ addss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct addF_imm(regF dst, immF con) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (AddF dst con)); + format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ addss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddF_reg(regF dst, regF src1, regF src2) %{ + predicate(UseAVX > 0); + match(Set dst (AddF src1 src2)); + + format %{ "vaddss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddF_mem(regF dst, regF src1, memory src2) %{ + 
predicate(UseAVX > 0); + match(Set dst (AddF src1 (LoadF src2))); + + format %{ "vaddss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddF_imm(regF dst, regF src, immF con) %{ + predicate(UseAVX > 0); + match(Set dst (AddF src con)); + + format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct addD_reg(regD dst, regD src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (AddD dst src)); + + format %{ "addsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ addsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct addD_mem(regD dst, memory src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (AddD dst (LoadD src))); + + format %{ "addsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ addsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct addD_imm(regD dst, immD con) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (AddD dst con)); + format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ addsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddD_reg(regD dst, regD src1, regD src2) %{ + predicate(UseAVX > 0); + match(Set dst (AddD src1 src2)); + + format %{ "vaddsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddD_mem(regD dst, regD src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (AddD src1 (LoadD src2))); + + format %{ "vaddsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ 
vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vaddD_imm(regD dst, regD src, immD con) %{ + predicate(UseAVX > 0); + match(Set dst (AddD src con)); + + format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct subF_reg(regF dst, regF src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (SubF dst src)); + + format %{ "subss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ subss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct subF_mem(regF dst, memory src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (SubF dst (LoadF src))); + + format %{ "subss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ subss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct subF_imm(regF dst, immF con) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (SubF dst con)); + format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ subss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubF_reg(regF dst, regF src1, regF src2) %{ + predicate(UseAVX > 0); + match(Set dst (SubF src1 src2)); + + format %{ "vsubss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubF_mem(regF dst, regF src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (SubF src1 (LoadF src2))); + + format %{ "vsubss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubF_imm(regF dst, regF src, immF con) %{ + predicate(UseAVX 
> 0); + match(Set dst (SubF src con)); + + format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct subD_reg(regD dst, regD src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (SubD dst src)); + + format %{ "subsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ subsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct subD_mem(regD dst, memory src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (SubD dst (LoadD src))); + + format %{ "subsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ subsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct subD_imm(regD dst, immD con) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (SubD dst con)); + format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ subsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubD_reg(regD dst, regD src1, regD src2) %{ + predicate(UseAVX > 0); + match(Set dst (SubD src1 src2)); + + format %{ "vsubsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubD_mem(regD dst, regD src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (SubD src1 (LoadD src2))); + + format %{ "vsubsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vsubD_imm(regD dst, regD src, immD con) %{ + predicate(UseAVX > 0); + match(Set dst (SubD src con)); + + format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode 
%{ + __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct mulF_reg(regF dst, regF src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (MulF dst src)); + + format %{ "mulss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ mulss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct mulF_mem(regF dst, memory src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (MulF dst (LoadF src))); + + format %{ "mulss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ mulss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct mulF_imm(regF dst, immF con) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (MulF dst con)); + format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ mulss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF_reg(regF dst, regF src1, regF src2) %{ + predicate(UseAVX > 0); + match(Set dst (MulF src1 src2)); + + format %{ "vmulss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF_mem(regF dst, regF src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (MulF src1 (LoadF src2))); + + format %{ "vmulss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulF_imm(regF dst, regF src, immF con) %{ + predicate(UseAVX > 0); + match(Set dst (MulF src con)); + + format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct mulD_reg(regD dst, regD src) %{ + 
predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (MulD dst src)); + + format %{ "mulsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ mulsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct mulD_mem(regD dst, memory src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (MulD dst (LoadD src))); + + format %{ "mulsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ mulsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct mulD_imm(regD dst, immD con) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (MulD dst con)); + format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ mulsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD_reg(regD dst, regD src1, regD src2) %{ + predicate(UseAVX > 0); + match(Set dst (MulD src1 src2)); + + format %{ "vmulsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD_mem(regD dst, regD src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (MulD src1 (LoadD src2))); + + format %{ "vmulsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vmulD_imm(regD dst, regD src, immD con) %{ + predicate(UseAVX > 0); + match(Set dst (MulD src con)); + + format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct divF_reg(regF dst, regF src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (DivF dst src)); + + format %{ "divss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ 
divss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct divF_mem(regF dst, memory src) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (DivF dst (LoadF src))); + + format %{ "divss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ divss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct divF_imm(regF dst, immF con) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (DivF dst con)); + format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ divss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivF_reg(regF dst, regF src1, regF src2) %{ + predicate(UseAVX > 0); + match(Set dst (DivF src1 src2)); + + format %{ "vdivss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivF_mem(regF dst, regF src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (DivF src1 (LoadF src2))); + + format %{ "vdivss $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivF_imm(regF dst, regF src, immF con) %{ + predicate(UseAVX > 0); + match(Set dst (DivF src con)); + + format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct divD_reg(regD dst, regD src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (DivD dst src)); + + format %{ "divsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ divsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct divD_mem(regD dst, memory src) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + 
match(Set dst (DivD dst (LoadD src))); + + format %{ "divsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ divsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct divD_imm(regD dst, immD con) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (DivD dst con)); + format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ divsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivD_reg(regD dst, regD src1, regD src2) %{ + predicate(UseAVX > 0); + match(Set dst (DivD src1 src2)); + + format %{ "vdivsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivD_mem(regD dst, regD src1, memory src2) %{ + predicate(UseAVX > 0); + match(Set dst (DivD src1 (LoadD src2))); + + format %{ "vdivsd $dst, $src1, $src2" %} + ins_cost(150); + ins_encode %{ + __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct vdivD_imm(regD dst, regD src, immD con) %{ + predicate(UseAVX > 0); + match(Set dst (DivD src con)); + + format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct absF_reg(regF dst) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (AbsF dst)); + ins_cost(150); + format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsF_reg(regF dst, regF src) %{ + predicate(UseAVX > 0); + match(Set dst (AbsF src)); + ins_cost(150); + format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} + ins_encode %{ + __ 
vandps($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(float_signmask())); + %} + ins_pipe(pipe_slow); +%} + +instruct absD_reg(regD dst) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (AbsD dst)); + ins_cost(150); + format %{ "andpd $dst, [0x7fffffffffffffff]\t" + "# abs double by sign masking" %} + ins_encode %{ + __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); + %} + ins_pipe(pipe_slow); +%} + +instruct vabsD_reg(regD dst, regD src) %{ + predicate(UseAVX > 0); + match(Set dst (AbsD src)); + ins_cost(150); + format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" + "# abs double by sign masking" %} + ins_encode %{ + __ vandpd($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(double_signmask())); + %} + ins_pipe(pipe_slow); +%} + +instruct negF_reg(regF dst) %{ + predicate((UseSSE>=1) && (UseAVX == 0)); + match(Set dst (NegF dst)); + ins_cost(150); + format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} + ins_encode %{ + __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); + %} + ins_pipe(pipe_slow); +%} + +instruct vnegF_reg(regF dst, regF src) %{ + predicate(UseAVX > 0); + match(Set dst (NegF src)); + ins_cost(150); + format %{ "vxorps $dst, $src, [0x80000000]\t# neg float by sign flipping" %} + ins_encode %{ + __ vxorps($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(float_signflip())); + %} + ins_pipe(pipe_slow); +%} + +instruct negD_reg(regD dst) %{ + predicate((UseSSE>=2) && (UseAVX == 0)); + match(Set dst (NegD dst)); + ins_cost(150); + format %{ "xorpd $dst, [0x8000000000000000]\t" + "# neg double by sign flipping" %} + ins_encode %{ + __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); + %} + ins_pipe(pipe_slow); +%} + +instruct vnegD_reg(regD dst, regD src) %{ + predicate(UseAVX > 0); + match(Set dst (NegD src)); + ins_cost(150); + format %{ "vxorpd $dst, $src, [0x8000000000000000]\t" + "# neg double by sign flipping" %} + ins_encode %{ + __ 
vxorpd($dst$$XMMRegister, $src$$XMMRegister, + ExternalAddress(double_signflip())); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtF_reg(regF dst, regF src) %{ + predicate(UseSSE>=1); + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + + format %{ "sqrtss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ sqrtss($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtF_mem(regF dst, memory src) %{ + predicate(UseSSE>=1); + match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src))))); + + format %{ "sqrtss $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ sqrtss($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtF_imm(regF dst, immF con) %{ + predicate(UseSSE>=1); + match(Set dst (ConvD2F (SqrtD (ConvF2D con)))); + format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %} + ins_cost(150); + ins_encode %{ + __ sqrtss($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtD_reg(regD dst, regD src) %{ + predicate(UseSSE>=2); + match(Set dst (SqrtD src)); + + format %{ "sqrtsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ sqrtsd($dst$$XMMRegister, $src$$XMMRegister); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtD_mem(regD dst, memory src) %{ + predicate(UseSSE>=2); + match(Set dst (SqrtD (LoadD src))); + + format %{ "sqrtsd $dst, $src" %} + ins_cost(150); + ins_encode %{ + __ sqrtsd($dst$$XMMRegister, $src$$Address); + %} + ins_pipe(pipe_slow); +%} + +instruct sqrtD_imm(regD dst, immD con) %{ + predicate(UseSSE>=2); + match(Set dst (SqrtD con)); + format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} + ins_cost(150); + ins_encode %{ + __ sqrtsd($dst$$XMMRegister, $constantaddress($con)); + %} + ins_pipe(pipe_slow); +%} + diff --git a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad index 84d6bbac73b..ca85f1596fd 100644 --- a/hotspot/src/cpu/x86/vm/x86_32.ad +++ 
b/hotspot/src/cpu/x86/vm/x86_32.ad @@ -281,7 +281,7 @@ static int pre_call_FPU_size() { } static int preserve_SP_size() { - return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg) + return 2; // op, rm(reg/reg) } // !!!!! Special hack to get all type of calls to specify the byte offset @@ -495,14 +495,34 @@ void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { } } -void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { - if( dst_encoding == src_encoding ) { - // reg-reg copy, use an empty encoding - } else { - MacroAssembler _masm(&cbuf); +void emit_cmpfp_fixup(MacroAssembler& _masm) { + Label exit; + __ jccb(Assembler::noParity, exit); + __ pushf(); + // + // comiss/ucomiss instructions set ZF,PF,CF flags and + // zero OF,AF,SF for NaN values. + // Fixup flags by zeroing ZF,PF so that compare of NaN + // values returns 'less than' result (CF is set). + // Leave the rest of flags unchanged. + // + // 7 6 5 4 3 2 1 0 + // |S|Z|r|A|r|P|r|C| (r - reserved bit) + // 0 0 1 0 1 0 1 1 (0x2B) + // + __ andl(Address(rsp, 0), 0xffffff2b); + __ popf(); + __ bind(exit); +} - __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding)); - } +void emit_cmpfp3(MacroAssembler& _masm, Register dst) { + Label done; + __ movl(dst, -1); + __ jcc(Assembler::parity, done); + __ jcc(Assembler::below, done); + __ setb(Assembler::notEqual, dst); + __ movzbl(dst, dst); + __ bind(done); } @@ -792,92 +812,88 @@ static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset // Helper for XMM registers. Extra opcode bits, limited syntax. static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { - if( cbuf ) { - if( reg_lo+1 == reg_hi ) { // double move? 
- if( is_load && !UseXmmLoadAndClearUpper ) - emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load - else - emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise + if (cbuf) { + MacroAssembler _masm(cbuf); + if (reg_lo+1 == reg_hi) { // double move? + if (is_load) { + __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); + } else { + __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); + } } else { - emit_opcode(*cbuf, 0xF3 ); + if (is_load) { + __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); + } else { + __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); + } } - emit_opcode(*cbuf, 0x0F ); - if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper ) - emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load - else - emit_opcode(*cbuf, is_load ? 0x10 : 0x11 ); - encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false); #ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - if( reg_lo+1 == reg_hi ) { // double move? - if( is_load ) st->print("%s %s,[ESP + #%d]", - UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", - Matcher::regName[reg_lo], offset); - else st->print("MOVSD [ESP + #%d],%s", - offset, Matcher::regName[reg_lo]); + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + if (reg_lo+1 == reg_hi) { // double move? + if (is_load) st->print("%s %s,[ESP + #%d]", + UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", + Matcher::regName[reg_lo], offset); + else st->print("MOVSD [ESP + #%d],%s", + offset, Matcher::regName[reg_lo]); } else { - if( is_load ) st->print("MOVSS %s,[ESP + #%d]", - Matcher::regName[reg_lo], offset); - else st->print("MOVSS [ESP + #%d],%s", - offset, Matcher::regName[reg_lo]); + if (is_load) st->print("MOVSS %s,[ESP + #%d]", + Matcher::regName[reg_lo], offset); + else st->print("MOVSS [ESP + #%d],%s", + offset, Matcher::regName[reg_lo]); } #endif } int offset_size = (offset == 0) ? 
0 : ((offset <= 127) ? 1 : 4); + // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes. return size+5+offset_size; } static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, int size, outputStream* st ) { - if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers - if( cbuf ) { - if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) { - emit_opcode(*cbuf, 0x66 ); - } - emit_opcode(*cbuf, 0x0F ); - emit_opcode(*cbuf, 0x28 ); - emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? + __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), + as_XMMRegister(Matcher::_regEncode[src_lo])); + } else { + __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), + as_XMMRegister(Matcher::_regEncode[src_lo])); + } #ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers + if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } else { st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } -#endif - } - return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3); - } else { - if( cbuf ) { - emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 ); - emit_opcode(*cbuf, 0x0F ); - emit_opcode(*cbuf, 0x10 ); - emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] ); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); + } else { if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? 
st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } else { st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); } -#endif } - return size+4; +#endif } + // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes. + // Only MOVAPS SSE prefix uses 1 byte. + int sz = 4; + if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && + UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; + return size + sz; } static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, int src_hi, int dst_hi, int size, outputStream* st ) { // 32-bit if (cbuf) { - emit_opcode(*cbuf, 0x66); - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x6E); - emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7); + MacroAssembler _masm(cbuf); + __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), + as_Register(Matcher::_regEncode[src_lo])); #ifndef PRODUCT } else if (!do_size) { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); @@ -891,10 +907,9 @@ static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int int src_hi, int dst_hi, int size, outputStream* st ) { // 32-bit if (cbuf) { - emit_opcode(*cbuf, 0x66); - emit_opcode(*cbuf, 0x0F); - emit_opcode(*cbuf, 0x7E); - emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7); + MacroAssembler _masm(cbuf); + __ movdl(as_Register(Matcher::_regEncode[dst_lo]), + as_XMMRegister(Matcher::_regEncode[src_lo])); #ifndef PRODUCT } else if (!do_size) { st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); @@ -1760,7 +1775,7 @@ encode %{ emit_cc(cbuf, $secondary, $cop$$cmpcode); %} - enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV + enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); emit_d8(cbuf, op >> 8 ); emit_d8(cbuf, op & 255); @@ -1931,11 +1946,6 @@ encode %{ %} - 
enc_class Xor_Reg (eRegI dst) %{ - emit_opcode(cbuf, 0x33); - emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg); - %} - // Following encoding is no longer used, but may be restored if calling // convention changes significantly. // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) @@ -2013,64 +2023,6 @@ encode %{ %} - enc_class MovI2X_reg(regX dst, eRegI src) %{ - emit_opcode(cbuf, 0x66 ); // MOVD dst,src - emit_opcode(cbuf, 0x0F ); - emit_opcode(cbuf, 0x6E ); - emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class MovX2I_reg(eRegI dst, regX src) %{ - emit_opcode(cbuf, 0x66 ); // MOVD dst,src - emit_opcode(cbuf, 0x0F ); - emit_opcode(cbuf, 0x7E ); - emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); - %} - - enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{ - { // MOVD $dst,$src.lo - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x6E); - emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); - } - { // MOVD $tmp,$src.hi - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x6E); - emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); - } - { // PUNPCKLDQ $dst,$tmp - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x62); - emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg); - } - %} - - enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{ - { // MOVD $dst.lo,$src - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x7E); - emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg); - } - { // PSHUFLW $tmp,$src,0x4E (01001110b) - emit_opcode(cbuf,0xF2); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x70); - emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); - emit_d8(cbuf, 0x4E); - } - { // MOVD $dst.hi,$tmp - emit_opcode(cbuf,0x66); - emit_opcode(cbuf,0x0F); - emit_opcode(cbuf,0x7E); - emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg)); - } - %} - - // Encode a reg-reg copy. If it is useless, then empty encoding. 
enc_class enc_Copy( eRegI dst, eRegI src ) %{ encode_Copy( cbuf, $dst$$reg, $src$$reg ); @@ -2080,11 +2032,6 @@ encode %{ encode_Copy( cbuf, $dst$$reg, $src$$reg ); %} - // Encode xmm reg-reg copy. If it is useless, then empty encoding. - enc_class enc_CopyXD( RegXD dst, RegXD src ) %{ - encode_CopyXD( cbuf, $dst$$reg, $src$$reg ); - %} - enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many) emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} @@ -2116,14 +2063,14 @@ encode %{ $$$emit32$src$$constant; %} - enc_class Con32F_as_bits(immF src) %{ // storeF_imm + enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm // Output Float immediate bits jfloat jf = $src$$constant; int jf_as_bits = jint_cast( jf ); emit_d32(cbuf, jf_as_bits); %} - enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm + enc_class Con32F_as_bits(immF src) %{ // storeX_imm // Output Float immediate bits jfloat jf = $src$$constant; int jf_as_bits = jint_cast( jf ); @@ -2336,7 +2283,7 @@ encode %{ emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); %} - enc_class enc_FP_store(memory mem, regD src) %{ + enc_class enc_FPR_store(memory mem, regDPR src) %{ // If src is FPR1, we can just FST to store it. // Else we need to FLD it to FPR1, then FSTP to store/pop it. int reg_encoding = 0x2; // Just store @@ -2485,7 +2432,7 @@ encode %{ // ----------------- Encodings for floating point unit ----------------- // May leave result in FPU-TOS or FPU reg depending on opcodes - enc_class OpcReg_F (regF src) %{ // FMUL, FDIV + enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV $$$emit8$primary; emit_rm(cbuf, 0x3, $secondary, $src$$reg ); %} @@ -2497,17 +2444,17 @@ encode %{ %} // !!!!! 
equivalent to Pop_Reg_F - enc_class Pop_Reg_D( regD dst ) %{ + enc_class Pop_Reg_DPR( regDPR dst ) %{ emit_opcode( cbuf, 0xDD ); // FSTP ST(i) emit_d8( cbuf, 0xD8+$dst$$reg ); %} - enc_class Push_Reg_D( regD dst ) %{ + enc_class Push_Reg_DPR( regDPR dst ) %{ emit_opcode( cbuf, 0xD9 ); emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1) %} - enc_class strictfp_bias1( regD dst ) %{ + enc_class strictfp_bias1( regDPR dst ) %{ emit_opcode( cbuf, 0xDB ); // FLD m80real emit_opcode( cbuf, 0x2D ); emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() ); @@ -2515,7 +2462,7 @@ encode %{ emit_opcode( cbuf, 0xC8+$dst$$reg ); %} - enc_class strictfp_bias2( regD dst ) %{ + enc_class strictfp_bias2( regDPR dst ) %{ emit_opcode( cbuf, 0xDB ); // FLD m80real emit_opcode( cbuf, 0x2D ); emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() ); @@ -2541,39 +2488,29 @@ encode %{ store_to_stackslot( cbuf, $primary, $secondary, $src$$disp ); %} - // Push the float in stackSlot 'src' onto FP-stack - enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src] - store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp ); - %} - - // Push the double in stackSlot 'src' onto FP-stack - enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src] - store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp ); - %} - // Push FPU's TOS float to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst] + enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp ); %} // Same as Pop_Mem_F except for opcode // Push FPU's TOS double to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst] + enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp ); %} - enc_class Pop_Reg_F( regF dst ) %{ + enc_class Pop_Reg_FPR( regFPR dst ) %{ emit_opcode( cbuf, 0xDD ); // FSTP ST(i) emit_d8( cbuf, 0xD8+$dst$$reg ); %} - enc_class Push_Reg_F( 
regF dst ) %{ + enc_class Push_Reg_FPR( regFPR dst ) %{ emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) emit_d8( cbuf, 0xC0-1+$dst$$reg ); %} // Push FPU's float to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{ + enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ int pop = 0x02; if ($src$$reg != FPR1L_enc) { emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) @@ -2584,7 +2521,7 @@ encode %{ %} // Push FPU's double to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{ + enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ int pop = 0x02; if ($src$$reg != FPR1L_enc) { emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) @@ -2595,7 +2532,7 @@ encode %{ %} // Push FPU's double to a FPU-stack-slot, and pop FPU-stack - enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{ + enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ int pop = 0xD0 - 1; // -1 since we skip FLD if ($src$$reg != FPR1L_enc) { emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) @@ -2607,16 +2544,7 @@ encode %{ %} - enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{ - MacroAssembler masm(&cbuf); - masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg - masm.fmul( $src2$$reg+0); // value at TOS - masm.fadd( $src$$reg+0); // value at TOS - masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store - %} - - - enc_class Push_Reg_Mod_D( regD dst, regD src) %{ + enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ // load dst in FPR0 emit_opcode( cbuf, 0xD9 ); emit_d8( cbuf, 0xC0-1+$dst$$reg ); @@ -2634,116 +2562,59 @@ encode %{ } %} - enc_class Push_ModD_encoding( regXD src0, regXD src1) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1 - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] 
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0 - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - + enc_class Push_ModD_encoding(regD src0, regD src1) %{ + MacroAssembler _masm(&cbuf); + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src1$$XMMRegister); + __ fld_d(Address(rsp, 0)); + __ movdbl(Address(rsp, 0), $src0$$XMMRegister); + __ fld_d(Address(rsp, 0)); %} - enc_class Push_ModX_encoding( regX src0, regX src1) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,4 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x04); - - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1 - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9 ); // FLD [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0 - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9 ); // FLD [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - + enc_class Push_ModF_encoding(regF src0, regF src1) %{ + MacroAssembler _masm(&cbuf); + __ subptr(rsp, 4); + __ movflt(Address(rsp, 0), $src1$$XMMRegister); + __ fld_s(Address(rsp, 0)); + __ movflt(Address(rsp, 0), $src0$$XMMRegister); + __ fld_s(Address(rsp, 0)); %} - enc_class Push_ResultXD(regXD dst) %{ - store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP] - - // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp] - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66); - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 
0x10 : 0x12); - encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,8 - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,0x08); + enc_class Push_ResultD(regD dst) %{ + MacroAssembler _masm(&cbuf); + __ fstp_d(Address(rsp, 0)); + __ movdbl($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, 8); %} - enc_class Push_ResultX(regX dst, immI d8) %{ - store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP] - - emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP] - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x10 ); - encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8) - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,$d8$$constant); + enc_class Push_ResultF(regF dst, immI d8) %{ + MacroAssembler _masm(&cbuf); + __ fstp_s(Address(rsp, 0)); + __ movflt($dst$$XMMRegister, Address(rsp, 0)); + __ addptr(rsp, $d8$$constant); %} - enc_class Push_SrcXD(regXD src) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); + enc_class Push_SrcD(regD src) %{ + MacroAssembler _masm(&cbuf); + __ subptr(rsp, 8); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); %} enc_class push_stack_temp_qword() %{ - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8 (cbuf,0x08); + MacroAssembler _masm(&cbuf); + __ subptr(rsp, 8); %} enc_class pop_stack_temp_qword() %{ - emit_opcode(cbuf,0x83); // ADD ESP,8 - emit_opcode(cbuf,0xC4); - emit_d8 (cbuf,0x08); + MacroAssembler _masm(&cbuf); + __ addptr(rsp, 8); %} - enc_class push_xmm_to_fpr1( regXD xmm_src ) %{ - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src - emit_opcode (cbuf, 0x0F ); - 
emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); + enc_class push_xmm_to_fpr1(regD src) %{ + MacroAssembler _masm(&cbuf); + __ movdbl(Address(rsp, 0), $src$$XMMRegister); + __ fld_d(Address(rsp, 0)); %} // Compute X^Y using Intel's fast hardware instructions, if possible. @@ -2785,10 +2656,7 @@ encode %{ encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false); %} -// enc_class Pop_Reg_Mod_D( regD dst, regD src) -// was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X() - - enc_class Push_Result_Mod_D( regD src) %{ + enc_class Push_Result_Mod_DPR( regDPR src) %{ if ($src$$reg != FPR1L_enc) { // fincstp emit_opcode (cbuf, 0xD9); @@ -2817,7 +2685,7 @@ encode %{ emit_opcode( cbuf, 0x05 ); %} - enc_class emitModD() %{ + enc_class emitModDPR() %{ // fprem must be iterative // :: loop // fprem @@ -2922,24 +2790,6 @@ encode %{ %} - // XMM version of CmpF_Result. Because the XMM compare - // instructions set the EFLAGS directly. It becomes simpler than - // the float version above. - enc_class CmpX_Result(eRegI dst) %{ - MacroAssembler _masm(&cbuf); - Label nan, inc, done; - - __ jccb(Assembler::parity, nan); - __ jccb(Assembler::equal, done); - __ jccb(Assembler::above, inc); - __ bind(nan); - __ decrement(as_Register($dst$$reg)); // NO L qqq - __ jmpb(done); - __ bind(inc); - __ increment(as_Register($dst$$reg)); // NO L qqq - __ bind(done); - %} - // Compare the longs and set flags // BROKEN! 
Do Not use as-is enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ @@ -3162,48 +3012,6 @@ encode %{ emit_d8 (cbuf,0 ); %} - enc_class movq_ld(regXD dst, memory mem) %{ - MacroAssembler _masm(&cbuf); - __ movq($dst$$XMMRegister, $mem$$Address); - %} - - enc_class movq_st(memory mem, regXD src) %{ - MacroAssembler _masm(&cbuf); - __ movq($mem$$Address, $src$$XMMRegister); - %} - - enc_class pshufd_8x8(regX dst, regX src) %{ - MacroAssembler _masm(&cbuf); - - encode_CopyXD(cbuf, $dst$$reg, $src$$reg); - __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg)); - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00); - %} - - enc_class pshufd_4x16(regX dst, regX src) %{ - MacroAssembler _masm(&cbuf); - - __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00); - %} - - enc_class pshufd(regXD dst, regXD src, int mode) %{ - MacroAssembler _masm(&cbuf); - - __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode); - %} - - enc_class pxor(regXD dst, regXD src) %{ - MacroAssembler _masm(&cbuf); - - __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg)); - %} - - enc_class mov_i2x(regXD dst, eRegI src) %{ - MacroAssembler _masm(&cbuf); - - __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg)); - %} - // Because the transitions from emitted code to the runtime // monitorenter/exit helper stubs are so slow it's critical that @@ -3757,7 +3565,7 @@ encode %{ // 'zero', store the darned double down as an int, and reset the // rounding mode to 'nearest'. The hardware throws an exception which // patches up the correct value directly to the stack. - enc_class D2I_encoding( regD src ) %{ + enc_class DPR2I_encoding( regDPR src ) %{ // Flip to round-to-zero mode. We attempted to allow invalid-op // exceptions here, so that a NAN or other corner-case value will // thrown an exception (but normal values get converted at full speed). @@ -3800,7 +3608,7 @@ encode %{ // Carry on here... 
%} - enc_class D2L_encoding( regD src ) %{ + enc_class DPR2L_encoding( regDPR src ) %{ emit_opcode(cbuf,0xD9); // FLDCW trunc emit_opcode(cbuf,0x2D); emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); @@ -3842,294 +3650,27 @@ encode %{ // Carry on here... %} - enc_class X2L_encoding( regX src ) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9); // FLDCW trunc - emit_opcode(cbuf,0x2D); - emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); - - // Encoding assumes a double has been pushed into FPR0. - // Store down the double as a long, popping the FPU stack - emit_opcode(cbuf,0xDF); // FISTP [ESP] - emit_opcode(cbuf,0x3C); - emit_d8(cbuf,0x24); - - // Restore the rounding mode; mask the exception - emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode - emit_opcode(cbuf,0x2D); - emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() - ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() - : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); - - // Load the converted int; adjust CPU stack - emit_opcode(cbuf,0x58); // POP EAX - - emit_opcode(cbuf,0x5A); // POP EDX - - emit_opcode(cbuf,0x81); // CMP EDX,imm - emit_d8 (cbuf,0xFA); // rdx - emit_d32 (cbuf,0x80000000);// 0x80000000 - - emit_opcode(cbuf,0x75); // JNE around_slow_call - emit_d8 (cbuf,0x13+4); // Size of slow_call - - emit_opcode(cbuf,0x85); // TEST EAX,EAX - emit_opcode(cbuf,0xC0); // 2/rax,/rax, - - emit_opcode(cbuf,0x75); // JNE around_slow_call - emit_d8 (cbuf,0x13); // Size of slow_call - - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,4 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x04); - - emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9 ); // FLD_S [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,4 - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,0x04); - - // CALL directly to the runtime - cbuf.set_insts_mark(); - emit_opcode(cbuf,0xE8); // Call into runtime - emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - // Carry on here... - %} - - enc_class XD2L_encoding( regXD src ) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xD9); // FLDCW trunc - emit_opcode(cbuf,0x2D); - emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); - - // Encoding assumes a double has been pushed into FPR0. 
- // Store down the double as a long, popping the FPU stack - emit_opcode(cbuf,0xDF); // FISTP [ESP] - emit_opcode(cbuf,0x3C); - emit_d8(cbuf,0x24); - - // Restore the rounding mode; mask the exception - emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode - emit_opcode(cbuf,0x2D); - emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() - ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() - : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); - - // Load the converted int; adjust CPU stack - emit_opcode(cbuf,0x58); // POP EAX - - emit_opcode(cbuf,0x5A); // POP EDX - - emit_opcode(cbuf,0x81); // CMP EDX,imm - emit_d8 (cbuf,0xFA); // rdx - emit_d32 (cbuf,0x80000000); // 0x80000000 - - emit_opcode(cbuf,0x75); // JNE around_slow_call - emit_d8 (cbuf,0x13+4); // Size of slow_call - - emit_opcode(cbuf,0x85); // TEST EAX,EAX - emit_opcode(cbuf,0xC0); // 2/rax,/rax, - - emit_opcode(cbuf,0x75); // JNE around_slow_call - emit_d8 (cbuf,0x13); // Size of slow_call - - // Push src onto stack slow-path - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,8 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x08); - - emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src - emit_opcode (cbuf, 0x0F ); - emit_opcode (cbuf, 0x11 ); - encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0xDD ); // FLD_D [ESP] - encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false); - - emit_opcode(cbuf,0x83); // ADD ESP,8 - emit_opcode(cbuf,0xC4); - emit_d8(cbuf,0x08); - - // CALL directly to the runtime - cbuf.set_insts_mark(); - emit_opcode(cbuf,0xE8); // Call into runtime - emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - // Carry on here... 
- %} - - enc_class D2X_encoding( regX dst, regD src ) %{ - // Allocate a word - emit_opcode(cbuf,0x83); // SUB ESP,4 - emit_opcode(cbuf,0xEC); - emit_d8(cbuf,0x04); - int pop = 0x02; - if ($src$$reg != FPR1L_enc) { - emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1) - emit_d8( cbuf, 0xC0-1+$src$$reg ); - pop = 0x03; - } - store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST
_S [ESP]
-
- emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x10 );
- encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,4
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf,0x04);
- // Carry on here...
- %}
-
- enc_class FX2I_encoding( regX src, eRegI dst ) %{
- emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
-
- // Compare the result to see if we need to go to the slow path
- emit_opcode(cbuf,0x81); // CMP dst,imm
- emit_rm (cbuf,0x3,0x7,$dst$$reg);
- emit_d32 (cbuf,0x80000000); // 0x80000000
-
- emit_opcode(cbuf,0x75); // JNE around_slow_call
- emit_d8 (cbuf,0x13); // Size of slow_call
- // Store xmm to a temp memory
- // location and push it onto stack.
-
- emit_opcode(cbuf,0x83); // SUB ESP,4
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf, $primary ? 0x8 : 0x4);
-
- emit_opcode (cbuf, $primary ? 0xF2 : 0xF3 ); // MOVSS [ESP], xmm
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf, $primary ? 0xDD : 0xD9 ); // FLD [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,4
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf, $primary ? 0x8 : 0x4);
-
- // CALL directly to the runtime
- cbuf.set_insts_mark();
- emit_opcode(cbuf,0xE8); // Call into runtime
- emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
-
- // Carry on here...
- %}
-
- enc_class X2D_encoding( regD dst, regX src ) %{
- // Allocate a word
- emit_opcode(cbuf,0x83); // SUB ESP,4
- emit_opcode(cbuf,0xEC);
- emit_d8(cbuf,0x04);
-
- emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], xmm
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, 0x11 );
- encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0xD9 ); // FLD_S [ESP]
- encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
-
- emit_opcode(cbuf,0x83); // ADD ESP,4
- emit_opcode(cbuf,0xC4);
- emit_d8(cbuf,0x04);
-
- // Carry on here...
- %}
-
- enc_class AbsXF_encoding(regX dst) %{
- address signmask_address=(address)float_signmask_pool;
- // andpd:\tANDPS $dst,[signconst]
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x54);
- emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
- emit_d32(cbuf, (int)signmask_address);
- %}
-
- enc_class AbsXD_encoding(regXD dst) %{
- address signmask_address=(address)double_signmask_pool;
- // andpd:\tANDPD $dst,[signconst]
- emit_opcode(cbuf, 0x66);
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x54);
- emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
- emit_d32(cbuf, (int)signmask_address);
- %}
-
- enc_class NegXF_encoding(regX dst) %{
- address signmask_address=(address)float_signflip_pool;
- // andpd:\tXORPS $dst,[signconst]
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x57);
- emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
- emit_d32(cbuf, (int)signmask_address);
- %}
-
- enc_class NegXD_encoding(regXD dst) %{
- address signmask_address=(address)double_signflip_pool;
- // andpd:\tXORPD $dst,[signconst]
- emit_opcode(cbuf, 0x66);
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x57);
- emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
- emit_d32(cbuf, (int)signmask_address);
- %}
-
- enc_class FMul_ST_reg( eRegF src1 ) %{
+ enc_class FMul_ST_reg( eRegFPR src1 ) %{
// Operand was loaded from memory into fp ST (stack top)
// FMUL ST,$src /* D8 C8+i */
emit_opcode(cbuf, 0xD8);
emit_opcode(cbuf, 0xC8 + $src1$$reg);
%}
- enc_class FAdd_ST_reg( eRegF src2 ) %{
+ enc_class FAdd_ST_reg( eRegFPR src2 ) %{
// FADDP ST,src2 /* D8 C0+i */
emit_opcode(cbuf, 0xD8);
emit_opcode(cbuf, 0xC0 + $src2$$reg);
//could use FADDP src2,fpST /* DE C0+i */
%}
- enc_class FAddP_reg_ST( eRegF src2 ) %{
+ enc_class FAddP_reg_ST( eRegFPR src2 ) %{
// FADDP src2,ST /* DE C0+i */
emit_opcode(cbuf, 0xDE);
emit_opcode(cbuf, 0xC0 + $src2$$reg);
%}
- enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
+ enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
// Operand has been loaded into fp ST (stack top)
// FSUB ST,$src1
emit_opcode(cbuf, 0xD8);
@@ -4140,7 +3681,7 @@ encode %{
emit_opcode(cbuf, 0xF0 + $src2$$reg);
%}
- enc_class MulFAddF (eRegF src1, eRegF src2) %{
+ enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
// Operand was loaded from memory into fp ST (stack top)
// FADD ST,$src /* D8 C0+i */
emit_opcode(cbuf, 0xD8);
@@ -4152,7 +3693,7 @@ encode %{
%}
- enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
+ enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
// Operand was loaded from memory into fp ST (stack top)
// FADD ST,$src /* D8 C0+i */
emit_opcode(cbuf, 0xD8);
@@ -4176,66 +3717,6 @@ encode %{
store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
%}
- enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
- { // Atomic long load
- // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
- int base = $mem$$base;
- int index = $mem$$index;
- int scale = $mem$$scale;
- int displace = $mem$$disp;
- bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- { // MOVSD $dst,$tmp ! atomic long store
- emit_opcode(cbuf,0xF2);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x11);
- int base = $dst$$base;
- int index = $dst$$index;
- int scale = $dst$$scale;
- int displace = $dst$$disp;
- bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- %}
-
- enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
- { // Atomic long load
- // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
- int base = $mem$$base;
- int index = $mem$$index;
- int scale = $mem$$scale;
- int displace = $mem$$disp;
- bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- { // MOVD $dst.lo,$tmp
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x7E);
- emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
- }
- { // PSRLQ $tmp,32
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x73);
- emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
- emit_d8(cbuf, 0x20);
- }
- { // MOVD $dst.hi,$tmp
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x7E);
- emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
- }
- %}
-
// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
@@ -4253,66 +3734,6 @@ encode %{
encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
%}
- enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
- { // Atomic long load
- // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
- int base = $src$$base;
- int index = $src$$index;
- int scale = $src$$scale;
- int displace = $src$$disp;
- bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
- { // MOVSD $mem,$tmp ! atomic long store
- emit_opcode(cbuf,0xF2);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x11);
- int base = $mem$$base;
- int index = $mem$$index;
- int scale = $mem$$scale;
- int displace = $mem$$disp;
- bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- %}
-
- enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
- { // MOVD $tmp,$src.lo
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x6E);
- emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
- }
- { // MOVD $tmp2,$src.hi
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x6E);
- emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
- }
- { // PUNPCKLDQ $tmp,$tmp2
- emit_opcode(cbuf,0x66);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x62);
- emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
- }
- cbuf.set_insts_mark(); // Mark start of MOVSD in case $mem has an oop
- { // MOVSD $mem,$tmp ! atomic long store
- emit_opcode(cbuf,0xF2);
- emit_opcode(cbuf,0x0F);
- emit_opcode(cbuf,0x11);
- int base = $mem$$base;
- int index = $mem$$index;
- int scale = $mem$$scale;
- int displace = $mem$$disp;
- bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
- encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
- }
- %}
-
// Safepoint Poll. This polls the safepoint page, and causes an
// exception if it is not readable. Unfortunately, it kills the condition code
// in the process
@@ -4705,7 +4126,7 @@ operand immL32() %{
%}
//Double Immediate zero
-operand immD0() %{
+operand immDPR0() %{
// Do additional (and counter-intuitive) test against NaN to work around VC++
// bug that generates code such that NaNs compare equal to 0.0
predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
@@ -4717,7 +4138,7 @@ operand immD0() %{
%}
// Double Immediate one
-operand immD1() %{
+operand immDPR1() %{
predicate( UseSSE<=1 && n->getd() == 1.0 );
match(ConD);
@@ -4727,7 +4148,7 @@ operand immD1() %{
%}
// Double Immediate
-operand immD() %{
+operand immDPR() %{
predicate(UseSSE<=1);
match(ConD);
@@ -4736,7 +4157,7 @@ operand immD() %{
interface(CONST_INTER);
%}
-operand immXD() %{
+operand immD() %{
predicate(UseSSE>=2);
match(ConD);
@@ -4746,7 +4167,7 @@ operand immXD() %{
%}
// Double Immediate zero
-operand immXD0() %{
+operand immD0() %{
// Do additional (and counter-intuitive) test against NaN to work around VC++
// bug that generates code such that NaNs compare equal to 0.0 AND do not
// compare equal to -0.0.
@@ -4758,7 +4179,7 @@ operand immXD0() %{
%}
// Float Immediate zero
-operand immF0() %{
+operand immFPR0() %{
predicate(UseSSE == 0 && n->getf() == 0.0F);
match(ConF);
@@ -4768,7 +4189,7 @@ operand immF0() %{
%}
// Float Immediate one
-operand immF1() %{
+operand immFPR1() %{
predicate(UseSSE == 0 && n->getf() == 1.0F);
match(ConF);
@@ -4778,7 +4199,7 @@ operand immF1() %{
%}
// Float Immediate
-operand immF() %{
+operand immFPR() %{
predicate( UseSSE == 0 );
match(ConF);
@@ -4788,7 +4209,7 @@ operand immF() %{
%}
// Float Immediate
-operand immXF() %{
+operand immF() %{
predicate(UseSSE >= 1);
match(ConF);
@@ -4798,7 +4219,7 @@ operand immXF() %{
%}
// Float Immediate zero. Zero and not -0.0
-operand immXF0() %{
+operand immF0() %{
predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
match(ConF);
@@ -5174,7 +4595,7 @@ operand flagsReg_long_LEGT() %{
%}
// Float register operands
-operand regD() %{
+operand regDPR() %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(dbl_reg));
match(RegD);
@@ -5184,7 +4605,7 @@ operand regD() %{
interface(REG_INTER);
%}
-operand regDPR1(regD reg) %{
+operand regDPR1(regDPR reg) %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(dbl_reg0));
match(reg);
@@ -5192,7 +4613,7 @@ operand regDPR1(regD reg) %{
interface(REG_INTER);
%}
-operand regDPR2(regD reg) %{
+operand regDPR2(regDPR reg) %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(dbl_reg1));
match(reg);
@@ -5200,7 +4621,7 @@ operand regDPR2(regD reg) %{
interface(REG_INTER);
%}
-operand regnotDPR1(regD reg) %{
+operand regnotDPR1(regDPR reg) %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(dbl_notreg0));
match(reg);
@@ -5209,18 +4630,18 @@ operand regnotDPR1(regD reg) %{
%}
// XMM Double register operands
-operand regXD() %{
+operand regD() %{
predicate( UseSSE>=2 );
constraint(ALLOC_IN_RC(xdb_reg));
match(RegD);
- match(regXD6);
- match(regXD7);
+ match(regD6);
+ match(regD7);
format %{ %}
interface(REG_INTER);
%}
// XMM6 double register operands
-operand regXD6(regXD reg) %{
+operand regD6(regD reg) %{
predicate( UseSSE>=2 );
constraint(ALLOC_IN_RC(xdb_reg6));
match(reg);
@@ -5229,7 +4650,7 @@ operand regXD6(regXD reg) %{
%}
// XMM7 double register operands
-operand regXD7(regXD reg) %{
+operand regD7(regD reg) %{
predicate( UseSSE>=2 );
constraint(ALLOC_IN_RC(xdb_reg7));
match(reg);
@@ -5238,7 +4659,7 @@ operand regXD7(regXD reg) %{
%}
// Float register operands
-operand regF() %{
+operand regFPR() %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(flt_reg));
match(RegF);
@@ -5248,7 +4669,7 @@ operand regF() %{
%}
// Float register operands
-operand regFPR1(regF reg) %{
+operand regFPR1(regFPR reg) %{
predicate( UseSSE < 2 );
constraint(ALLOC_IN_RC(flt_reg0));
match(reg);
@@ -5257,7 +4678,7 @@ operand regFPR1(regF reg) %{
%}
// XMM register operands
-operand regX() %{
+operand regF() %{
predicate( UseSSE>=1 );
constraint(ALLOC_IN_RC(xmm_reg));
match(RegF);
@@ -6001,7 +5422,7 @@ pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
%}
// Conditional move double reg-reg
-pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
+pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
single_instruction;
dst : S4(write);
src : S3(read);
@@ -6010,7 +5431,7 @@ pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg(regD dst) %{
+pipe_class fpu_reg(regDPR dst) %{
instruction_count(2);
dst : S3(read);
DECODE : S0(2); // any 2 decoders
@@ -6018,7 +5439,7 @@ pipe_class fpu_reg(regD dst) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg_reg(regD dst, regD src) %{
+pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
instruction_count(2);
dst : S4(write);
src : S3(read);
@@ -6027,7 +5448,7 @@ pipe_class fpu_reg_reg(regD dst, regD src) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
+pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
instruction_count(3);
dst : S4(write);
src1 : S3(read);
@@ -6037,7 +5458,7 @@ pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
instruction_count(4);
dst : S4(write);
src1 : S3(read);
@@ -6048,7 +5469,7 @@ pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
%}
// Float reg-reg operation
-pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
+pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
instruction_count(4);
dst : S4(write);
src1 : S3(read);
@@ -6061,7 +5482,7 @@ pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
%}
// Float reg-mem operation
-pipe_class fpu_reg_mem(regD dst, memory mem) %{
+pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
instruction_count(2);
dst : S5(write);
mem : S3(read);
@@ -6072,7 +5493,7 @@ pipe_class fpu_reg_mem(regD dst, memory mem) %{
%}
// Float reg-mem operation
-pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
+pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
instruction_count(3);
dst : S5(write);
src1 : S3(read);
@@ -6084,7 +5505,7 @@ pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
%}
// Float mem-reg operation
-pipe_class fpu_mem_reg(memory mem, regD src) %{
+pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2);
src : S5(read);
mem : S3(read);
@@ -6094,7 +5515,7 @@ pipe_class fpu_mem_reg(memory mem, regD src) %{
MEM : S3; // any mem
%}
-pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
+pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
instruction_count(3);
src1 : S3(read);
src2 : S3(read);
@@ -6105,7 +5526,7 @@ pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
MEM : S3; // any mem
%}
-pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
+pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
instruction_count(3);
src1 : S3(read);
src2 : S3(read);
@@ -6134,7 +5555,7 @@ pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
MEM : S3(3); // any mem
%}
-pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
+pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
instruction_count(3);
src1 : S4(read);
mem : S4(read);
@@ -6145,7 +5566,7 @@ pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
%}
// Float load constant
-pipe_class fpu_reg_con(regD dst) %{
+pipe_class fpu_reg_con(regDPR dst) %{
instruction_count(2);
dst : S5(write);
D0 : S0; // big decoder only for the load
@@ -6155,7 +5576,7 @@ pipe_class fpu_reg_con(regD dst) %{
%}
// Float load constant
-pipe_class fpu_reg_reg_con(regD dst, regD src) %{
+pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
instruction_count(3);
dst : S5(write);
src : S3(read);
@@ -6870,18 +6291,21 @@ instruct loadL_volatile(stackSlotL dst, memory mem) %{
ins_pipe( fpu_reg_mem );
%}
-instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
+instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
match(Set dst (LoadL mem));
effect(TEMP tmp);
ins_cost(180);
format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
"MOVSD $dst,$tmp" %}
- ins_encode(enc_loadLX_volatile(mem, dst, tmp));
+ ins_encode %{
+ __ movdbl($tmp$$XMMRegister, $mem$$Address);
+ __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
+instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
match(Set dst (LoadL mem));
effect(TEMP tmp);
@@ -6890,7 +6314,12 @@ instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
"MOVD $dst.lo,$tmp\n\t"
"PSRLQ $tmp,32\n\t"
"MOVD $dst.hi,$tmp" %}
- ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
+ ins_encode %{
+ __ movdbl($tmp$$XMMRegister, $mem$$Address);
+ __ movdl($dst$$Register, $tmp$$XMMRegister);
+ __ psrlq($tmp$$XMMRegister, 32);
+ __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6929,7 +6358,7 @@ instruct loadKlass(eRegP dst, memory mem) %{
%}
// Load Double
-instruct loadD(regD dst, memory mem) %{
+instruct loadDPR(regDPR dst, memory mem) %{
predicate(UseSSE<=1);
match(Set dst (LoadD mem));
@@ -6938,42 +6367,48 @@ instruct loadD(regD dst, memory mem) %{
"FSTP $dst" %}
opcode(0xDD); /* DD /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Reg_D(dst) );
+ Pop_Reg_DPR(dst) );
ins_pipe( fpu_reg_mem );
%}
// Load Double to XMM
-instruct loadXD(regXD dst, memory mem) %{
+instruct loadD(regD dst, memory mem) %{
predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
match(Set dst (LoadD mem));
ins_cost(145);
format %{ "MOVSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
+ ins_encode %{
+ __ movdbl ($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
-instruct loadXD_partial(regXD dst, memory mem) %{
+instruct loadD_partial(regD dst, memory mem) %{
predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
match(Set dst (LoadD mem));
ins_cost(145);
format %{ "MOVLPD $dst,$mem" %}
- ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
+ ins_encode %{
+ __ movdbl ($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load to XMM register (single-precision floating point)
// MOVSS instruction
-instruct loadX(regX dst, memory mem) %{
+instruct loadF(regF dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (LoadF mem));
ins_cost(145);
format %{ "MOVSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
+ ins_encode %{
+ __ movflt ($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Float
-instruct loadF(regF dst, memory mem) %{
+instruct loadFPR(regFPR dst, memory mem) %{
predicate(UseSSE==0);
match(Set dst (LoadF mem));
@@ -6982,57 +6417,67 @@ instruct loadF(regF dst, memory mem) %{
"FSTP $dst" %}
opcode(0xD9); /* D9 /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Reg_F(dst) );
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem );
%}
// Load Aligned Packed Byte to XMM register
-instruct loadA8B(regXD dst, memory mem) %{
+instruct loadA8B(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load8B mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed8B" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Short to XMM register
-instruct loadA4S(regXD dst, memory mem) %{
+instruct loadA4S(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load4S mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4S" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Char to XMM register
-instruct loadA4C(regXD dst, memory mem) %{
+instruct loadA4C(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load4C mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4C" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Integer to XMM register
-instruct load2IU(regXD dst, memory mem) %{
+instruct load2IU(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load2I mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed2I" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Single to XMM
-instruct loadA2F(regXD dst, memory mem) %{
+instruct loadA2F(regD dst, memory mem) %{
predicate(UseSSE>=1);
match(Set dst (Load2F mem));
ins_cost(145);
format %{ "MOVQ $dst,$mem\t! packed2F" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7139,8 +6584,8 @@ instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
ins_pipe( ialu_reg_long );
%}
-// The instruction usage is guarded by predicate in operand immF().
-instruct loadConF(regF dst, immF con) %{
+// The instruction usage is guarded by predicate in operand immFPR().
+instruct loadConFPR(regFPR dst, immFPR con) %{
match(Set dst con);
ins_cost(125);
format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
@@ -7152,8 +6597,8 @@ instruct loadConF(regF dst, immF con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immF0().
-instruct loadConF0(regF dst, immF0 con) %{
+// The instruction usage is guarded by predicate in operand immFPR0().
+instruct loadConFPR0(regFPR dst, immFPR0 con) %{
match(Set dst con);
ins_cost(125);
format %{ "FLDZ ST\n\t"
@@ -7165,8 +6610,8 @@ instruct loadConF0(regF dst, immF0 con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immF1().
-instruct loadConF1(regF dst, immF1 con) %{
+// The instruction usage is guarded by predicate in operand immFPR1().
+instruct loadConFPR1(regFPR dst, immFPR1 con) %{
match(Set dst con);
ins_cost(125);
format %{ "FLD1 ST\n\t"
@@ -7178,8 +6623,8 @@ instruct loadConF1(regF dst, immF1 con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immXF().
-instruct loadConX(regX dst, immXF con) %{
+// The instruction usage is guarded by predicate in operand immF().
+instruct loadConF(regF dst, immF con) %{
match(Set dst con);
ins_cost(125);
format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
@@ -7189,8 +6634,8 @@ instruct loadConX(regX dst, immXF con) %{
ins_pipe(pipe_slow);
%}
-// The instruction usage is guarded by predicate in operand immXF0().
-instruct loadConX0(regX dst, immXF0 src) %{
+// The instruction usage is guarded by predicate in operand immF0().
+instruct loadConF0(regF dst, immF0 src) %{
match(Set dst src);
ins_cost(100);
format %{ "XORPS $dst,$dst\t# float 0.0" %}
@@ -7200,8 +6645,8 @@ instruct loadConX0(regX dst, immXF0 src) %{
ins_pipe(pipe_slow);
%}
-// The instruction usage is guarded by predicate in operand immD().
-instruct loadConD(regD dst, immD con) %{
+// The instruction usage is guarded by predicate in operand immDPR().
+instruct loadConDPR(regDPR dst, immDPR con) %{
match(Set dst con);
ins_cost(125);
@@ -7214,8 +6659,8 @@ instruct loadConD(regD dst, immD con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immD0().
-instruct loadConD0(regD dst, immD0 con) %{
+// The instruction usage is guarded by predicate in operand immDPR0().
+instruct loadConDPR0(regDPR dst, immDPR0 con) %{
match(Set dst con);
ins_cost(125);
@@ -7228,8 +6673,8 @@ instruct loadConD0(regD dst, immD0 con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immD1().
-instruct loadConD1(regD dst, immD1 con) %{
+// The instruction usage is guarded by predicate in operand immDPR1().
+instruct loadConDPR1(regDPR dst, immDPR1 con) %{
match(Set dst con);
ins_cost(125);
@@ -7242,8 +6687,8 @@ instruct loadConD1(regD dst, immD1 con) %{
ins_pipe(fpu_reg_con);
%}
-// The instruction usage is guarded by predicate in operand immXD().
-instruct loadConXD(regXD dst, immXD con) %{
+// The instruction usage is guarded by predicate in operand immD().
+instruct loadConD(regD dst, immD con) %{
match(Set dst con);
ins_cost(125);
format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
@@ -7253,12 +6698,14 @@ instruct loadConXD(regXD dst, immXD con) %{
ins_pipe(pipe_slow);
%}
-// The instruction usage is guarded by predicate in operand immXD0().
-instruct loadConXD0(regXD dst, immXD0 src) %{
+// The instruction usage is guarded by predicate in operand immD0().
+instruct loadConD0(regD dst, immD0 src) %{
match(Set dst src);
ins_cost(100);
format %{ "XORPD $dst,$dst\t# double 0.0" %}
- ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
+ ins_encode %{
+ __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7296,7 +6743,7 @@ instruct loadSSP(eRegP dst, stackSlotP src) %{
%}
// Load Stack Slot
-instruct loadSSF(regF dst, stackSlotF src) %{
+instruct loadSSF(regFPR dst, stackSlotF src) %{
match(Set dst src);
ins_cost(125);
@@ -7304,12 +6751,12 @@ instruct loadSSF(regF dst, stackSlotF src) %{
"FSTP $dst" %}
opcode(0xD9); /* D9 /0, FLD m32real */
ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
- Pop_Reg_F(dst) );
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem );
%}
// Load Stack Slot
-instruct loadSSD(regD dst, stackSlotD src) %{
+instruct loadSSD(regDPR dst, stackSlotD src) %{
match(Set dst src);
ins_cost(125);
@@ -7317,7 +6764,7 @@ instruct loadSSD(regD dst, stackSlotD src) %{
"FSTP $dst" %}
opcode(0xDD); /* DD /0, FLD m64real */
ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
- Pop_Reg_D(dst) );
+ Pop_Reg_DPR(dst) );
ins_pipe( fpu_reg_mem );
%}
@@ -7552,7 +6999,7 @@ instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
ins_pipe( fpu_reg_mem );
%}
-instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
+instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
match(Set mem (StoreL mem src));
effect( TEMP tmp, KILL cr );
@@ -7560,12 +7007,15 @@ instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %
format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
"MOVSD $tmp,$src\n\t"
"MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
- opcode(0x3B);
- ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
+ ins_encode %{
+ __ cmpl(rax, $mem$$Address);
+ __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
+ __ movdbl($mem$$Address, $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
+instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
match(Set mem (StoreL mem src));
effect( TEMP tmp2 , TEMP tmp, KILL cr );
@@ -7575,8 +7025,13 @@ instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFla
"MOVD $tmp2,$src.hi\n\t"
"PUNPCKLDQ $tmp,$tmp2\n\t"
"MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
- opcode(0x3B);
- ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
+ ins_encode %{
+ __ cmpl(rax, $mem$$Address);
+ __ movdl($tmp$$XMMRegister, $src$$Register);
+ __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+ __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
+ __ movdbl($mem$$Address, $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7638,32 +7093,38 @@ instruct storeImmB(memory mem, immI8 src) %{
%}
// Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regXD src) %{
+instruct storeA8B(memory mem, regD src) %{
predicate(UseSSE>=1);
match(Set mem (Store8B mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed8B" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regXD src) %{
+instruct storeA4C(memory mem, regD src) %{
predicate(UseSSE>=1);
match(Set mem (Store4C mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed4C" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regXD src) %{
+instruct storeA2I(memory mem, regD src) %{
predicate(UseSSE>=1);
match(Set mem (Store2I mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2I" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7679,98 +7140,116 @@ instruct storeImmCM(memory mem, immI8 src) %{
%}
// Store Double
-instruct storeD( memory mem, regDPR1 src) %{
+instruct storeDPR( memory mem, regDPR1 src) %{
predicate(UseSSE<=1);
match(Set mem (StoreD mem src));
ins_cost(100);
format %{ "FST_D $mem,$src" %}
opcode(0xDD); /* DD /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store double does rounding on x86
-instruct storeD_rounded( memory mem, regDPR1 src) %{
+instruct storeDPR_rounded( memory mem, regDPR1 src) %{
predicate(UseSSE<=1);
match(Set mem (StoreD mem (RoundDouble src)));
ins_cost(100);
format %{ "FST_D $mem,$src\t# round" %}
opcode(0xDD); /* DD /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
-instruct storeXD(memory mem, regXD src) %{
+instruct storeD(memory mem, regD src) %{
predicate(UseSSE>=2);
match(Set mem (StoreD mem src));
ins_cost(95);
format %{ "MOVSD $mem,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+ ins_encode %{
+ __ movdbl($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
-instruct storeX(memory mem, regX src) %{
+instruct storeF(memory mem, regF src) %{
predicate(UseSSE>=1);
match(Set mem (StoreF mem src));
ins_cost(95);
format %{ "MOVSS $mem,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
+ ins_encode %{
+ __ movflt($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regXD src) %{
+instruct storeA2F(memory mem, regD src) %{
predicate(UseSSE>=1);
match(Set mem (Store2F mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2F" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Store Float
-instruct storeF( memory mem, regFPR1 src) %{
+instruct storeFPR( memory mem, regFPR1 src) %{
predicate(UseSSE==0);
match(Set mem (StoreF mem src));
ins_cost(100);
format %{ "FST_S $mem,$src" %}
opcode(0xD9); /* D9 /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store Float does rounding on x86
-instruct storeF_rounded( memory mem, regFPR1 src) %{
+instruct storeFPR_rounded( memory mem, regFPR1 src) %{
predicate(UseSSE==0);
match(Set mem (StoreF mem (RoundFloat src)));
ins_cost(100);
format %{ "FST_S $mem,$src\t# round" %}
opcode(0xD9); /* D9 /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store Float does rounding on x86
-instruct storeF_Drounded( memory mem, regDPR1 src) %{
+instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
predicate(UseSSE<=1);
match(Set mem (StoreF mem (ConvD2F src)));
ins_cost(100);
format %{ "FST_S $mem,$src\t# D-round" %}
opcode(0xD9); /* D9 /2 */
- ins_encode( enc_FP_store(mem,src) );
+ ins_encode( enc_FPR_store(mem,src) );
ins_pipe( fpu_mem_reg );
%}
// Store immediate Float value (it is faster than store from FPU register)
+// The instruction usage is guarded by predicate in operand immFPR().
+instruct storeFPR_imm( memory mem, immFPR src) %{
+ match(Set mem (StoreF mem src));
+
+ ins_cost(50);
+ format %{ "MOV $mem,$src\t# store float" %}
+ opcode(0xC7); /* C7 /0 */
+ ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
+ ins_pipe( ialu_mem_imm );
+%}
+
+// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
match(Set mem (StoreF mem src));
@@ -7782,18 +7261,6 @@ instruct storeF_imm( memory mem, immF src) %{
ins_pipe( ialu_mem_imm );
%}
-// Store immediate Float value (it is faster than store from XMM register)
-// The instruction usage is guarded by predicate in operand immXF().
-instruct storeX_imm( memory mem, immXF src) %{
- match(Set mem (StoreF mem src));
-
- ins_cost(50);
- format %{ "MOV $mem,$src\t# store float" %}
- opcode(0xC7); /* C7 /0 */
- ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32XF_as_bits( src ));
- ins_pipe( ialu_mem_imm );
-%}
-
// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, eRegI src) %{
match(Set dst src);
@@ -7901,6 +7368,16 @@ instruct unnecessary_membar_volatile() %{
ins_pipe(empty);
%}
+instruct membar_storestore() %{
+ match(MemBarStoreStore);
+ ins_cost(0);
+
+ size(0);
+ format %{ "MEMBAR-storestore (empty encoding)" %}
+ ins_encode( );
+ ins_pipe(empty);
+%}
+
//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
match(Set dst (CastX2P src));
@@ -8088,29 +7565,29 @@ instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
//%}
// Conditional move
-instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
+instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
format %{ "FCMOV$cop $dst,$src\t# double" %}
opcode(0xDA);
- ins_encode( enc_cmov_d(cop,src) );
- ins_pipe( pipe_cmovD_reg );
+ ins_encode( enc_cmov_dpr(cop,src) );
+ ins_pipe( pipe_cmovDPR_reg );
%}
// Conditional move
-instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
+instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
format %{ "FCMOV$cop $dst,$src\t# float" %}
opcode(0xDA);
- ins_encode( enc_cmov_d(cop,src) );
- ins_pipe( pipe_cmovD_reg );
+ ins_encode( enc_cmov_dpr(cop,src) );
+ ins_pipe( pipe_cmovDPR_reg );
%}
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
+instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8118,12 +7595,12 @@ instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
"MOV $dst,$src\t# double\n"
"skip:" %}
opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
- ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
- ins_pipe( pipe_cmovD_reg );
+ ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
+ ins_pipe( pipe_cmovDPR_reg );
%}
// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
-instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
+instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8131,12 +7608,12 @@ instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
"MOV $dst,$src\t# float\n"
"skip:" %}
opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
- ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
- ins_pipe( pipe_cmovD_reg );
+ ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
+ ins_pipe( pipe_cmovDPR_reg );
%}
// No CMOVE with SSE/SSE2
-instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
+instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
predicate (UseSSE>=1);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8154,7 +7631,7 @@ instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
%}
// No CMOVE with SSE/SSE2
-instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
+instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
predicate (UseSSE>=2);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8172,7 +7649,7 @@ instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
%}
// unsigned version
-instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
+instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
predicate (UseSSE>=1);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8189,17 +7666,17 @@ instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
ins_pipe( pipe_slow );
%}
-instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
+instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
predicate (UseSSE>=1);
match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovX_regU(cop, cr, dst, src);
+ fcmovF_regU(cop, cr, dst, src);
%}
%}
// unsigned version
-instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
+instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
predicate (UseSSE>=2);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
@@ -8216,12 +7693,12 @@ instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
ins_pipe( pipe_slow );
%}
-instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
+instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
predicate (UseSSE>=2);
match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovXD_regU(cop, cr, dst, src);
+ fcmovD_regU(cop, cr, dst, src);
%}
%}
@@ -8440,7 +7917,7 @@ instruct loadPLocked(eRegP dst, memory mem) %{
%}
// LoadLong-locked - same as a volatile long load when used with compare-swap
-instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
+instruct loadLLocked(stackSlotL dst, memory mem) %{
predicate(UseSSE<=1);
match(Set dst (LoadLLocked mem));
@@ -8451,18 +7928,21 @@ instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
ins_pipe( fpu_reg_mem );
%}
-instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
+instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
predicate(UseSSE>=2);
match(Set dst (LoadLLocked mem));
effect(TEMP tmp);
ins_cost(180);
format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
"MOVSD $dst,$tmp" %}
- ins_encode(enc_loadLX_volatile(mem, dst, tmp));
+ ins_encode %{
+ __ movdbl($tmp$$XMMRegister, $mem$$Address);
+ __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
+instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
predicate(UseSSE>=2);
match(Set dst (LoadLLocked mem));
effect(TEMP tmp);
@@ -8471,7 +7951,12 @@ instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
"MOVD $dst.lo,$tmp\n\t"
"PSRLQ $tmp,32\n\t"
"MOVD $dst.hi,$tmp" %}
- ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
+ ins_encode %{
+ __ movdbl($tmp$$XMMRegister, $mem$$Address);
+ __ movdl($dst$$Register, $tmp$$XMMRegister);
+ __ psrlq($tmp$$XMMRegister, 32);
+ __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -10054,7 +9539,7 @@ instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
// Compare & branch
// P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
+instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
predicate(VM_Version::supports_cmov() && UseSSE <=1);
match(Set cr (CmpD src1 src2));
effect(KILL rax);
@@ -10066,26 +9551,26 @@ instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
"SAHF\n"
"exit:\tNOP // avoid branch to branch" %}
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
cmpF_P6_fixup );
ins_pipe( pipe_slow );
%}
-instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
+instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
predicate(VM_Version::supports_cmov() && UseSSE <=1);
match(Set cr (CmpD src1 src2));
ins_cost(150);
format %{ "FLD $src1\n\t"
"FUCOMIP ST,$src2 // P6 instruction" %}
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2));
ins_pipe( pipe_slow );
%}
// Compare & branch
-instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
+instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
predicate(UseSSE<=1);
match(Set cr (CmpD src1 src2));
effect(KILL rax);
@@ -10098,138 +9583,140 @@ instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
"MOV AH,1\t# unordered treat as LT\n"
"flags:\tSAHF" %}
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
fpu_flags);
ins_pipe( pipe_slow );
%}
// Compare vs zero into -1,0,1
-instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE<=1);
match(Set dst (CmpD3 src1 zero));
effect(KILL cr, KILL rax);
ins_cost(280);
format %{ "FTSTD $dst,$src1" %}
opcode(0xE4, 0xD9);
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcS, OpcP, PopFPU,
CmpF_Result(dst));
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1
-instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE<=1);
match(Set dst (CmpD3 src1 src2));
effect(KILL cr, KILL rax);
ins_cost(300);
format %{ "FCMPD $dst,$src1,$src2" %}
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
CmpF_Result(dst));
ins_pipe( pipe_slow );
%}
// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
+instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
predicate(UseSSE>=2);
- match(Set cr (CmpD dst src));
- effect(KILL rax);
- ins_cost(125);
- format %{ "COMISD $dst,$src\n"
- "\tJNP exit\n"
- "\tMOV ah,1 // saw a NaN, set CF\n"
- "\tSAHF\n"
- "exit:\tNOP // avoid branch to branch" %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
+ match(Set cr (CmpD src1 src2));
+ ins_cost(145);
+ format %{ "UCOMISD $src1,$src2\n\t"
+ "JNP,s exit\n\t"
+ "PUSHF\t# saw NaN, set CF\n\t"
+ "AND [rsp], #0xffffff2b\n\t"
+ "POPF\n"
+ "exit:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe( pipe_slow );
%}
-instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
+instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
predicate(UseSSE>=2);
- match(Set cr (CmpD dst src));
+ match(Set cr (CmpD src1 src2));
ins_cost(100);
- format %{ "COMISD $dst,$src" %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ format %{ "UCOMISD $src1,$src2" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
+instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
predicate(UseSSE>=2);
- match(Set cr (CmpD dst (LoadD src)));
- effect(KILL rax);
+ match(Set cr (CmpD src1 (LoadD src2)));
ins_cost(145);
- format %{ "COMISD $dst,$src\n"
- "\tJNP exit\n"
- "\tMOV ah,1 // saw a NaN, set CF\n"
- "\tSAHF\n"
- "exit:\tNOP // avoid branch to branch" %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
+ format %{ "UCOMISD $src1,$src2\n\t"
+ "JNP,s exit\n\t"
+ "PUSHF\t# saw NaN, set CF\n\t"
+ "AND [rsp], #0xffffff2b\n\t"
+ "POPF\n"
+ "exit:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe( pipe_slow );
%}
-instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
+instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
predicate(UseSSE>=2);
- match(Set cr (CmpD dst (LoadD src)));
+ match(Set cr (CmpD src1 (LoadD src2)));
ins_cost(100);
- format %{ "COMISD $dst,$src" %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
+ format %{ "UCOMISD $src1,$src2" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1 in XMM
-instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
+instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
predicate(UseSSE>=2);
match(Set dst (CmpD3 src1 src2));
effect(KILL cr);
ins_cost(255);
- format %{ "XOR $dst,$dst\n"
- "\tCOMISD $src1,$src2\n"
- "\tJP,s nan\n"
- "\tJEQ,s exit\n"
- "\tJA,s inc\n"
- "nan:\tDEC $dst\n"
- "\tJMP,s exit\n"
- "inc:\tINC $dst\n"
- "exit:"
- %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
- CmpX_Result(dst));
+ format %{ "UCOMISD $src1, $src2\n\t"
+ "MOV $dst, #-1\n\t"
+ "JP,s done\n\t"
+ "JB,s done\n\t"
+ "SETNE $dst\n\t"
+ "MOVZB $dst, $dst\n"
+ "done:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1 in XMM and memory
-instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
+instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
predicate(UseSSE>=2);
- match(Set dst (CmpD3 src1 (LoadD mem)));
+ match(Set dst (CmpD3 src1 (LoadD src2)));
effect(KILL cr);
ins_cost(275);
- format %{ "COMISD $src1,$mem\n"
- "\tMOV $dst,0\t\t# do not blow flags\n"
- "\tJP,s nan\n"
- "\tJEQ,s exit\n"
- "\tJA,s inc\n"
- "nan:\tDEC $dst\n"
- "\tJMP,s exit\n"
- "inc:\tINC $dst\n"
- "exit:"
- %}
- opcode(0x66, 0x0F, 0x2F);
- ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
- LdImmI(dst,0x0), CmpX_Result(dst));
+ format %{ "UCOMISD $src1, $src2\n\t"
+ "MOV $dst, #-1\n\t"
+ "JP,s done\n\t"
+ "JB,s done\n\t"
+ "SETNE $dst\n\t"
+ "MOVZB $dst, $dst\n"
+ "done:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe( pipe_slow );
%}
-instruct subD_reg(regD dst, regD src) %{
+instruct subDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE <=1);
match(Set dst (SubD dst src));
@@ -10237,12 +9724,12 @@ instruct subD_reg(regD dst, regD src) %{
"DSUBp $dst,ST" %}
opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
ins_cost(150);
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
-instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
predicate (UseSSE <=1);
match(Set dst (RoundDouble (SubD src1 src2)));
ins_cost(250);
@@ -10251,13 +9738,13 @@ instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
"DSUB ST,$src1\n\t"
"FSTP_D $dst\t# D-round" %}
opcode(0xD8, 0x5);
- ins_encode( Push_Reg_D(src2),
- OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+ ins_encode( Push_Reg_DPR(src2),
+ OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
-instruct subD_reg_mem(regD dst, memory src) %{
+instruct subDPR_reg_mem(regDPR dst, memory src) %{
predicate (UseSSE <=1);
match(Set dst (SubD dst (LoadD src)));
ins_cost(150);
@@ -10270,7 +9757,7 @@ instruct subD_reg_mem(regD dst, memory src) %{
ins_pipe( fpu_reg_mem );
%}
-instruct absD_reg(regDPR1 dst, regDPR1 src) %{
+instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst (AbsD src));
ins_cost(100);
@@ -10280,15 +9767,7 @@ instruct absD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( fpu_reg_reg );
%}
-instruct absXD_reg( regXD dst ) %{
- predicate(UseSSE>=2);
- match(Set dst (AbsD dst));
- format %{ "ANDPD $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
- ins_encode( AbsXD_encoding(dst));
- ins_pipe( pipe_slow );
-%}
-
-instruct negD_reg(regDPR1 dst, regDPR1 src) %{
+instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate(UseSSE<=1);
match(Set dst (NegD src));
ins_cost(100);
@@ -10298,18 +9777,7 @@ instruct negD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( fpu_reg_reg );
%}
-instruct negXD_reg( regXD dst ) %{
- predicate(UseSSE>=2);
- match(Set dst (NegD dst));
- format %{ "XORPD $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
- ins_encode %{
- __ xorpd($dst$$XMMRegister,
- ExternalAddress((address)double_signflip_pool));
- %}
- ins_pipe( pipe_slow );
-%}
-
-instruct addD_reg(regD dst, regD src) %{
+instruct addDPR_reg(regDPR dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (AddD dst src));
format %{ "FLD $src\n\t"
@@ -10317,13 +9785,13 @@ instruct addD_reg(regD dst, regD src) %{
size(4);
ins_cost(150);
opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
-instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
predicate(UseSSE<=1);
match(Set dst (RoundDouble (AddD src1 src2)));
ins_cost(250);
@@ -10332,13 +9800,13 @@ instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
"DADD ST,$src1\n\t"
"FSTP_D $dst\t# D-round" %}
opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
- ins_encode( Push_Reg_D(src2),
- OpcP, RegOpc(src1), Pop_Mem_D(dst) );
+ ins_encode( Push_Reg_DPR(src2),
+ OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
-instruct addD_reg_mem(regD dst, memory src) %{
+instruct addDPR_reg_mem(regDPR dst, memory src) %{
predicate(UseSSE<=1);
match(Set dst (AddD dst (LoadD src)));
ins_cost(150);
@@ -10352,7 +9820,7 @@ instruct addD_reg_mem(regD dst, memory src) %{
%}
// add-to-memory
-instruct addD_mem_reg(memory dst, regD src) %{
+instruct addDPR_mem_reg(memory dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
ins_cost(150);
@@ -10368,7 +9836,7 @@ instruct addD_mem_reg(memory dst, regD src) %{
ins_pipe( fpu_reg_mem );
%}
-instruct addD_reg_imm1(regD dst, immD1 con) %{
+instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
predicate(UseSSE<=1);
match(Set dst (AddD dst con));
ins_cost(125);
@@ -10381,7 +9849,7 @@ instruct addD_reg_imm1(regD dst, immD1 con) %{
ins_pipe(fpu_reg);
%}
-instruct addD_reg_imm(regD dst, immD con) %{
+instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
match(Set dst (AddD dst con));
ins_cost(200);
@@ -10394,7 +9862,7 @@ instruct addD_reg_imm(regD dst, immD con) %{
ins_pipe(fpu_reg_mem);
%}
-instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
+instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
match(Set dst (RoundDouble (AddD src con)));
ins_cost(200);
@@ -10409,124 +9877,14 @@ instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
ins_pipe(fpu_mem_reg_con);
%}
-// Add two double precision floating point values in xmm
-instruct addXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (AddD dst src));
- format %{ "ADDSD $dst,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct addXD_imm(regXD dst, immXD con) %{
- predicate(UseSSE>=2);
- match(Set dst (AddD dst con));
- format %{ "ADDSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
- ins_encode %{
- __ addsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct addXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (AddD dst (LoadD mem)));
- format %{ "ADDSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Sub two double precision floating point values in xmm
-instruct subXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (SubD dst src));
- format %{ "SUBSD $dst,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct subXD_imm(regXD dst, immXD con) %{
- predicate(UseSSE>=2);
- match(Set dst (SubD dst con));
- format %{ "SUBSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
- ins_encode %{
- __ subsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct subXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (SubD dst (LoadD mem)));
- format %{ "SUBSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Mul two double precision floating point values in xmm
-instruct mulXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (MulD dst src));
- format %{ "MULSD $dst,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct mulXD_imm(regXD dst, immXD con) %{
- predicate(UseSSE>=2);
- match(Set dst (MulD dst con));
- format %{ "MULSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
- ins_encode %{
- __ mulsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct mulXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (MulD dst (LoadD mem)));
- format %{ "MULSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Div two double precision floating point values in xmm
-instruct divXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (DivD dst src));
- format %{ "DIVSD $dst,$src" %}
- opcode(0xF2, 0x0F, 0x5E);
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct divXD_imm(regXD dst, immXD con) %{
- predicate(UseSSE>=2);
- match(Set dst (DivD dst con));
- format %{ "DIVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
- ins_encode %{
- __ divsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct divXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (DivD dst (LoadD mem)));
- format %{ "DIVSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-
-instruct mulD_reg(regD dst, regD src) %{
+instruct mulDPR_reg(regDPR dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (MulD dst src));
format %{ "FLD $src\n\t"
"DMULp $dst,ST" %}
opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
ins_cost(150);
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
@@ -10539,7 +9897,7 @@ instruct mulD_reg(regD dst, regD src) %{
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
-instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
+instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
match(Set dst (MulD dst src));
ins_cost(1); // Select this instruction for all strict FP double multiplies
@@ -10552,13 +9910,13 @@ instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
"DMULp $dst,ST\n\t" %}
opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
ins_encode( strictfp_bias1(dst),
- Push_Reg_D(src),
+ Push_Reg_DPR(src),
OpcP, RegOpc(dst),
strictfp_bias2(dst) );
ins_pipe( fpu_reg_reg );
%}
-instruct mulD_reg_imm(regD dst, immD con) %{
+instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
match(Set dst (MulD dst con));
ins_cost(200);
@@ -10572,7 +9930,7 @@ instruct mulD_reg_imm(regD dst, immD con) %{
%}
-instruct mulD_reg_mem(regD dst, memory src) %{
+instruct mulDPR_reg_mem(regDPR dst, memory src) %{
predicate( UseSSE<=1 );
match(Set dst (MulD dst (LoadD src)));
ins_cost(200);
@@ -10586,7 +9944,7 @@ instruct mulD_reg_mem(regD dst, memory src) %{
//
// Cisc-alternate to reg-reg multiply
-instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
+instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
predicate( UseSSE<=1 );
match(Set dst (MulD src (LoadD mem)));
ins_cost(250);
@@ -10595,17 +9953,17 @@ instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
"FSTP_D $dst" %}
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
- OpcReg_F(src),
- Pop_Reg_D(dst) );
+ OpcReg_FPR(src),
+ Pop_Reg_DPR(dst) );
ins_pipe( fpu_reg_reg_mem );
%}
-// MACRO3 -- addD a mulD
+// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
-instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
+instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
predicate( UseSSE<=1 );
match(Set src2 (AddD (MulD src0 src1) src2));
format %{ "FLD $src0\t# ===MACRO3d===\n\t"
@@ -10613,29 +9971,29 @@ instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
"DADDp $src2,ST" %}
ins_cost(250);
opcode(0xDD); /* LoadD DD /0 */
- ins_encode( Push_Reg_F(src0),
+ ins_encode( Push_Reg_FPR(src0),
FMul_ST_reg(src1),
FAddP_reg_ST(src2) );
ins_pipe( fpu_reg_reg_reg );
%}
-// MACRO3 -- subD a mulD
-instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
+// MACRO3 -- subDPR a mulDPR
+instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
predicate( UseSSE<=1 );
match(Set src2 (SubD (MulD src0 src1) src2));
format %{ "FLD $src0\t# ===MACRO3d===\n\t"
"DMUL ST,$src1\n\t"
"DSUBRp $src2,ST" %}
ins_cost(250);
- ins_encode( Push_Reg_F(src0),
+ ins_encode( Push_Reg_FPR(src0),
FMul_ST_reg(src1),
Opcode(0xDE), Opc_plus(0xE0,src2));
ins_pipe( fpu_reg_reg_reg );
%}
-instruct divD_reg(regD dst, regD src) %{
+instruct divDPR_reg(regDPR dst, regDPR src) %{
predicate( UseSSE<=1 );
match(Set dst (DivD dst src));
@@ -10643,7 +10001,7 @@ instruct divD_reg(regD dst, regD src) %{
"FDIVp $dst,ST" %}
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
ins_cost(150);
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
@@ -10656,7 +10014,7 @@ instruct divD_reg(regD dst, regD src) %{
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
-instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
+instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst (DivD dst src));
predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
@@ -10670,13 +10028,13 @@ instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
"DMULp $dst,ST\n\t" %}
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
ins_encode( strictfp_bias1(dst),
- Push_Reg_D(src),
+ Push_Reg_DPR(src),
OpcP, RegOpc(dst),
strictfp_bias2(dst) );
ins_pipe( fpu_reg_reg );
%}
-instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
+instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
match(Set dst (RoundDouble (DivD src1 src2)));
@@ -10684,27 +10042,27 @@ instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
"FDIV ST,$src2\n\t"
"FSTP_D $dst\t# D-round" %}
opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
- ins_encode( Push_Reg_D(src1),
- OpcP, RegOpc(src2), Pop_Mem_D(dst) );
+ ins_encode( Push_Reg_DPR(src1),
+ OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
-instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
+instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE<=1);
match(Set dst (ModD dst src));
- effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+ effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
format %{ "DMOD $dst,$src" %}
ins_cost(250);
- ins_encode(Push_Reg_Mod_D(dst, src),
- emitModD(),
- Push_Result_Mod_D(src),
- Pop_Reg_D(dst));
+ ins_encode(Push_Reg_Mod_DPR(dst, src),
+ emitModDPR(),
+ Push_Result_Mod_DPR(src),
+ Pop_Reg_DPR(dst));
ins_pipe( pipe_slow );
%}
-instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
+instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE>=2);
match(Set dst (ModD src0 src1));
effect(KILL rax, KILL cr);
@@ -10725,11 +10083,11 @@ instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr)
"\tFSTP ST0\t # Restore FPU Stack"
%}
ins_cost(250);
- ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
+ ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
ins_pipe( pipe_slow );
%}
-instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
+instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst (SinD src));
ins_cost(1800);
@@ -10739,18 +10097,18 @@ instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
+instruct sinD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst (SinD dst));
- effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+ effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
ins_cost(1800);
format %{ "DSIN $dst" %}
opcode(0xD9, 0xFE);
- ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
+ ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
+instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst (CosD src));
ins_cost(1800);
@@ -10760,18 +10118,18 @@ instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
+instruct cosD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst (CosD dst));
- effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+ effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
ins_cost(1800);
format %{ "DCOS $dst" %}
opcode(0xD9, 0xFF);
- ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
+ ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
+instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst(TanD src));
format %{ "DTAN $dst" %}
@@ -10780,50 +10138,50 @@ instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
+instruct tanD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst(TanD dst));
- effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+ effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
format %{ "DTAN $dst" %}
- ins_encode( Push_SrcXD(dst),
+ ins_encode( Push_SrcD(dst),
Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8), // fstp st
- Push_ResultXD(dst) );
+ Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct atanD_reg(regD dst, regD src) %{
+instruct atanDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE<=1);
match(Set dst(AtanD dst src));
format %{ "DATA $dst,$src" %}
opcode(0xD9, 0xF3);
- ins_encode( Push_Reg_D(src),
+ ins_encode( Push_Reg_DPR(src),
OpcP, OpcS, RegOpc(dst) );
ins_pipe( pipe_slow );
%}
-instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst(AtanD dst src));
- effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
+ effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
format %{ "DATA $dst,$src" %}
opcode(0xD9, 0xF3);
- ins_encode( Push_SrcXD(src),
- OpcP, OpcS, Push_ResultXD(dst) );
+ ins_encode( Push_SrcD(src),
+ OpcP, OpcS, Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct sqrtD_reg(regD dst, regD src) %{
+instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE<=1);
match(Set dst (SqrtD src));
format %{ "DSQRT $dst,$src" %}
opcode(0xFA, 0xD9);
- ins_encode( Push_Reg_D(src),
- OpcS, OpcP, Pop_Reg_D(dst) );
+ ins_encode( Push_Reg_DPR(src),
+ OpcS, OpcP, Pop_Reg_DPR(dst) );
ins_pipe( pipe_slow );
%}
-instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
predicate (UseSSE<=1);
match(Set Y (PowD X Y)); // Raise X to the Yth power
effect(KILL rax, KILL rbx, KILL rcx);
@@ -10852,14 +10210,14 @@ instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
"ADD ESP,8"
%}
ins_encode( push_stack_temp_qword,
- Push_Reg_D(X),
+ Push_Reg_DPR(X),
Opcode(0xD9), Opcode(0xF1), // fyl2x
pow_exp_core_encoding,
pop_stack_temp_qword);
ins_pipe( pipe_slow );
%}
-instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
+instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
predicate (UseSSE>=2);
match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power
effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
@@ -10897,12 +10255,12 @@ instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax,
push_xmm_to_fpr1(src0),
Opcode(0xD9), Opcode(0xF1), // fyl2x
pow_exp_core_encoding,
- Push_ResultXD(dst) );
+ Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
predicate (UseSSE<=1);
match(Set dpr1 (ExpD dpr1));
effect(KILL rax, KILL rbx, KILL rcx);
@@ -10938,7 +10296,7 @@ instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
ins_pipe( pipe_slow );
%}
-instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
+instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
predicate (UseSSE>=2);
match(Set dst (ExpD src));
effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
@@ -10969,17 +10327,17 @@ instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx,
"MOVSD $dst,[ESP]\n\t"
"ADD ESP,8"
%}
- ins_encode( Push_SrcXD(src),
+ ins_encode( Push_SrcD(src),
Opcode(0xD9), Opcode(0xEA), // fldl2e
Opcode(0xDE), Opcode(0xC9), // fmulp
pow_exp_core_encoding,
- Push_ResultXD(dst) );
+ Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
-instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
+instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
// The source Double operand on FPU stack
match(Set dst (Log10D src));
@@ -10997,7 +10355,7 @@ instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
effect(KILL cr);
match(Set dst (Log10D src));
@@ -11007,14 +10365,14 @@ instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
- Push_SrcXD(src),
+ Push_SrcD(src),
Opcode(0xD9), Opcode(0xF1), // fyl2x
- Push_ResultXD(dst));
+ Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
-instruct logD_reg(regDPR1 dst, regDPR1 src) %{
+instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
// The source Double operand on FPU stack
match(Set dst (LogD src));
@@ -11032,7 +10390,7 @@ instruct logD_reg(regDPR1 dst, regDPR1 src) %{
ins_pipe( pipe_slow );
%}
-instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
+instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
effect(KILL cr);
// The source and result Double operands in XMM registers
@@ -11043,9 +10401,9 @@ instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
"FYL2X \t\t\t# Q=Log_e*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2
- Push_SrcXD(src),
+ Push_SrcD(src),
Opcode(0xD9), Opcode(0xF1), // fyl2x
- Push_ResultXD(dst));
+ Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
@@ -11066,7 +10424,7 @@ instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
// exit:
// P6 version of float compare, sets condition codes in EFLAGS
-instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
+instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
predicate(VM_Version::supports_cmov() && UseSSE == 0);
match(Set cr (CmpF src1 src2));
effect(KILL rax);
@@ -11078,27 +10436,27 @@ instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
"SAHF\n"
"exit:\tNOP // avoid branch to branch" %}
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
cmpF_P6_fixup );
ins_pipe( pipe_slow );
%}
-instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
+instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
predicate(VM_Version::supports_cmov() && UseSSE == 0);
match(Set cr (CmpF src1 src2));
ins_cost(100);
format %{ "FLD $src1\n\t"
"FUCOMIP ST,$src2 // P6 instruction" %}
opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2));
ins_pipe( pipe_slow );
%}
// Compare & branch
-instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
+instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
predicate(UseSSE == 0);
match(Set cr (CmpF src1 src2));
effect(KILL rax);
@@ -11111,328 +10469,190 @@ instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
"MOV AH,1\t# unordered treat as LT\n"
"flags:\tSAHF" %}
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
fpu_flags);
ins_pipe( pipe_slow );
%}
// Compare vs zero into -1,0,1
-instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE == 0);
match(Set dst (CmpF3 src1 zero));
effect(KILL cr, KILL rax);
ins_cost(280);
format %{ "FTSTF $dst,$src1" %}
opcode(0xE4, 0xD9);
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcS, OpcP, PopFPU,
CmpF_Result(dst));
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1
-instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE == 0);
match(Set dst (CmpF3 src1 src2));
effect(KILL cr, KILL rax);
ins_cost(300);
format %{ "FCMPF $dst,$src1,$src2" %}
opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
- ins_encode( Push_Reg_D(src1),
+ ins_encode( Push_Reg_DPR(src1),
OpcP, RegOpc(src2),
CmpF_Result(dst));
ins_pipe( pipe_slow );
%}
// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
+instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
predicate(UseSSE>=1);
- match(Set cr (CmpF dst src));
- effect(KILL rax);
+ match(Set cr (CmpF src1 src2));
ins_cost(145);
- format %{ "COMISS $dst,$src\n"
- "\tJNP exit\n"
- "\tMOV ah,1 // saw a NaN, set CF\n"
- "\tSAHF\n"
- "exit:\tNOP // avoid branch to branch" %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
+ format %{ "UCOMISS $src1,$src2\n\t"
+ "JNP,s exit\n\t"
+ "PUSHF\t# saw NaN, set CF\n\t"
+ "AND [rsp], #0xffffff2b\n\t"
+ "POPF\n"
+ "exit:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe( pipe_slow );
%}
-instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
+instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
predicate(UseSSE>=1);
- match(Set cr (CmpF dst src));
+ match(Set cr (CmpF src1 src2));
ins_cost(100);
- format %{ "COMISS $dst,$src" %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegReg(dst, src));
+ format %{ "UCOMISS $src1,$src2" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// float compare and set condition codes in EFLAGS by XMM regs
-instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
+instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
predicate(UseSSE>=1);
- match(Set cr (CmpF dst (LoadF src)));
- effect(KILL rax);
+ match(Set cr (CmpF src1 (LoadF src2)));
ins_cost(165);
- format %{ "COMISS $dst,$src\n"
- "\tJNP exit\n"
- "\tMOV ah,1 // saw a NaN, set CF\n"
- "\tSAHF\n"
- "exit:\tNOP // avoid branch to branch" %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
+ format %{ "UCOMISS $src1,$src2\n\t"
+ "JNP,s exit\n\t"
+ "PUSHF\t# saw NaN, set CF\n\t"
+ "AND [rsp], #0xffffff2b\n\t"
+ "POPF\n"
+ "exit:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe( pipe_slow );
%}
-instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
+instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
predicate(UseSSE>=1);
- match(Set cr (CmpF dst (LoadF src)));
+ match(Set cr (CmpF src1 (LoadF src2)));
ins_cost(100);
- format %{ "COMISS $dst,$src" %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegMem(dst, src));
+ format %{ "UCOMISS $src1,$src2" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1 in XMM
-instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
+instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
predicate(UseSSE>=1);
match(Set dst (CmpF3 src1 src2));
effect(KILL cr);
ins_cost(255);
- format %{ "XOR $dst,$dst\n"
- "\tCOMISS $src1,$src2\n"
- "\tJP,s nan\n"
- "\tJEQ,s exit\n"
- "\tJA,s inc\n"
- "nan:\tDEC $dst\n"
- "\tJMP,s exit\n"
- "inc:\tINC $dst\n"
- "exit:"
- %}
- opcode(0x0F, 0x2F);
- ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
+ format %{ "UCOMISS $src1, $src2\n\t"
+ "MOV $dst, #-1\n\t"
+ "JP,s done\n\t"
+ "JB,s done\n\t"
+ "SETNE $dst\n\t"
+ "MOVZB $dst, $dst\n"
+ "done:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe( pipe_slow );
%}
// Compare into -1,0,1 in XMM and memory
-instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
+instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
predicate(UseSSE>=1);
- match(Set dst (CmpF3 src1 (LoadF mem)));
+ match(Set dst (CmpF3 src1 (LoadF src2)));
effect(KILL cr);
ins_cost(275);
- format %{ "COMISS $src1,$mem\n"
- "\tMOV $dst,0\t\t# do not blow flags\n"
- "\tJP,s nan\n"
- "\tJEQ,s exit\n"
- "\tJA,s inc\n"
- "nan:\tDEC $dst\n"
- "\tJMP,s exit\n"
- "inc:\tINC $dst\n"
- "exit:"
- %}
- opcode(0x0F, 0x2F);
- ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
+ format %{ "UCOMISS $src1, $src2\n\t"
+ "MOV $dst, #-1\n\t"
+ "JP,s done\n\t"
+ "JB,s done\n\t"
+ "SETNE $dst\n\t"
+ "MOVZB $dst, $dst\n"
+ "done:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe( pipe_slow );
%}
// Spill to obtain 24-bit precision
-instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (SubF src1 src2));
format %{ "FSUB $dst,$src1 - $src2" %}
opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
- ins_encode( Push_Reg_F(src1),
- OpcReg_F(src2),
- Pop_Mem_F(dst) );
+ ins_encode( Push_Reg_FPR(src1),
+ OpcReg_FPR(src2),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
-instruct subF_reg(regF dst, regF src) %{
+instruct subFPR_reg(regFPR dst, regFPR src) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (SubF dst src));
format %{ "FSUB $dst,$src" %}
opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
- ins_encode( Push_Reg_F(src),
+ ins_encode( Push_Reg_FPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
// Spill to obtain 24-bit precision
-instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 src2));
format %{ "FADD $dst,$src1,$src2" %}
opcode(0xD8, 0x0); /* D8 C0+i */
- ins_encode( Push_Reg_F(src2),
- OpcReg_F(src1),
- Pop_Mem_F(dst) );
+ ins_encode( Push_Reg_FPR(src2),
+ OpcReg_FPR(src1),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
-instruct addF_reg(regF dst, regF src) %{
+instruct addFPR_reg(regFPR dst, regFPR src) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (AddF dst src));
format %{ "FLD $src\n\t"
"FADDp $dst,ST" %}
opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
- ins_encode( Push_Reg_F(src),
+ ins_encode( Push_Reg_FPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
-// Add two single precision floating point values in xmm
-instruct addX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (AddF dst src));
- format %{ "ADDSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct addX_imm(regX dst, immXF con) %{
- predicate(UseSSE>=1);
- match(Set dst (AddF dst con));
- format %{ "ADDSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
- ins_encode %{
- __ addss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct addX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (AddF dst (LoadF mem)));
- format %{ "ADDSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
- ins_pipe( pipe_slow );
-%}
-
-// Subtract two single precision floating point values in xmm
-instruct subX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (SubF dst src));
- format %{ "SUBSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct subX_imm(regX dst, immXF con) %{
- predicate(UseSSE>=1);
- match(Set dst (SubF dst con));
- format %{ "SUBSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
- ins_encode %{
- __ subss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct subX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (SubF dst (LoadF mem)));
- format %{ "SUBSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Multiply two single precision floating point values in xmm
-instruct mulX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (MulF dst src));
- format %{ "MULSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct mulX_imm(regX dst, immXF con) %{
- predicate(UseSSE>=1);
- match(Set dst (MulF dst con));
- format %{ "MULSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
- ins_encode %{
- __ mulss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct mulX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (MulF dst (LoadF mem)));
- format %{ "MULSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Divide two single precision floating point values in xmm
-instruct divX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (DivF dst src));
- format %{ "DIVSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct divX_imm(regX dst, immXF con) %{
- predicate(UseSSE>=1);
- match(Set dst (DivF dst con));
- format %{ "DIVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
- ins_encode %{
- __ divss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct divX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (DivF dst (LoadF mem)));
- format %{ "DIVSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
- ins_pipe( pipe_slow );
-%}
-
-// Get the square root of a single precision floating point values in xmm
-instruct sqrtX_reg(regX dst, regX src) %{
- predicate(UseSSE>=1);
- match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
- format %{ "SQRTSS $dst,$src" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct sqrtX_mem(regX dst, memory mem) %{
- predicate(UseSSE>=1);
- match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
- format %{ "SQRTSS $dst,$mem" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
- ins_pipe( pipe_slow );
-%}
-
-// Get the square root of a double precision floating point values in xmm
-instruct sqrtXD_reg(regXD dst, regXD src) %{
- predicate(UseSSE>=2);
- match(Set dst (SqrtD src));
- format %{ "SQRTSD $dst,$src" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
- ins_pipe( pipe_slow );
-%}
-
-instruct sqrtXD_mem(regXD dst, memory mem) %{
- predicate(UseSSE>=2);
- match(Set dst (SqrtD (LoadD mem)));
- format %{ "SQRTSD $dst,$mem" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
- ins_pipe( pipe_slow );
-%}
-
-instruct absF_reg(regFPR1 dst, regFPR1 src) %{
+instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
predicate(UseSSE==0);
match(Set dst (AbsF src));
ins_cost(100);
@@ -11442,15 +10662,7 @@ instruct absF_reg(regFPR1 dst, regFPR1 src) %{
ins_pipe( fpu_reg_reg );
%}
-instruct absX_reg(regX dst ) %{
- predicate(UseSSE>=1);
- match(Set dst (AbsF dst));
- format %{ "ANDPS $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
- ins_encode( AbsXF_encoding(dst));
- ins_pipe( pipe_slow );
-%}
-
-instruct negF_reg(regFPR1 dst, regFPR1 src) %{
+instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
predicate(UseSSE==0);
match(Set dst (NegF src));
ins_cost(100);
@@ -11460,17 +10672,9 @@ instruct negF_reg(regFPR1 dst, regFPR1 src) %{
ins_pipe( fpu_reg_reg );
%}
-instruct negX_reg( regX dst ) %{
- predicate(UseSSE>=1);
- match(Set dst (NegF dst));
- format %{ "XORPS $dst,[0x80000000]\t# CHS F by sign flipping" %}
- ins_encode( NegXF_encoding(dst));
- ins_pipe( pipe_slow );
-%}
-
-// Cisc-alternate to addF_reg
+// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
-instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 (LoadF src2)));
@@ -11479,14 +10683,14 @@ instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
"FSTP_S $dst" %}
opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
- OpcReg_F(src1),
- Pop_Mem_F(dst) );
+ OpcReg_FPR(src1),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_mem );
%}
//
-// Cisc-alternate to addF_reg
+// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
-instruct addF_reg_mem(regF dst, memory src) %{
+instruct addFPR_reg_mem(regFPR dst, memory src) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (AddF dst (LoadF src)));
@@ -11499,21 +10703,21 @@ instruct addF_reg_mem(regF dst, memory src) %{
// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
-instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
+instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 src2));
format %{ "FADD $dst,$src1,$src2" %}
opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
- OpcReg_F(src2),
- Pop_Mem_F(dst) );
+ OpcReg_FPR(src2),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_mem );
%}
// Cisc-spill variant
// Spill to obtain 24-bit precision
-instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
+instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 (LoadF src2)));
@@ -11522,12 +10726,12 @@ instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
set_instruction_start,
OpcP, RMopc_Mem(secondary,src1),
- Pop_Mem_F(dst) );
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_mem_mem );
%}
// Spill to obtain 24-bit precision
-instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src1 src2));
@@ -11536,13 +10740,13 @@ instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
set_instruction_start,
OpcP, RMopc_Mem(secondary,src1),
- Pop_Mem_F(dst) );
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_mem_mem );
%}
// Spill to obtain 24-bit precision
-instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
+instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (AddF src con));
format %{ "FLD $src\n\t"
@@ -11557,7 +10761,7 @@ instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
%}
//
// This instruction does not round to 24-bits
-instruct addF_reg_imm(regF dst, regF src, immF con) %{
+instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (AddF src con));
format %{ "FLD $src\n\t"
@@ -11572,7 +10776,7 @@ instruct addF_reg_imm(regF dst, regF src, immF con) %{
%}
// Spill to obtain 24-bit precision
-instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 src2));
@@ -11580,14 +10784,14 @@ instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
"FMUL $src2\n\t"
"FSTP_S $dst" %}
opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
- ins_encode( Push_Reg_F(src1),
- OpcReg_F(src2),
- Pop_Mem_F(dst) );
+ ins_encode( Push_Reg_FPR(src1),
+ OpcReg_FPR(src2),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
-instruct mulF_reg(regF dst, regF src1, regF src2) %{
+instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 src2));
@@ -11595,16 +10799,16 @@ instruct mulF_reg(regF dst, regF src1, regF src2) %{
"FMUL $src2\n\t"
"FSTP_S $dst" %}
opcode(0xD8, 0x1); /* D8 C8+i */
- ins_encode( Push_Reg_F(src2),
- OpcReg_F(src1),
- Pop_Reg_F(dst) );
+ ins_encode( Push_Reg_FPR(src2),
+ OpcReg_FPR(src1),
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_reg_reg );
%}
// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
-instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
+instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 (LoadF src2)));
@@ -11613,27 +10817,27 @@ instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
"FSTP_S $dst" %}
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
- OpcReg_F(src1),
- Pop_Mem_F(dst) );
+ OpcReg_FPR(src1),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
-instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
+instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 (LoadF src2)));
format %{ "FMUL $dst,$src1,$src2" %}
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
- OpcReg_F(src1),
- Pop_Reg_F(dst) );
+ OpcReg_FPR(src1),
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_reg_mem );
%}
// Spill to obtain 24-bit precision
-instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
+instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (MulF src1 src2));
@@ -11642,12 +10846,12 @@ instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
set_instruction_start,
OpcP, RMopc_Mem(secondary,src1),
- Pop_Mem_F(dst) );
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_mem_mem );
%}
// Spill to obtain 24-bit precision
-instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
+instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (MulF src con));
@@ -11663,7 +10867,7 @@ instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
%}
//
// This instruction does not round to 24-bits
-instruct mulF_reg_imm(regF dst, regF src, immF con) %{
+instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (MulF src con));
@@ -11680,9 +10884,9 @@ instruct mulF_reg_imm(regF dst, regF src, immF con) %{
//
-// MACRO1 -- subsume unshared load into mulF
+// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
-instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
+instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (MulF (LoadF mem1) src));
@@ -11691,36 +10895,36 @@ instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
"FSTP $dst" %}
opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
- OpcReg_F(src),
- Pop_Reg_F(dst) );
+ OpcReg_FPR(src),
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_reg_mem );
%}
//
-// MACRO2 -- addF a mulF which subsumed an unshared load
+// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
-instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
+instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
ins_cost(95);
format %{ "FLD $mem1 ===MACRO2===\n\t"
- "FMUL ST,$src1 subsume mulF left load\n\t"
+ "FMUL ST,$src1 subsume mulFPR left load\n\t"
"FADD ST,$src2\n\t"
"FSTP $dst" %}
opcode(0xD9); /* LoadF D9 /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem1),
FMul_ST_reg(src1),
FAdd_ST_reg(src2),
- Pop_Reg_F(dst) );
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem_reg_reg );
%}
-// MACRO3 -- addF a mulF
+// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
-instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
+instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set src2 (AddF (MulF src0 src1) src2));
@@ -11728,15 +10932,15 @@ instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
"FMUL ST,$src1\n\t"
"FADDP $src2,ST" %}
opcode(0xD9); /* LoadF D9 /0 */
- ins_encode( Push_Reg_F(src0),
+ ins_encode( Push_Reg_FPR(src0),
FMul_ST_reg(src1),
FAddP_reg_ST(src2) );
ins_pipe( fpu_reg_reg_reg );
%}
-// MACRO4 -- divF subF
+// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
-instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
+instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (DivF (SubF src2 src1) src3));
@@ -11745,67 +10949,67 @@ instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
"FDIV ST,$src3\n\t"
"FSTP $dst" %}
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
- ins_encode( Push_Reg_F(src2),
- subF_divF_encode(src1,src3),
- Pop_Reg_F(dst) );
+ ins_encode( Push_Reg_FPR(src2),
+ subFPR_divFPR_encode(src1,src3),
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_reg_reg_reg );
%}
// Spill to obtain 24-bit precision
-instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
+instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (DivF src1 src2));
format %{ "FDIV $dst,$src1,$src2" %}
opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
- ins_encode( Push_Reg_F(src1),
- OpcReg_F(src2),
- Pop_Mem_F(dst) );
+ ins_encode( Push_Reg_FPR(src1),
+ OpcReg_FPR(src2),
+ Pop_Mem_FPR(dst) );
ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
-instruct divF_reg(regF dst, regF src) %{
+instruct divFPR_reg(regFPR dst, regFPR src) %{
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (DivF dst src));
format %{ "FDIV $dst,$src" %}
opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
- ins_encode( Push_Reg_F(src),
+ ins_encode( Push_Reg_FPR(src),
OpcP, RegOpc(dst) );
ins_pipe( fpu_reg_reg );
%}
// Spill to obtain 24-bit precision
-instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
+instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (ModF src1 src2));
- effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+ effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
format %{ "FMOD $dst,$src1,$src2" %}
- ins_encode( Push_Reg_Mod_D(src1, src2),
- emitModD(),
- Push_Result_Mod_D(src2),
- Pop_Mem_F(dst));
+ ins_encode( Push_Reg_Mod_DPR(src1, src2),
+ emitModDPR(),
+ Push_Result_Mod_DPR(src2),
+ Pop_Mem_FPR(dst));
ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
-instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
+instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (ModF dst src));
- effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
+ effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
format %{ "FMOD $dst,$src" %}
- ins_encode(Push_Reg_Mod_D(dst, src),
- emitModD(),
- Push_Result_Mod_D(src),
- Pop_Reg_F(dst));
+ ins_encode(Push_Reg_Mod_DPR(dst, src),
+ emitModDPR(),
+ Push_Result_Mod_DPR(src),
+ Pop_Reg_FPR(dst));
ins_pipe( pipe_slow );
%}
-instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
+instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
predicate(UseSSE>=1);
match(Set dst (ModF src0 src1));
effect(KILL rax, KILL cr);
@@ -11825,7 +11029,7 @@ instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
"\tFSTP ST0\t # Restore FPU Stack"
%}
ins_cost(250);
- ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
+ ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
ins_pipe( pipe_slow );
%}
@@ -11833,26 +11037,26 @@ instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!
-instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
+instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (RoundFloat src));
ins_cost(125);
format %{ "FST_S $dst,$src\t# F-round" %}
- ins_encode( Pop_Mem_Reg_F(dst, src) );
+ ins_encode( Pop_Mem_Reg_FPR(dst, src) );
ins_pipe( fpu_mem_reg );
%}
-instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
+instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (RoundDouble src));
ins_cost(125);
format %{ "FST_D $dst,$src\t# D-round" %}
- ins_encode( Pop_Mem_Reg_D(dst, src) );
+ ins_encode( Pop_Mem_Reg_DPR(dst, src) );
ins_pipe( fpu_mem_reg );
%}
// Force rounding to 24-bit precision and 6-bit exponent
-instruct convD2F_reg(stackSlotF dst, regD src) %{
+instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
predicate(UseSSE==0);
match(Set dst (ConvD2F src));
format %{ "FST_S $dst,$src\t# F-round" %}
@@ -11862,7 +11066,7 @@ instruct convD2F_reg(stackSlotF dst, regD src) %{
%}
// Force rounding to 24-bit precision and 6-bit exponent
-instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
+instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
predicate(UseSSE==1);
match(Set dst (ConvD2F src));
effect( KILL cr );
@@ -11870,29 +11074,40 @@ instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
"FST_S [ESP],$src\t# F-round\n\t"
"MOVSS $dst,[ESP]\n\t"
"ADD ESP,4" %}
- ins_encode( D2X_encoding(dst, src) );
+ ins_encode %{
+ __ subptr(rsp, 4);
+ if ($src$$reg != FPR1L_enc) {
+ __ fld_s($src$$reg-1);
+ __ fstp_s(Address(rsp, 0));
+ } else {
+ __ fst_s(Address(rsp, 0));
+ }
+ __ movflt($dst$$XMMRegister, Address(rsp, 0));
+ __ addptr(rsp, 4);
+ %}
ins_pipe( pipe_slow );
%}
// Force rounding double precision to single precision
-instruct convXD2X_reg(regX dst, regXD src) %{
+instruct convD2F_reg(regF dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (ConvD2F src));
format %{ "CVTSD2SS $dst,$src\t# F-round" %}
- opcode(0xF2, 0x0F, 0x5A);
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ ins_encode %{
+ __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convF2D_reg_reg(regD dst, regF src) %{
+instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (ConvF2D src));
format %{ "FST_S $dst,$src\t# D-round" %}
- ins_encode( Pop_Reg_Reg_D(dst, src));
+ ins_encode( Pop_Reg_Reg_DPR(dst, src));
ins_pipe( fpu_reg_reg );
%}
-instruct convF2D_reg(stackSlotD dst, regF src) %{
+instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
predicate(UseSSE==1);
match(Set dst (ConvF2D src));
format %{ "FST_D $dst,$src\t# D-round" %}
@@ -11901,7 +11116,7 @@ instruct convF2D_reg(stackSlotD dst, regF src) %{
%}
%}
-instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
+instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
predicate(UseSSE==1);
match(Set dst (ConvF2D src));
effect( KILL cr );
@@ -11910,21 +11125,28 @@ instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
"FLD_S [ESP]\n\t"
"ADD ESP,4\n\t"
"FSTP $dst\t# D-round" %}
- ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
+ ins_encode %{
+ __ subptr(rsp, 4);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_s(Address(rsp, 0));
+ __ addptr(rsp, 4);
+ __ fstp_d($dst$$reg);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convX2XD_reg(regXD dst, regX src) %{
+instruct convF2D_reg(regD dst, regF src) %{
predicate(UseSSE>=2);
match(Set dst (ConvF2D src));
format %{ "CVTSS2SD $dst,$src\t# D-round" %}
- opcode(0xF3, 0x0F, 0x5A);
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ ins_encode %{
+ __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
-instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
+instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
predicate(UseSSE<=1);
match(Set dst (ConvD2I src));
effect( KILL tmp, KILL cr );
@@ -11939,12 +11161,12 @@ instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
"FLD_D $src\n\t"
"CALL d2i_wrapper\n"
"fast:" %}
- ins_encode( Push_Reg_D(src), D2I_encoding(src) );
+ ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
ins_pipe( pipe_slow );
%}
// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
-instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
+instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
predicate(UseSSE>=2);
match(Set dst (ConvD2I src));
effect( KILL tmp, KILL cr );
@@ -11957,12 +11179,22 @@ instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %
"ADD ESP, 8\n\t"
"CALL d2i_wrapper\n"
"fast:" %}
- opcode(0x1); // double-precision conversion
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
+ ins_encode %{
+ Label fast;
+ __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+ __ cmpl($dst$$Register, 0x80000000);
+ __ jccb(Assembler::notEqual, fast);
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_d(Address(rsp, 0));
+ __ addptr(rsp, 8);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
+ __ bind(fast);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
+instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
predicate(UseSSE<=1);
match(Set dst (ConvD2L src));
effect( KILL cr );
@@ -11980,12 +11212,12 @@ instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
"FLD $src\n\t"
"CALL d2l_wrapper\n"
"fast:" %}
- ins_encode( Push_Reg_D(src), D2L_encoding(src) );
+ ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
ins_pipe( pipe_slow );
%}
// XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
+instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
predicate (UseSSE>=2);
match(Set dst (ConvD2L src));
effect( KILL cr );
@@ -12004,9 +11236,36 @@ instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
"SUB ESP,8\n\t"
"MOVSD [ESP],$src\n\t"
"FLD_D [ESP]\n\t"
+ "ADD ESP,8\n\t"
"CALL d2l_wrapper\n"
"fast:" %}
- ins_encode( XD2L_encoding(src) );
+ ins_encode %{
+ Label fast;
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_d(Address(rsp, 0));
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
+ __ fistp_d(Address(rsp, 0));
+ // Restore the rounding mode, mask the exception
+ if (Compile::current()->in_24_bit_fp_mode()) {
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+ } else {
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
+ }
+ // Load the converted long, adjust CPU stack
+ __ pop(rax);
+ __ pop(rdx);
+ __ cmpl(rdx, 0x80000000);
+ __ jccb(Assembler::notEqual, fast);
+ __ testl(rax, rax);
+ __ jccb(Assembler::notEqual, fast);
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_d(Address(rsp, 0));
+ __ addptr(rsp, 8);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
+ __ bind(fast);
+ %}
ins_pipe( pipe_slow );
%}
@@ -12016,7 +11275,7 @@ instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
-// and go the slow path if needed.
+// go the slow path if needed.
-instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
+instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
predicate(UseSSE==0);
match(Set dst (ConvF2I src));
effect( KILL tmp, KILL cr );
@@ -12031,13 +11290,13 @@ instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
"FLD $src\n\t"
"CALL d2i_wrapper\n"
"fast:" %}
- // D2I_encoding works for F2I
- ins_encode( Push_Reg_F(src), D2I_encoding(src) );
+ // DPR2I_encoding works for FPR2I
+ ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
ins_pipe( pipe_slow );
%}
// Convert a float in xmm to an int reg.
-instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
+instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
predicate(UseSSE>=1);
match(Set dst (ConvF2I src));
effect( KILL tmp, KILL cr );
@@ -12050,12 +11309,22 @@ instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
"ADD ESP, 4\n\t"
"CALL d2i_wrapper\n"
"fast:" %}
- opcode(0x0); // single-precision conversion
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
+ ins_encode %{
+ Label fast;
+ __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+ __ cmpl($dst$$Register, 0x80000000);
+ __ jccb(Assembler::notEqual, fast);
+ __ subptr(rsp, 4);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_s(Address(rsp, 0));
+ __ addptr(rsp, 4);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
+ __ bind(fast);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
+instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
predicate(UseSSE==0);
match(Set dst (ConvF2L src));
effect( KILL cr );
@@ -12073,13 +11342,13 @@ instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
"FLD $src\n\t"
"CALL d2l_wrapper\n"
"fast:" %}
- // D2L_encoding works for F2L
- ins_encode( Push_Reg_F(src), D2L_encoding(src) );
+ // DPR2L_encoding works for FPR2L
+ ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
ins_pipe( pipe_slow );
%}
// XMM lacks a float/double->long conversion, so use the old FPU stack.
-instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
+instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
predicate (UseSSE>=1);
match(Set dst (ConvF2L src));
effect( KILL cr );
@@ -12101,39 +11370,67 @@ instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
"ADD ESP,4\n\t"
"CALL d2l_wrapper\n"
"fast:" %}
- ins_encode( X2L_encoding(src) );
+ ins_encode %{
+ Label fast;
+ __ subptr(rsp, 8);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_s(Address(rsp, 0));
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
+ __ fistp_d(Address(rsp, 0));
+ // Restore the rounding mode, mask the exception
+ if (Compile::current()->in_24_bit_fp_mode()) {
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
+ } else {
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
+ }
+ // Load the converted long, adjust CPU stack
+ __ pop(rax);
+ __ pop(rdx);
+ __ cmpl(rdx, 0x80000000);
+ __ jccb(Assembler::notEqual, fast);
+ __ testl(rax, rax);
+ __ jccb(Assembler::notEqual, fast);
+ __ subptr(rsp, 4);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_s(Address(rsp, 0));
+ __ addptr(rsp, 4);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
+ __ bind(fast);
+ %}
ins_pipe( pipe_slow );
%}
-instruct convI2D_reg(regD dst, stackSlotI src) %{
+instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
predicate( UseSSE<=1 );
match(Set dst (ConvI2D src));
format %{ "FILD $src\n\t"
"FSTP $dst" %}
opcode(0xDB, 0x0); /* DB /0 */
- ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
+ ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); // two encode classes, matching the FILD / FSTP pair in the format string
ins_pipe( fpu_reg_mem );
%}
-instruct convI2XD_reg(regXD dst, eRegI src) %{
+instruct convI2D_reg(regD dst, eRegI src) %{
predicate( UseSSE>=2 && !UseXmmI2D );
match(Set dst (ConvI2D src));
format %{ "CVTSI2SD $dst,$src" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ ins_encode %{
+ __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); // int in a general register -> double in XMM; formerly raw bytes F2 0F 2A
+ %}
ins_pipe( pipe_slow );
%}
-instruct convI2XD_mem(regXD dst, memory mem) %{
+instruct convI2D_mem(regD dst, memory mem) %{
predicate( UseSSE>=2 );
match(Set dst (ConvI2D (LoadI mem)));
format %{ "CVTSI2SD $dst,$mem" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
+ ins_encode %{
+ __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); // folds the LoadI into the conversion: the int operand is read from $mem
+ %}
ins_pipe( pipe_slow );
%}
-instruct convXI2XD_reg(regXD dst, eRegI src)
+instruct convXI2D_reg(regD dst, eRegI src)
%{
predicate( UseSSE>=2 && UseXmmI2D );
match(Set dst (ConvI2D src));
@@ -12147,31 +11444,31 @@ instruct convXI2XD_reg(regXD dst, eRegI src)
ins_pipe(pipe_slow); // XXX
%}
-instruct convI2D_mem(regD dst, memory mem) %{
+instruct convI2DPR_mem(regDPR dst, memory mem) %{
predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2D (LoadI mem)));
format %{ "FILD $mem\n\t"
"FSTP $dst" %}
opcode(0xDB); /* DB /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Reg_D(dst));
+ Pop_Reg_DPR(dst)); // final step of the encoding: the FSTP $dst shown in the format string
ins_pipe( fpu_reg_mem );
%}
// Convert a byte to a float; no rounding step needed.
-instruct conv24I2F_reg(regF dst, stackSlotI src) %{
+instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
match(Set dst (ConvI2F src));
format %{ "FILD $src\n\t"
"FSTP $dst" %}
opcode(0xDB, 0x0); /* DB /0 */
- ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
+ ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); // predicate only admits inputs of the form (x & 255), hence the byte-sized source
ins_pipe( fpu_reg_mem );
%}
// In 24-bit mode, force exponent rounding by storing back out
-instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
+instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2F src));
ins_cost(200);
@@ -12179,12 +11476,12 @@ instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
"FSTP_S $dst" %}
opcode(0xDB, 0x0); /* DB /0 */
ins_encode( Push_Mem_I(src),
- Pop_Mem_F(dst));
+ Pop_Mem_FPR(dst));
ins_pipe( fpu_mem_mem );
%}
// In 24-bit mode, force exponent rounding by storing back out
-instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
+instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2F (LoadI mem)));
ins_cost(200);
@@ -12192,46 +11489,46 @@ instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
"FSTP_S $dst" %}
opcode(0xDB); /* DB /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Mem_F(dst));
+ Pop_Mem_FPR(dst));
ins_pipe( fpu_mem_mem );
%}
// This instruction does not round to 24-bits
-instruct convI2F_reg(regF dst, stackSlotI src) %{
+instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2F src));
format %{ "FILD $src\n\t"
"FSTP $dst" %}
opcode(0xDB, 0x0); /* DB /0 */
ins_encode( Push_Mem_I(src),
- Pop_Reg_F(dst));
+ Pop_Reg_FPR(dst)); // result stays in a register (FSTP $dst), unlike the *_SSF forms that store to a stack slot
ins_pipe( fpu_reg_mem );
%}
// This instruction does not round to 24-bits
-instruct convI2F_mem(regF dst, memory mem) %{
+instruct convI2FPR_mem(regFPR dst, memory mem) %{
predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
match(Set dst (ConvI2F (LoadI mem)));
format %{ "FILD $mem\n\t"
"FSTP $dst" %}
opcode(0xDB); /* DB /0 */
ins_encode( OpcP, RMopc_Mem(0x00,mem),
- Pop_Reg_F(dst));
+ Pop_Reg_FPR(dst)); // final step of the encoding: the FSTP $dst shown in the format string
ins_pipe( fpu_reg_mem );
%}
// Convert an int to a float in xmm; no rounding step needed.
-instruct convI2X_reg(regX dst, eRegI src) %{
+instruct convI2F_reg(regF dst, eRegI src) %{
predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
match(Set dst (ConvI2F src));
format %{ "CVTSI2SS $dst, $src" %}
-
- opcode(0xF3, 0x0F, 0x2A); /* F3 0F 2A /r */
- ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
+ ins_encode %{
+ __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); // int in a general register -> float in XMM; formerly raw bytes F3 0F 2A
+ %}
ins_pipe( pipe_slow );
%}
- instruct convXI2X_reg(regX dst, eRegI src)
+ instruct convXI2F_reg(regF dst, eRegI src)
%{
predicate( UseSSE>=2 && UseXmmI2F );
match(Set dst (ConvI2F src));
@@ -12280,7 +11577,7 @@ instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
ins_pipe( ialu_reg_reg_long );
%}
-instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
+instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
predicate (UseSSE<=1);
match(Set dst (ConvL2D src));
effect( KILL cr );
@@ -12290,11 +11587,11 @@ instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
"ADD ESP,8\n\t"
"FSTP_D $dst\t# D-round" %}
opcode(0xDF, 0x5); /* DF /5 */
- ins_encode(convert_long_double(src), Pop_Mem_D(dst));
+ ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
ins_pipe( pipe_slow );
%}
-instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
+instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst (ConvL2D src));
effect( KILL cr );
@@ -12305,11 +11602,11 @@ instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
"MOVSD $dst,[ESP]\n\t"
"ADD ESP,8" %}
opcode(0xDF, 0x5); /* DF /5 */
- ins_encode(convert_long_double2(src), Push_ResultXD(dst));
+ ins_encode(convert_long_double2(src), Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
-instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
+instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
predicate (UseSSE>=1);
match(Set dst (ConvL2F src));
effect( KILL cr );
@@ -12320,11 +11617,11 @@ instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
"MOVSS $dst,[ESP]\n\t"
"ADD ESP,8" %}
opcode(0xDF, 0x5); /* DF /5 */
- ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
+ ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
ins_pipe( pipe_slow );
%}
-instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
+instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
match(Set dst (ConvL2F src));
effect( KILL cr );
format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
@@ -12333,7 +11630,7 @@ instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
"ADD ESP,8\n\t"
"FSTP_S $dst\t# F-round" %}
opcode(0xDF, 0x5); /* DF /5 */
- ins_encode(convert_long_double(src), Pop_Mem_F(dst));
+ ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
ins_pipe( pipe_slow );
%}
@@ -12351,40 +11648,45 @@ instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
effect( DEF dst, USE src );
ins_cost(100);
format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
- opcode(0x8B);
- ins_encode( OpcP, RegMem(dst,src));
+ ins_encode %{
+ __ movl($dst$$Register, Address(rsp, $src$$disp));
+ %}
ins_pipe( ialu_reg_mem );
%}
-instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
+instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
predicate(UseSSE==0);
match(Set dst (MoveF2I src));
effect( DEF dst, USE src );
ins_cost(125);
format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
- ins_encode( Pop_Mem_Reg_F(dst, src) );
+ ins_encode( Pop_Mem_Reg_FPR(dst, src) ); // encode class renamed together with the regF -> regFPR operand
ins_pipe( fpu_mem_reg );
%}
-instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
+instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
predicate(UseSSE>=1);
match(Set dst (MoveF2I src));
effect( DEF dst, USE src );
ins_cost(95);
format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
+ ins_encode %{
+ __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); // destination stack slot is addressed as rsp + $dst displacement
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
+instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
predicate(UseSSE>=2);
match(Set dst (MoveF2I src));
effect( DEF dst, USE src );
ins_cost(85);
format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
- ins_encode( MovX2I_reg(dst, src));
+ ins_encode %{
+ __ movdl($dst$$Register, $src$$XMMRegister); // direct XMM -> general-register move, no stack slot involved
+ %}
ins_pipe( pipe_slow );
%}
@@ -12394,13 +11696,14 @@ instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
ins_cost(100);
format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %}
- opcode(0x89);
- ins_encode( OpcPRegSS( dst, src ) );
+ ins_encode %{
+ __ movl(Address(rsp, $dst$$disp), $src$$Register);
+ %}
ins_pipe( ialu_mem_reg );
%}
-instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
+instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
predicate(UseSSE==0);
match(Set dst (MoveI2F src));
effect(DEF dst, USE src);
@@ -12410,29 +11713,33 @@ instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
"FSTP $dst\t# MoveI2F_stack_reg" %}
opcode(0xD9); /* D9 /0, FLD m32real */
ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
- Pop_Reg_F(dst) );
+ Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem );
%}
-instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
+instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
predicate(UseSSE>=1);
match(Set dst (MoveI2F src));
effect( DEF dst, USE src );
ins_cost(95);
format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %}
- ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); // source stack slot is addressed as rsp + $src displacement
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
+instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (MoveI2F src));
effect( DEF dst, USE src );
ins_cost(85);
format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %}
- ins_encode( MovI2X_reg(dst, src) );
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register); // direct general-register -> XMM move, no stack slot involved
+ %}
ins_pipe( pipe_slow );
%}
@@ -12448,29 +11755,30 @@ instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
ins_pipe( ialu_mem_long_reg );
%}
-instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
+instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
predicate(UseSSE<=1);
match(Set dst (MoveD2L src));
effect(DEF dst, USE src);
ins_cost(125);
format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %}
- ins_encode( Pop_Mem_Reg_D(dst, src) );
+ ins_encode( Pop_Mem_Reg_DPR(dst, src) ); // encode class renamed together with the regD -> regDPR operand
ins_pipe( fpu_mem_reg );
%}
-instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
+instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (MoveD2L src));
effect(DEF dst, USE src);
ins_cost(95);
-
format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
+ ins_encode %{
+ __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); // destination stack slot is addressed as rsp + $dst displacement
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
+instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
predicate(UseSSE>=2);
match(Set dst (MoveD2L src));
effect(DEF dst, USE src, TEMP tmp);
@@ -12478,7 +11786,11 @@ instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
format %{ "MOVD $dst.lo,$src\n\t"
"PSHUFLW $tmp,$src,0x4E\n\t"
"MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
- ins_encode( MovXD2L_reg(dst, src, tmp) );
+ ins_encode %{
+ __ movdl($dst$$Register, $src$$XMMRegister);
+ __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
+ __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -12495,7 +11807,7 @@ instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
%}
-instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
+instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
predicate(UseSSE<=1);
match(Set dst (MoveL2D src));
effect(DEF dst, USE src);
@@ -12505,34 +11817,38 @@ instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
"FSTP $dst\t# MoveL2D_stack_reg" %}
opcode(0xDD); /* DD /0, FLD m64real */
ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
- Pop_Reg_D(dst) );
+ Pop_Reg_DPR(dst) );
ins_pipe( fpu_reg_mem );
%}
-instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
+instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
match(Set dst (MoveL2D src));
effect(DEF dst, USE src);
ins_cost(95);
format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %}
- ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // source stack slot is addressed as rsp + $src displacement
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
+instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
match(Set dst (MoveL2D src));
effect(DEF dst, USE src);
ins_cost(95);
format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
- ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); // NOTE(review): format still prints MOVLPD; presumably movdbl picks MOVLPD when !UseXmmLoadAndClearUpper - confirm
+ %}
ins_pipe( pipe_slow );
%}
-instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
+instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
predicate(UseSSE>=2);
match(Set dst (MoveL2D src));
effect(TEMP dst, USE src, TEMP tmp);
@@ -12540,149 +11856,192 @@ instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
format %{ "MOVD $dst,$src.lo\n\t"
"MOVD $tmp,$src.hi\n\t"
"PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
- ins_encode( MovL2XD_reg(dst, src, tmp) );
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+ __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regXD dst, regXD src) %{
+instruct Repl8B_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate8B src));
format %{ "MOVDQA $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode( pshufd_8x8(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) { // the copy is only emitted when dst and src are different registers
+ __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+ }
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); // interleave the low bytes with themselves, doubling each byte
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // imm 0x00: low word 0 is copied into all four low words
+ %}
ins_pipe( pipe_slow );
%}
// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_eRegI(regXD dst, eRegI src) %{
+instruct Repl8B_eRegI(regD dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate8B src));
format %{ "MOVD $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register); // bring the scalar from the general register into XMM first
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); // interleave the low bytes with themselves, doubling each byte
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // imm 0x00: low word 0 is copied into all four low words
+ %}
ins_pipe( pipe_slow );
%}
// Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regXD dst, immI0 zero) %{
+instruct Repl8B_immI0(regD dst, immI0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate8B zero));
format %{ "PXOR $dst,$dst\t! replicate8B" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); // XOR of the register with itself yields the all-zero vector
+ %}
ins_pipe( fpu_reg_reg );
%}
-// Replicate scalar to packed shore (2 byte) values in xmm
+// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_reg(regXD dst, regXD src) %{
+instruct Repl4S_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4S src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
- ins_encode( pshufd_4x16(dst, src));
+ ins_encode %{
+ __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); // imm 0x00: low word 0 of $src is copied into all four low words of $dst
+ %}
ins_pipe( fpu_reg_reg );
%}
-// Replicate scalar to packed shore (2 byte) values in xmm
+// Replicate scalar to packed short (2 byte) values in xmm
-instruct Repl4S_eRegI(regXD dst, eRegI src) %{
+instruct Repl4S_eRegI(regD dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4S src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
- ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register); // bring the scalar from the general register into XMM first
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // imm 0x00: low word 0 is copied into all four low words
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regXD dst, immI0 zero) %{
+instruct Repl4S_immI0(regD dst, immI0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4S zero));
format %{ "PXOR $dst,$dst\t! replicate4S" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); // XOR of the register with itself yields the all-zero vector
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regXD dst, regXD src) %{
+instruct Repl4C_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4C src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
- ins_encode( pshufd_4x16(dst, src));
+ ins_encode %{
+ __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00); // same shuffle as Replicate4S: chars and shorts are both 2-byte lanes
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_eRegI(regXD dst, eRegI src) %{
+instruct Repl4C_eRegI(regD dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4C src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
- ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register); // bring the scalar from the general register into XMM first
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // imm 0x00: low word 0 is copied into all four low words
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regXD dst, immI0 zero) %{
+instruct Repl4C_immI0(regD dst, immI0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate4C zero));
format %{ "PXOR $dst,$dst\t! replicate4C" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); // XOR of the register with itself yields the all-zero vector
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regXD dst, regXD src) %{
+instruct Repl2I_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2I src));
format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
- ins_encode( pshufd(dst, src, 0x00));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); // imm 0x00: dword 0 of $src is copied into every dword of $dst
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_eRegI(regXD dst, eRegI src) %{
+instruct Repl2I_eRegI(regD dst, eRegI src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2I src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFD $dst,$dst,0x00\t! replicate2I" %}
- ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register); // bring the scalar from the general register into XMM first
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); // imm 0x00: dword 0 is copied into every dword
+ %}
ins_pipe( fpu_reg_reg );
%}
-// Replicate scalar zero to packed integer (2 byte) values in xmm
+// Replicate scalar zero to packed integer (4 byte) values in xmm
-instruct Repl2I_immI0(regXD dst, immI0 zero) %{
+instruct Repl2I_immI0(regD dst, immI0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2I zero));
format %{ "PXOR $dst,$dst\t! replicate2I" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); // XOR of the register with itself yields the all-zero vector
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regXD dst, regXD src) %{
+instruct Repl2F_reg(regD dst, regD src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode( pshufd(dst, src, 0xe0));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); // imm 0xe0: dwords 0 and 1 both take dword 0; dwords 2 and 3 keep their positions
+ %}
ins_pipe( fpu_reg_reg );
%}
// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regX(regXD dst, regX src) %{
+instruct Repl2F_regF(regD dst, regF src) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode( pshufd(dst, src, 0xe0));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0); // same shuffle as Repl2F_reg; only the source operand class (regF) differs
+ %}
ins_pipe( fpu_reg_reg );
%}
-// Replicate scalar to packed single precision floating point values in xmm
+// Replicate scalar zero to packed single precision floating point values in xmm
-instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
+instruct Repl2F_immF0(regD dst, immF0 zero) %{
predicate(UseSSE>=2);
match(Set dst (Replicate2F zero));
format %{ "PXOR $dst,$dst\t! replicate2F" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister); // XOR of the register with itself yields the all-zero vector
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -12702,7 +12061,7 @@ instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlag
%}
instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
- eAXRegI result, regXD tmp1, eFlagsReg cr) %{
+ eAXRegI result, regD tmp1, eFlagsReg cr) %{
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@@ -12717,7 +12076,7 @@ instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
- regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
+ regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
match(Set result (StrEquals (Binary str1 str2) cnt));
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
@@ -12732,7 +12091,7 @@ instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
// fast search of substring with known size.
instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
- eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
+ eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
predicate(UseSSE42Intrinsics);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
@@ -12759,7 +12118,7 @@ instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_c
%}
instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
- eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
+ eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
predicate(UseSSE42Intrinsics);
match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
@@ -12776,7 +12135,7 @@ instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
// fast array equals
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
- regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
+ regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
match(Set result (AryEq ary1 ary2));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
@@ -13602,40 +12961,40 @@ instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP s
%}
// Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
- predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+ predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); // parenthesized: && binds tighter than ||, so the ge test used to bypass the UseSSE<=1 guard
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
+ expand %{
+ fcmovDPR_regS(cmp,flags,dst,src);
+ %}
+%}
+
+// Compare 2 longs and CMOVE doubles (regD form; the UseSSE>=2 guard must cover both the lt and the ge test)
+instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
+ predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); // parenthesized: && binds tighter than ||
+ match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
+ ins_cost(200);
expand %{
fcmovD_regS(cmp,flags,dst,src);
%}
%}
-// Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
- predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
- match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
- ins_cost(200);
- expand %{
- fcmovXD_regS(cmp,flags,dst,src);
- %}
-%}
-
-instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
+instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
- predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+ predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); // parenthesized: && binds tighter than ||, so the ge test used to bypass the UseSSE==0 guard
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovF_regS(cmp,flags,dst,src);
+ fcmovFPR_regS(cmp,flags,dst,src);
%}
%}
-instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
+instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
- predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
+ predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); // parenthesized: && binds tighter than ||, so the ge test used to bypass the UseSSE>=1 guard
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovX_regS(cmp,flags,dst,src);
+ fcmovF_regS(cmp,flags,dst,src);
%}
%}
@@ -13730,40 +13089,40 @@ instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP s
%}
// Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
+ expand %{
+ fcmovDPR_regS(cmp,flags,dst,src);
+ %}
+%}
+
+// Compare 2 longs and CMOVE doubles (UseSSE>=2; Bool test must be eq or ne)
+instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
+ predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
+ match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
+ ins_cost(200);
expand %{
fcmovD_regS(cmp,flags,dst,src);
%}
%}
-// Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
- predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
- match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
- ins_cost(200);
- expand %{
- fcmovXD_regS(cmp,flags,dst,src);
- %}
-%}
-
-instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
+instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovF_regS(cmp,flags,dst,src);
+ fcmovFPR_regS(cmp,flags,dst,src);
%}
%}
-instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
+instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovX_regS(cmp,flags,dst,src);
+ fcmovF_regS(cmp,flags,dst,src);
%}
%}
@@ -13863,41 +13222,41 @@ instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst,
%}
// Compare 2 longs and CMOVE doubles
-instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
+instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
ins_cost(200);
+ expand %{
+ fcmovDPR_regS(cmp,flags,dst,src);
+ %}
+%}
+
+// Compare 2 longs and CMOVE doubles (UseSSE>=2; Bool test must be le or gt)
+instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
+ predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
+ match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
+ ins_cost(200);
expand %{
fcmovD_regS(cmp,flags,dst,src);
%}
%}
-// Compare 2 longs and CMOVE doubles
-instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
- predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
- match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
- ins_cost(200);
- expand %{
- fcmovXD_regS(cmp,flags,dst,src);
- %}
-%}
-
-instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
+instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovF_regS(cmp,flags,dst,src);
+ fcmovFPR_regS(cmp,flags,dst,src);
%}
%}
-instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
+instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
ins_cost(200);
expand %{
- fcmovX_regS(cmp,flags,dst,src);
+ fcmovF_regS(cmp,flags,dst,src);
%}
%}
@@ -14076,20 +13435,20 @@ instruct RethrowException()
// inlined locking and unlocking
-instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
+instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
match( Set cr (FastLock object box) );
- effect( TEMP tmp, TEMP scr );
+ effect( TEMP tmp, TEMP scr, USE_KILL box );
ins_cost(300);
- format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
+ format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
ins_encode( Fast_Lock(object,box,tmp,scr) );
ins_pipe( pipe_slow );
%}
instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
match( Set cr (FastUnlock object box) );
- effect( TEMP tmp );
+ effect( TEMP tmp, USE_KILL box );
ins_cost(300);
- format %{ "FASTUNLOCK $object, $box, $tmp" %}
+ format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
ins_encode( Fast_Unlock(object,box,tmp) );
ins_pipe( pipe_slow );
%}
diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad
index 57e82bd4323..338db98c33d 100644
--- a/hotspot/src/cpu/x86/vm/x86_64.ad
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad
@@ -552,7 +552,7 @@ source %{
#define __ _masm.
static int preserve_SP_size() {
- return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
+ return 3; // rex.w, op, rm(reg/reg)
}
// !!!!! Special hack to get all types of calls to specify the byte offset
@@ -797,48 +797,35 @@ void encode_RegMem(CodeBuffer &cbuf,
}
}
-void encode_copy(CodeBuffer &cbuf, int dstenc, int srcenc)
-{
- if (dstenc != srcenc) {
- if (dstenc < 8) {
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- srcenc -= 8;
- }
- } else {
- if (srcenc < 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- } else {
- emit_opcode(cbuf, Assembler::REX_RB);
- srcenc -= 8;
- }
- dstenc -= 8;
- }
-
- emit_opcode(cbuf, 0x8B);
- emit_rm(cbuf, 0x3, dstenc, srcenc);
- }
-}
-
-void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
- if( dst_encoding == src_encoding ) {
- // reg-reg copy, use an empty encoding
- } else {
- MacroAssembler _masm(&cbuf);
-
- __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
- }
-}
-
// This could be in MacroAssembler but it's fairly C2 specific
void emit_cmpfp_fixup(MacroAssembler& _masm) {
Label exit;
__ jccb(Assembler::noParity, exit);
__ pushf();
+ //
+ // comiss/ucomiss instructions set ZF,PF,CF flags and
+ // zero OF,AF,SF for NaN values.
+ // Fixup flags by zeroing ZF,PF so that compare of NaN
+ // values returns 'less than' result (CF is set).
+ // Leave the rest of flags unchanged.
+ //
+ // 7 6 5 4 3 2 1 0
+ // |S|Z|r|A|r|P|r|C| (r - reserved bit)
+ // 0 0 1 0 1 0 1 1 (0x2B)
+ //
__ andq(Address(rsp, 0), 0xffffff2b);
__ popf();
__ bind(exit);
- __ nop(); // (target for branch to avoid branch to branch)
+}
+
+void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
+ Label done;
+ __ movl(dst, -1);
+ __ jcc(Assembler::parity, done);
+ __ jcc(Assembler::below, done);
+ __ setb(Assembler::notEqual, dst);
+ __ movzbl(dst, dst);
+ __ bind(done);
}
@@ -1274,16 +1261,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
// 64-bit
int offset = ra_->reg2offset(src_first);
if (cbuf) {
- emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- if (Matcher::_regEncode[dst_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[dst_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
} else if (!do_size) {
st->print("%s %s, [rsp + #%d]\t# spill",
@@ -1294,25 +1273,17 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[dst_first] < 8)
- ? 5
- : 6); // REX
+ ((Matcher::_regEncode[dst_first] >= 8)
+ ? 6
+ : (5 + ((UseAVX>0)?1:0))); // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
int offset = ra_->reg2offset(src_first);
if (cbuf) {
- emit_opcode(*cbuf, 0xF3);
- if (Matcher::_regEncode[dst_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x10);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[dst_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movss %s, [rsp + #%d]\t# spill",
@@ -1322,9 +1293,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[dst_first] < 8)
- ? 5
- : 6); // REX
+ ((Matcher::_regEncode[dst_first] >= 8)
+ ? 6
+ : (5 + ((UseAVX>0)?1:0))); // REX
}
}
} else if (src_first_rc == rc_int) {
@@ -1450,25 +1421,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_W);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WB);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_WR);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WRB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x6E);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdq %s, %s\t# spill",
@@ -1482,23 +1436,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_B);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- } else {
- emit_opcode(*cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x6E);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdl %s, %s\t# spill",
@@ -1507,9 +1446,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
#endif
}
return
- (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
- ? 4
- : 5; // REX
+ (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+ ? 5
+ : (4 + ((UseAVX>0)?1:0)); // REX
}
}
} else if (src_first_rc == rc_float) {
@@ -1521,16 +1460,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
// 64-bit
int offset = ra_->reg2offset(dst_first);
if (cbuf) {
- emit_opcode(*cbuf, 0xF2);
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x11);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[src_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movsd [rsp + #%d], %s\t# spill",
@@ -1540,25 +1471,17 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[src_first] < 8)
- ? 5
- : 6); // REX
+ ((Matcher::_regEncode[src_first] >= 8)
+ ? 6
+ : (5 + ((UseAVX>0)?1:0))); // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
int offset = ra_->reg2offset(dst_first);
if (cbuf) {
- emit_opcode(*cbuf, 0xF3);
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x11);
- encode_RegMem(*cbuf,
- Matcher::_regEncode[src_first],
- RSP_enc, 0x4, 0, offset,
- false);
+ MacroAssembler _masm(cbuf);
+ __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movss [rsp + #%d], %s\t# spill",
@@ -1568,9 +1491,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
return
((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
- ((Matcher::_regEncode[src_first] < 8)
- ? 5
- : 6); // REX
+ ((Matcher::_regEncode[src_first] >=8)
+ ? 6
+ : (5 + ((UseAVX>0)?1:0))); // REX
}
} else if (dst_first_rc == rc_int) {
// xmm -> gpr
@@ -1578,25 +1501,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_W);
- } else {
- emit_opcode(*cbuf, Assembler::REX_WR); // attention!
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_WB); // attention!
- } else {
- emit_opcode(*cbuf, Assembler::REX_WRB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x7E);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[src_first] & 7,
- Matcher::_regEncode[dst_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdq %s, %s\t# spill",
@@ -1610,23 +1516,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) {
- emit_opcode(*cbuf, 0x66);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_R); // attention!
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_B); // attention!
- } else {
- emit_opcode(*cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, 0x7E);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[src_first] & 7,
- Matcher::_regEncode[dst_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("movdl %s, %s\t# spill",
@@ -1635,9 +1526,9 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
#endif
}
return
- (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
- ? 4
- : 5; // REX
+ (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+ ? 5
+ : (4 + ((UseAVX>0)?1:0)); // REX
}
} else if (dst_first_rc == rc_float) {
// xmm -> xmm
@@ -1645,23 +1536,8 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
- emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_B);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- } else {
- emit_opcode(*cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("%s %s, %s\t# spill",
@@ -1671,32 +1547,16 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
#endif
}
return
- (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
- ? 4
- : 5; // REX
+ (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+ ? 5
+ : (4 + ((UseAVX>0)?1:0)); // REX
} else {
// 32-bit
assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
if (cbuf) {
- if (!UseXmmRegToRegMoveAll)
- emit_opcode(*cbuf, 0xF3);
- if (Matcher::_regEncode[dst_first] < 8) {
- if (Matcher::_regEncode[src_first] >= 8) {
- emit_opcode(*cbuf, Assembler::REX_B);
- }
- } else {
- if (Matcher::_regEncode[src_first] < 8) {
- emit_opcode(*cbuf, Assembler::REX_R);
- } else {
- emit_opcode(*cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(*cbuf, 0x0F);
- emit_opcode(*cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
- emit_rm(*cbuf, 0x3,
- Matcher::_regEncode[dst_first] & 7,
- Matcher::_regEncode[src_first] & 7);
+ MacroAssembler _masm(cbuf);
+ __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else if (!do_size) {
st->print("%s %s, %s\t# spill",
@@ -1705,10 +1565,10 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
Matcher::regName[src_first]);
#endif
}
- return
- (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
- ? (UseXmmRegToRegMoveAll ? 3 : 4)
- : (UseXmmRegToRegMoveAll ? 4 : 5); // REX
+ return ((UseAVX>0) ? 5:
+ ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
+ ? (UseXmmRegToRegMoveAll ? 4 : 5)
+ : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
}
}
}
@@ -2205,47 +2065,6 @@ encode %{
emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
%}
- enc_class cmpfp_fixup() %{
- MacroAssembler _masm(&cbuf);
- emit_cmpfp_fixup(_masm);
- %}
-
- enc_class cmpfp3(rRegI dst)
- %{
- int dstenc = $dst$$reg;
-
- // movl $dst, -1
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0xB8 | (dstenc & 7));
- emit_d32(cbuf, -1);
-
- // jp,s done
- emit_opcode(cbuf, 0x7A);
- emit_d8(cbuf, dstenc < 4 ? 0x08 : 0x0A);
-
- // jb,s done
- emit_opcode(cbuf, 0x72);
- emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);
-
- // setne $dst
- if (dstenc >= 4) {
- emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x95);
- emit_opcode(cbuf, 0xC0 | (dstenc & 7));
-
- // movzbl $dst, $dst
- if (dstenc >= 4) {
- emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0xB6);
- emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
- %}
-
enc_class cdql_enc(no_rax_rdx_RegI div)
%{
// Full implementation of Java idiv and irem; checks for
@@ -2472,55 +2291,6 @@ encode %{
emit_cc(cbuf, $secondary, $cop$$cmpcode);
%}
- enc_class enc_cmovf_branch(cmpOp cop, regF dst, regF src)
- %{
- // Invert sense of branch from sense of cmov
- emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
- emit_d8(cbuf, ($dst$$reg < 8 && $src$$reg < 8)
- ? (UseXmmRegToRegMoveAll ? 3 : 4)
- : (UseXmmRegToRegMoveAll ? 4 : 5) ); // REX
- // UseXmmRegToRegMoveAll ? movaps(dst, src) : movss(dst, src)
- if (!UseXmmRegToRegMoveAll) emit_opcode(cbuf, 0xF3);
- if ($dst$$reg < 8) {
- if ($src$$reg >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- } else {
- if ($src$$reg < 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- } else {
- emit_opcode(cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
- emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
- %}
-
- enc_class enc_cmovd_branch(cmpOp cop, regD dst, regD src)
- %{
- // Invert sense of branch from sense of cmov
- emit_cc(cbuf, 0x70, $cop$$cmpcode ^ 1);
- emit_d8(cbuf, $dst$$reg < 8 && $src$$reg < 8 ? 4 : 5); // REX
-
- // UseXmmRegToRegMoveAll ? movapd(dst, src) : movsd(dst, src)
- emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x66 : 0xF2);
- if ($dst$$reg < 8) {
- if ($src$$reg >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- } else {
- if ($src$$reg < 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- } else {
- emit_opcode(cbuf, Assembler::REX_RB);
- }
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, UseXmmRegToRegMoveAll ? 0x28 : 0x10);
- emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
- %}
-
enc_class enc_PartialSubtypeCheck()
%{
Register Rrdi = as_Register(RDI_enc); // result register
@@ -2751,68 +2521,6 @@ encode %{
}
%}
- // Encode a reg-reg copy. If it is useless, then empty encoding.
- enc_class enc_copy(rRegI dst, rRegI src)
- %{
- encode_copy(cbuf, $dst$$reg, $src$$reg);
- %}
-
- // Encode xmm reg-reg copy. If it is useless, then empty encoding.
- enc_class enc_CopyXD( RegD dst, RegD src ) %{
- encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
- %}
-
- enc_class enc_copy_always(rRegI dst, rRegI src)
- %{
- int srcenc = $src$$reg;
- int dstenc = $dst$$reg;
-
- if (dstenc < 8) {
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- srcenc -= 8;
- }
- } else {
- if (srcenc < 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- } else {
- emit_opcode(cbuf, Assembler::REX_RB);
- srcenc -= 8;
- }
- dstenc -= 8;
- }
-
- emit_opcode(cbuf, 0x8B);
- emit_rm(cbuf, 0x3, dstenc, srcenc);
- %}
-
- enc_class enc_copy_wide(rRegL dst, rRegL src)
- %{
- int srcenc = $src$$reg;
- int dstenc = $dst$$reg;
-
- if (dstenc != srcenc) {
- if (dstenc < 8) {
- if (srcenc < 8) {
- emit_opcode(cbuf, Assembler::REX_W);
- } else {
- emit_opcode(cbuf, Assembler::REX_WB);
- srcenc -= 8;
- }
- } else {
- if (srcenc < 8) {
- emit_opcode(cbuf, Assembler::REX_WR);
- } else {
- emit_opcode(cbuf, Assembler::REX_WRB);
- srcenc -= 8;
- }
- dstenc -= 8;
- }
- emit_opcode(cbuf, 0x8B);
- emit_rm(cbuf, 0x3, dstenc, srcenc);
- }
- %}
-
enc_class Con32(immI src)
%{
// Output immediate
@@ -3212,92 +2920,19 @@ encode %{
%}
enc_class Push_ResultXD(regD dst) %{
- int dstenc = $dst$$reg;
-
- store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [RSP]
-
- // UseXmmLoadAndClearUpper ? movsd dst,[rsp] : movlpd dst,[rsp]
- emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode (cbuf, 0x0F );
- emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12 );
- encode_RegMem(cbuf, dstenc, RSP_enc, 0x4, 0, 0, false);
-
- // add rsp,8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf,0x83);
- emit_rm(cbuf,0x3, 0x0, RSP_enc);
- emit_d8(cbuf,0x08);
+ MacroAssembler _masm(&cbuf);
+ __ fstp_d(Address(rsp, 0));
+ __ movdbl($dst$$XMMRegister, Address(rsp, 0));
+ __ addptr(rsp, 8);
%}
enc_class Push_SrcXD(regD src) %{
- int srcenc = $src$$reg;
-
- // subq rsp,#8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 0x8);
-
- // movsd [rsp],src
- emit_opcode(cbuf, 0xF2);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false);
-
- // fldd [rsp]
- emit_opcode(cbuf, 0x66);
- emit_opcode(cbuf, 0xDD);
- encode_RegMem(cbuf, 0x0, RSP_enc, 0x4, 0, 0, false);
- %}
-
-
- enc_class movq_ld(regD dst, memory mem) %{
MacroAssembler _masm(&cbuf);
- __ movq($dst$$XMMRegister, $mem$$Address);
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ fld_d(Address(rsp, 0));
%}
- enc_class movq_st(memory mem, regD src) %{
- MacroAssembler _masm(&cbuf);
- __ movq($mem$$Address, $src$$XMMRegister);
- %}
-
- enc_class pshufd_8x8(regF dst, regF src) %{
- MacroAssembler _masm(&cbuf);
-
- encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
- __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
- __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
- %}
-
- enc_class pshufd_4x16(regF dst, regF src) %{
- MacroAssembler _masm(&cbuf);
-
- __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
- %}
-
- enc_class pshufd(regD dst, regD src, int mode) %{
- MacroAssembler _masm(&cbuf);
-
- __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
- %}
-
- enc_class pxor(regD dst, regD src) %{
- MacroAssembler _masm(&cbuf);
-
- __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
- %}
-
- enc_class mov_i2x(regD dst, rRegI src) %{
- MacroAssembler _masm(&cbuf);
-
- __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
- %}
// obj: object to lock
// box: box address (header location) -- killed
@@ -3534,303 +3169,6 @@ encode %{
RELOC_DISP32);
%}
- enc_class absF_encoding(regF dst)
- %{
- int dstenc = $dst$$reg;
- address signmask_address = (address) StubRoutines::x86::float_sign_mask();
-
- cbuf.set_insts_mark();
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- dstenc -= 8;
- }
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x54);
- emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, signmask_address);
- %}
-
- enc_class absD_encoding(regD dst)
- %{
- int dstenc = $dst$$reg;
- address signmask_address = (address) StubRoutines::x86::double_sign_mask();
-
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0x66);
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- dstenc -= 8;
- }
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x54);
- emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, signmask_address);
- %}
-
- enc_class negF_encoding(regF dst)
- %{
- int dstenc = $dst$$reg;
- address signflip_address = (address) StubRoutines::x86::float_sign_flip();
-
- cbuf.set_insts_mark();
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- dstenc -= 8;
- }
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x57);
- emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, signflip_address);
- %}
-
- enc_class negD_encoding(regD dst)
- %{
- int dstenc = $dst$$reg;
- address signflip_address = (address) StubRoutines::x86::double_sign_flip();
-
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0x66);
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- dstenc -= 8;
- }
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x57);
- emit_rm(cbuf, 0x0, dstenc, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, signflip_address);
- %}
-
- enc_class f2i_fixup(rRegI dst, regF src)
- %{
- int dstenc = $dst$$reg;
- int srcenc = $src$$reg;
-
- // cmpl $dst, #0x80000000
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x81);
- emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
- emit_d32(cbuf, 0x80000000);
-
- // jne,s done
- emit_opcode(cbuf, 0x75);
- if (srcenc < 8 && dstenc < 8) {
- emit_d8(cbuf, 0xF);
- } else if (srcenc >= 8 && dstenc >= 8) {
- emit_d8(cbuf, 0x11);
- } else {
- emit_d8(cbuf, 0x10);
- }
-
- // subq rsp, #8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 8);
-
- // movss [rsp], $src
- emit_opcode(cbuf, 0xF3);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
- // call f2i_fixup
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0xE8);
- emit_d32_reloc(cbuf,
- (int)
- (StubRoutines::x86::f2i_fixup() - cbuf.insts_end() - 4),
- runtime_call_Relocation::spec(),
- RELOC_DISP32);
-
- // popq $dst
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
- // done:
- %}
-
- enc_class f2l_fixup(rRegL dst, regF src)
- %{
- int dstenc = $dst$$reg;
- int srcenc = $src$$reg;
- address const_address = (address) StubRoutines::x86::double_sign_flip();
-
- // cmpq $dst, [0x8000000000000000]
- cbuf.set_insts_mark();
- emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
- emit_opcode(cbuf, 0x39);
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, const_address);
-
-
- // jne,s done
- emit_opcode(cbuf, 0x75);
- if (srcenc < 8 && dstenc < 8) {
- emit_d8(cbuf, 0xF);
- } else if (srcenc >= 8 && dstenc >= 8) {
- emit_d8(cbuf, 0x11);
- } else {
- emit_d8(cbuf, 0x10);
- }
-
- // subq rsp, #8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 8);
-
- // movss [rsp], $src
- emit_opcode(cbuf, 0xF3);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
- // call f2l_fixup
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0xE8);
- emit_d32_reloc(cbuf,
- (int)
- (StubRoutines::x86::f2l_fixup() - cbuf.insts_end() - 4),
- runtime_call_Relocation::spec(),
- RELOC_DISP32);
-
- // popq $dst
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
- // done:
- %}
-
- enc_class d2i_fixup(rRegI dst, regD src)
- %{
- int dstenc = $dst$$reg;
- int srcenc = $src$$reg;
-
- // cmpl $dst, #0x80000000
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x81);
- emit_rm(cbuf, 0x3, 0x7, dstenc & 7);
- emit_d32(cbuf, 0x80000000);
-
- // jne,s done
- emit_opcode(cbuf, 0x75);
- if (srcenc < 8 && dstenc < 8) {
- emit_d8(cbuf, 0xF);
- } else if (srcenc >= 8 && dstenc >= 8) {
- emit_d8(cbuf, 0x11);
- } else {
- emit_d8(cbuf, 0x10);
- }
-
- // subq rsp, #8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 8);
-
- // movsd [rsp], $src
- emit_opcode(cbuf, 0xF2);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
- // call d2i_fixup
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0xE8);
- emit_d32_reloc(cbuf,
- (int)
- (StubRoutines::x86::d2i_fixup() - cbuf.insts_end() - 4),
- runtime_call_Relocation::spec(),
- RELOC_DISP32);
-
- // popq $dst
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
- // done:
- %}
-
- enc_class d2l_fixup(rRegL dst, regD src)
- %{
- int dstenc = $dst$$reg;
- int srcenc = $src$$reg;
- address const_address = (address) StubRoutines::x86::double_sign_flip();
-
- // cmpq $dst, [0x8000000000000000]
- cbuf.set_insts_mark();
- emit_opcode(cbuf, dstenc < 8 ? Assembler::REX_W : Assembler::REX_WR);
- emit_opcode(cbuf, 0x39);
- // XXX reg_mem doesn't support RIP-relative addressing yet
- emit_rm(cbuf, 0x0, dstenc & 7, 0x5); // 00 reg 101
- emit_d32_reloc(cbuf, const_address);
-
-
- // jne,s done
- emit_opcode(cbuf, 0x75);
- if (srcenc < 8 && dstenc < 8) {
- emit_d8(cbuf, 0xF);
- } else if (srcenc >= 8 && dstenc >= 8) {
- emit_d8(cbuf, 0x11);
- } else {
- emit_d8(cbuf, 0x10);
- }
-
- // subq rsp, #8
- emit_opcode(cbuf, Assembler::REX_W);
- emit_opcode(cbuf, 0x83);
- emit_rm(cbuf, 0x3, 0x5, RSP_enc);
- emit_d8(cbuf, 8);
-
- // movsd [rsp], $src
- emit_opcode(cbuf, 0xF2);
- if (srcenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_R);
- }
- emit_opcode(cbuf, 0x0F);
- emit_opcode(cbuf, 0x11);
- encode_RegMem(cbuf, srcenc, RSP_enc, 0x4, 0, 0, false); // 2 bytes
-
- // call d2l_fixup
- cbuf.set_insts_mark();
- emit_opcode(cbuf, 0xE8);
- emit_d32_reloc(cbuf,
- (int)
- (StubRoutines::x86::d2l_fixup() - cbuf.insts_end() - 4),
- runtime_call_Relocation::spec(),
- RELOC_DISP32);
-
- // popq $dst
- if (dstenc >= 8) {
- emit_opcode(cbuf, Assembler::REX_B);
- }
- emit_opcode(cbuf, 0x58 | (dstenc & 7));
-
- // done:
- %}
%}
@@ -6156,8 +5494,9 @@ instruct loadF(regF dst, memory mem)
ins_cost(145); // XXX
format %{ "movss $dst, $mem\t# float" %}
- opcode(0xF3, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -6169,8 +5508,9 @@ instruct loadD_partial(regD dst, memory mem)
ins_cost(145); // XXX
format %{ "movlpd $dst, $mem\t# double" %}
- opcode(0x66, 0x0F, 0x12);
- ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -6181,8 +5521,9 @@ instruct loadD(regD dst, memory mem)
ins_cost(145); // XXX
format %{ "movsd $dst, $mem\t# double" %}
- opcode(0xF2, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, mem), OpcS, OpcT, reg_mem(dst, mem));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -6191,7 +5532,9 @@ instruct loadA8B(regD dst, memory mem) %{
match(Set dst (Load8B mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed8B" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6200,7 +5543,9 @@ instruct loadA4S(regD dst, memory mem) %{
match(Set dst (Load4S mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4S" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6209,7 +5554,9 @@ instruct loadA4C(regD dst, memory mem) %{
match(Set dst (Load4C mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed4C" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6218,16 +5565,20 @@ instruct load2IU(regD dst, memory mem) %{
match(Set dst (Load2I mem));
ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed2I" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
// Load Aligned Packed Single to XMM
instruct loadA2F(regD dst, memory mem) %{
match(Set dst (Load2F mem));
- ins_cost(145);
+ ins_cost(125);
format %{ "MOVQ $dst,$mem\t! packed2F" %}
- ins_encode( movq_ld(dst, mem));
+ ins_encode %{
+ __ movq($dst$$XMMRegister, $mem$$Address);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6540,8 +5891,9 @@ instruct loadConF0(regF dst, immF0 src)
ins_cost(100);
format %{ "xorps $dst, $dst\t# float 0.0" %}
- opcode(0x0F, 0x57);
- ins_encode(REX_reg_reg(dst, dst), OpcP, OpcS, reg_reg(dst, dst));
+ ins_encode %{
+ __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe(pipe_slow);
%}
@@ -6562,8 +5914,9 @@ instruct loadConD0(regD dst, immD0 src)
ins_cost(100);
format %{ "xorpd $dst, $dst\t# double 0.0" %}
- opcode(0x66, 0x0F, 0x57);
- ins_encode(OpcP, REX_reg_reg(dst, dst), OpcS, OpcT, reg_reg(dst, dst));
+ ins_encode %{
+ __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe(pipe_slow);
%}
@@ -6606,8 +5959,9 @@ instruct loadSSF(regF dst, stackSlotF src)
ins_cost(125);
format %{ "movss $dst, $src\t# float stk" %}
- opcode(0xF3, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -6972,7 +6326,9 @@ instruct storeA8B(memory mem, regD src) %{
match(Set mem (Store8B mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed8B" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6981,7 +6337,9 @@ instruct storeA4C(memory mem, regD src) %{
match(Set mem (Store4C mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed4C" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -6990,7 +6348,9 @@ instruct storeA2I(memory mem, regD src) %{
match(Set mem (Store2I mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2I" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7024,7 +6384,9 @@ instruct storeA2F(memory mem, regD src) %{
match(Set mem (Store2F mem src));
ins_cost(145);
format %{ "MOVQ $mem,$src\t! packed2F" %}
- ins_encode( movq_st(mem, src));
+ ins_encode %{
+ __ movq($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -7035,8 +6397,9 @@ instruct storeF(memory mem, regF src)
ins_cost(95); // XXX
format %{ "movss $mem, $src\t# float" %}
- opcode(0xF3, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+ ins_encode %{
+ __ movflt($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -7072,8 +6435,9 @@ instruct storeD(memory mem, regD src)
ins_cost(95); // XXX
format %{ "movsd $mem, $src\t# double" %}
- opcode(0xF2, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, mem), OpcS, OpcT, reg_mem(src, mem));
+ ins_encode %{
+ __ movdbl($mem$$Address, $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -7142,8 +6506,9 @@ instruct storeSSF(stackSlotF dst, regF src)
ins_cost(95); // XXX
format %{ "movss $dst, $src\t# float stk" %}
- opcode(0xF3, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+ ins_encode %{
+ __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -7153,8 +6518,9 @@ instruct storeSSD(stackSlotD dst, regD src)
ins_cost(95); // XXX
format %{ "movsd $dst, $src\t# double stk" %}
- opcode(0xF2, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+ ins_encode %{
+ __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -7444,6 +6810,16 @@ instruct unnecessary_membar_volatile()
ins_pipe(empty);
%}
+instruct membar_storestore() %{
+ match(MemBarStoreStore);
+ ins_cost(0);
+
+ size(0);
+ format %{ "MEMBAR-storestore (empty encoding)" %}
+ ins_encode( );
+ ins_pipe(empty);
+%}
+
//----------Move Instructions--------------------------------------------------
instruct castX2P(rRegP dst, rRegL src)
@@ -7451,7 +6827,11 @@ instruct castX2P(rRegP dst, rRegL src)
match(Set dst (CastX2P src));
format %{ "movq $dst, $src\t# long->ptr" %}
- ins_encode(enc_copy_wide(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) {
+ __ movptr($dst$$Register, $src$$Register);
+ }
+ %}
ins_pipe(ialu_reg_reg); // XXX
%}
@@ -7460,7 +6840,11 @@ instruct castP2X(rRegL dst, rRegP src)
match(Set dst (CastP2X src));
format %{ "movq $dst, $src\t# ptr -> long" %}
- ins_encode(enc_copy_wide(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) {
+ __ movptr($dst$$Register, $src$$Register);
+ }
+ %}
ins_pipe(ialu_reg_reg); // XXX
%}
@@ -7813,7 +7197,13 @@ instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
format %{ "jn$cop skip\t# signed cmove float\n\t"
"movss $dst, $src\n"
"skip:" %}
- ins_encode(enc_cmovf_branch(cop, dst, src));
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
ins_pipe(pipe_slow);
%}
@@ -7837,7 +7227,13 @@ instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
format %{ "jn$cop skip\t# unsigned cmove float\n\t"
"movss $dst, $src\n"
"skip:" %}
- ins_encode(enc_cmovf_branch(cop, dst, src));
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
ins_pipe(pipe_slow);
%}
@@ -7857,7 +7253,13 @@ instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
format %{ "jn$cop skip\t# signed cmove double\n\t"
"movsd $dst, $src\n"
"skip:" %}
- ins_encode(enc_cmovd_branch(cop, dst, src));
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
ins_pipe(pipe_slow);
%}
@@ -7869,7 +7271,13 @@ instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
format %{ "jn$cop skip\t# unsigned cmove double\n\t"
"movsd $dst, $src\n"
"skip:" %}
- ins_encode(enc_cmovd_branch(cop, dst, src));
+ ins_encode %{
+ Label Lskip;
+ // Invert sense of branch from sense of CMOV
+ __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ __ bind(Lskip);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10191,17 +9599,18 @@ instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
- cmpfp_fixup);
+ "exit:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe(pipe_slow);
%}
instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
match(Set cr (CmpF src1 src2));
- ins_cost(145);
+ ins_cost(100);
format %{ "ucomiss $src1, $src2" %}
ins_encode %{
__ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
@@ -10219,10 +9628,11 @@ instruct cmpF_cc_mem(rFlagsRegU cr, regF src1, memory src2)
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
- cmpfp_fixup);
+ "exit:" %}
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10231,8 +9641,9 @@ instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
ins_cost(100);
format %{ "ucomiss $src1, $src2" %}
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2));
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10245,7 +9656,7 @@ instruct cmpF_cc_imm(rFlagsRegU cr, regF src, immF con) %{
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
+ "exit:" %}
ins_encode %{
__ ucomiss($src$$XMMRegister, $constantaddress($con));
emit_cmpfp_fixup(_masm);
@@ -10273,10 +9684,11 @@ instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
- cmpfp_fixup);
+ "exit:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10301,10 +9713,11 @@ instruct cmpD_cc_mem(rFlagsRegU cr, regD src1, memory src2)
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
- cmpfp_fixup);
+ "exit:" %}
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp_fixup(_masm);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10313,8 +9726,9 @@ instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
ins_cost(100);
format %{ "ucomisd $src1, $src2" %}
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2));
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10327,7 +9741,7 @@ instruct cmpD_cc_imm(rFlagsRegU cr, regD src, immD con) %{
"pushfq\t# saw NaN, set CF\n\t"
"andq [rsp], #0xffffff2b\n\t"
"popfq\n"
- "exit: nop\t# avoid branch to branch" %}
+ "exit:" %}
ins_encode %{
__ ucomisd($src$$XMMRegister, $constantaddress($con));
emit_cmpfp_fixup(_masm);
@@ -10359,10 +9773,10 @@ instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
"setne $dst\n\t"
"movzbl $dst, $dst\n"
"done:" %}
-
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_reg(src1, src2), OpcP, OpcS, reg_reg(src1, src2),
- cmpfp3(dst));
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10380,10 +9794,10 @@ instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
"setne $dst\n\t"
"movzbl $dst, $dst\n"
"done:" %}
-
- opcode(0x0F, 0x2E);
- ins_encode(REX_reg_mem(src1, src2), OpcP, OpcS, reg_mem(src1, src2),
- cmpfp3(dst));
+ ins_encode %{
+ __ ucomiss($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10401,15 +9815,8 @@ instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
"movzbl $dst, $dst\n"
"done:" %}
ins_encode %{
- Label L_done;
- Register Rdst = $dst$$Register;
__ ucomiss($src$$XMMRegister, $constantaddress($con));
- __ movl(Rdst, -1);
- __ jcc(Assembler::parity, L_done);
- __ jcc(Assembler::below, L_done);
- __ setb(Assembler::notEqual, Rdst);
- __ movzbl(Rdst, Rdst);
- __ bind(L_done);
+ emit_cmpfp3(_masm, $dst$$Register);
%}
ins_pipe(pipe_slow);
%}
@@ -10428,10 +9835,10 @@ instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
"setne $dst\n\t"
"movzbl $dst, $dst\n"
"done:" %}
-
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_reg(src1, src2), OpcS, OpcT, reg_reg(src1, src2),
- cmpfp3(dst));
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10449,10 +9856,10 @@ instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
"setne $dst\n\t"
"movzbl $dst, $dst\n"
"done:" %}
-
- opcode(0x66, 0x0F, 0x2E);
- ins_encode(OpcP, REX_reg_mem(src1, src2), OpcS, OpcT, reg_mem(src1, src2),
- cmpfp3(dst));
+ ins_encode %{
+ __ ucomisd($src1$$XMMRegister, $src2$$Address);
+ emit_cmpfp3(_masm, $dst$$Register);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10470,377 +9877,12 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
"movzbl $dst, $dst\n"
"done:" %}
ins_encode %{
- Register Rdst = $dst$$Register;
- Label L_done;
__ ucomisd($src$$XMMRegister, $constantaddress($con));
- __ movl(Rdst, -1);
- __ jcc(Assembler::parity, L_done);
- __ jcc(Assembler::below, L_done);
- __ setb(Assembler::notEqual, Rdst);
- __ movzbl(Rdst, Rdst);
- __ bind(L_done);
+ emit_cmpfp3(_masm, $dst$$Register);
%}
ins_pipe(pipe_slow);
%}
-instruct addF_reg(regF dst, regF src)
-%{
- match(Set dst (AddF dst src));
-
- format %{ "addss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x58);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct addF_mem(regF dst, memory src)
-%{
- match(Set dst (AddF dst (LoadF src)));
-
- format %{ "addss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x58);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct addF_imm(regF dst, immF con) %{
- match(Set dst (AddF dst con));
- format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ addss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct addD_reg(regD dst, regD src)
-%{
- match(Set dst (AddD dst src));
-
- format %{ "addsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x58);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct addD_mem(regD dst, memory src)
-%{
- match(Set dst (AddD dst (LoadD src)));
-
- format %{ "addsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x58);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct addD_imm(regD dst, immD con) %{
- match(Set dst (AddD dst con));
- format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ addsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct subF_reg(regF dst, regF src)
-%{
- match(Set dst (SubF dst src));
-
- format %{ "subss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x5C);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct subF_mem(regF dst, memory src)
-%{
- match(Set dst (SubF dst (LoadF src)));
-
- format %{ "subss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x5C);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct subF_imm(regF dst, immF con) %{
- match(Set dst (SubF dst con));
- format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ subss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct subD_reg(regD dst, regD src)
-%{
- match(Set dst (SubD dst src));
-
- format %{ "subsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x5C);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct subD_mem(regD dst, memory src)
-%{
- match(Set dst (SubD dst (LoadD src)));
-
- format %{ "subsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x5C);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct subD_imm(regD dst, immD con) %{
- match(Set dst (SubD dst con));
- format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ subsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct mulF_reg(regF dst, regF src)
-%{
- match(Set dst (MulF dst src));
-
- format %{ "mulss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x59);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct mulF_mem(regF dst, memory src)
-%{
- match(Set dst (MulF dst (LoadF src)));
-
- format %{ "mulss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x59);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct mulF_imm(regF dst, immF con) %{
- match(Set dst (MulF dst con));
- format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ mulss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct mulD_reg(regD dst, regD src)
-%{
- match(Set dst (MulD dst src));
-
- format %{ "mulsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x59);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct mulD_mem(regD dst, memory src)
-%{
- match(Set dst (MulD dst (LoadD src)));
-
- format %{ "mulsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x59);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct mulD_imm(regD dst, immD con) %{
- match(Set dst (MulD dst con));
- format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ mulsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct divF_reg(regF dst, regF src)
-%{
- match(Set dst (DivF dst src));
-
- format %{ "divss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x5E);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct divF_mem(regF dst, memory src)
-%{
- match(Set dst (DivF dst (LoadF src)));
-
- format %{ "divss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x5E);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct divF_imm(regF dst, immF con) %{
- match(Set dst (DivF dst con));
- format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ divss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct divD_reg(regD dst, regD src)
-%{
- match(Set dst (DivD dst src));
-
- format %{ "divsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x5E);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct divD_mem(regD dst, memory src)
-%{
- match(Set dst (DivD dst (LoadD src)));
-
- format %{ "divsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x5E);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct divD_imm(regD dst, immD con) %{
- match(Set dst (DivD dst con));
- format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ divsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_reg(regF dst, regF src)
-%{
- match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
-
- format %{ "sqrtss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x51);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_mem(regF dst, memory src)
-%{
- match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF src)))));
-
- format %{ "sqrtss $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF3, 0x0F, 0x51);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtF_imm(regF dst, immF con) %{
- match(Set dst (ConvD2F (SqrtD (ConvF2D con))));
- format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ sqrtss($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_reg(regD dst, regD src)
-%{
- match(Set dst (SqrtD src));
-
- format %{ "sqrtsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x51);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_mem(regD dst, memory src)
-%{
- match(Set dst (SqrtD (LoadD src)));
-
- format %{ "sqrtsd $dst, $src" %}
- ins_cost(150); // XXX
- opcode(0xF2, 0x0F, 0x51);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
- ins_pipe(pipe_slow);
-%}
-
-instruct sqrtD_imm(regD dst, immD con) %{
- match(Set dst (SqrtD con));
- format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
- ins_cost(150); // XXX
- ins_encode %{
- __ sqrtsd($dst$$XMMRegister, $constantaddress($con));
- %}
- ins_pipe(pipe_slow);
-%}
-
-instruct absF_reg(regF dst)
-%{
- match(Set dst (AbsF dst));
-
- format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
- ins_encode(absF_encoding(dst));
- ins_pipe(pipe_slow);
-%}
-
-instruct absD_reg(regD dst)
-%{
- match(Set dst (AbsD dst));
-
- format %{ "andpd $dst, [0x7fffffffffffffff]\t"
- "# abs double by sign masking" %}
- ins_encode(absD_encoding(dst));
- ins_pipe(pipe_slow);
-%}
-
-instruct negF_reg(regF dst)
-%{
- match(Set dst (NegF dst));
-
- format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
- ins_encode(negF_encoding(dst));
- ins_pipe(pipe_slow);
-%}
-
-instruct negD_reg(regD dst)
-%{
- match(Set dst (NegD dst));
-
- format %{ "xorpd $dst, [0x8000000000000000]\t"
- "# neg double by sign flipping" %}
- ins_encode(negD_encoding(dst));
- ins_pipe(pipe_slow);
-%}
-
// -----------Trig and Trancendental Instructions------------------------------
instruct cosD_reg(regD dst) %{
match(Set dst (CosD dst));
@@ -10929,8 +9971,9 @@ instruct convF2D_reg_reg(regD dst, regF src)
match(Set dst (ConvF2D src));
format %{ "cvtss2sd $dst, $src" %}
- opcode(0xF3, 0x0F, 0x5A);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -10939,8 +9982,9 @@ instruct convF2D_reg_mem(regD dst, memory src)
match(Set dst (ConvF2D (LoadF src)));
format %{ "cvtss2sd $dst, $src" %}
- opcode(0xF3, 0x0F, 0x5A);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -10949,8 +9993,9 @@ instruct convD2F_reg_reg(regF dst, regD src)
match(Set dst (ConvD2F src));
format %{ "cvtsd2ss $dst, $src" %}
- opcode(0xF2, 0x0F, 0x5A);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -10959,8 +10004,9 @@ instruct convD2F_reg_mem(regF dst, memory src)
match(Set dst (ConvD2F (LoadD src)));
format %{ "cvtsd2ss $dst, $src" %}
- opcode(0xF2, 0x0F, 0x5A);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -10978,9 +10024,17 @@ instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
"call f2i_fixup\n\t"
"popq $dst\n"
"done: "%}
- opcode(0xF3, 0x0F, 0x2C);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
- f2i_fixup(dst, src));
+ ins_encode %{
+ Label done;
+ __ cvttss2sil($dst$$Register, $src$$XMMRegister);
+ __ cmpl($dst$$Register, 0x80000000);
+ __ jccb(Assembler::notEqual, done);
+ __ subptr(rsp, 8);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2i_fixup())));
+ __ pop($dst$$Register);
+ __ bind(done);
+ %}
ins_pipe(pipe_slow);
%}
@@ -10997,9 +10051,18 @@ instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
"call f2l_fixup\n\t"
"popq $dst\n"
"done: "%}
- opcode(0xF3, 0x0F, 0x2C);
- ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
- f2l_fixup(dst, src));
+ ins_encode %{
+ Label done;
+ __ cvttss2siq($dst$$Register, $src$$XMMRegister);
+ __ cmp64($dst$$Register,
+ ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+ __ jccb(Assembler::notEqual, done);
+ __ subptr(rsp, 8);
+ __ movflt(Address(rsp, 0), $src$$XMMRegister);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::f2l_fixup())));
+ __ pop($dst$$Register);
+ __ bind(done);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11016,9 +10079,17 @@ instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
"call d2i_fixup\n\t"
"popq $dst\n"
"done: "%}
- opcode(0xF2, 0x0F, 0x2C);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src),
- d2i_fixup(dst, src));
+ ins_encode %{
+ Label done;
+ __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
+ __ cmpl($dst$$Register, 0x80000000);
+ __ jccb(Assembler::notEqual, done);
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_fixup())));
+ __ pop($dst$$Register);
+ __ bind(done);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11035,9 +10106,18 @@ instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
"call d2l_fixup\n\t"
"popq $dst\n"
"done: "%}
- opcode(0xF2, 0x0F, 0x2C);
- ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src),
- d2l_fixup(dst, src));
+ ins_encode %{
+ Label done;
+ __ cvttsd2siq($dst$$Register, $src$$XMMRegister);
+ __ cmp64($dst$$Register,
+ ExternalAddress((address) StubRoutines::x86::double_sign_flip()));
+ __ jccb(Assembler::notEqual, done);
+ __ subptr(rsp, 8);
+ __ movdbl(Address(rsp, 0), $src$$XMMRegister);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_fixup())));
+ __ pop($dst$$Register);
+ __ bind(done);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11047,8 +10127,9 @@ instruct convI2F_reg_reg(regF dst, rRegI src)
match(Set dst (ConvI2F src));
format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
- opcode(0xF3, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11057,8 +10138,9 @@ instruct convI2F_reg_mem(regF dst, memory src)
match(Set dst (ConvI2F (LoadI src)));
format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
- opcode(0xF3, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11068,8 +10150,9 @@ instruct convI2D_reg_reg(regD dst, rRegI src)
match(Set dst (ConvI2D src));
format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_reg(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11078,8 +10161,9 @@ instruct convI2D_reg_mem(regD dst, memory src)
match(Set dst (ConvI2D (LoadI src)));
format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11116,8 +10200,9 @@ instruct convL2F_reg_reg(regF dst, rRegL src)
match(Set dst (ConvL2F src));
format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
- opcode(0xF3, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11126,8 +10211,9 @@ instruct convL2F_reg_mem(regF dst, memory src)
match(Set dst (ConvL2F (LoadL src)));
format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
- opcode(0xF3, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11136,8 +10222,9 @@ instruct convL2D_reg_reg(regD dst, rRegL src)
match(Set dst (ConvL2D src));
format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_reg_wide(dst, src), OpcS, OpcT, reg_reg(dst, src));
+ ins_encode %{
+ __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11146,8 +10233,9 @@ instruct convL2D_reg_mem(regD dst, memory src)
match(Set dst (ConvL2D (LoadL src)));
format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
- opcode(0xF2, 0x0F, 0x2A);
- ins_encode(OpcP, REX_reg_mem_wide(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
+ %}
ins_pipe(pipe_slow); // XXX
%}
@@ -11186,7 +10274,11 @@ instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
match(Set dst (AndL (ConvI2L src) mask));
format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
- ins_encode(enc_copy(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) {
+ __ movl($dst$$Register, $src$$Register);
+ }
+ %}
ins_pipe(ialu_reg_reg);
%}
@@ -11196,8 +10288,9 @@ instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
match(Set dst (AndL (ConvI2L (LoadI src)) mask));
format %{ "movl $dst, $src\t# i2l zero-extend\n\t" %}
- opcode(0x8B);
- ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+ ins_encode %{
+ __ movl($dst$$Register, $src$$Address);
+ %}
ins_pipe(ialu_reg_mem);
%}
@@ -11206,7 +10299,9 @@ instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
match(Set dst (AndL src mask));
format %{ "movl $dst, $src\t# zero-extend long" %}
- ins_encode(enc_copy_always(dst, src));
+ ins_encode %{
+ __ movl($dst$$Register, $src$$Register);
+ %}
ins_pipe(ialu_reg_reg);
%}
@@ -11215,7 +10310,9 @@ instruct convL2I_reg_reg(rRegI dst, rRegL src)
match(Set dst (ConvL2I src));
format %{ "movl $dst, $src\t# l2i" %}
- ins_encode(enc_copy_always(dst, src));
+ ins_encode %{
+ __ movl($dst$$Register, $src$$Register);
+ %}
ins_pipe(ialu_reg_reg);
%}
@@ -11226,8 +10323,9 @@ instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
ins_cost(125);
format %{ "movl $dst, $src\t# MoveF2I_stack_reg" %}
- opcode(0x8B);
- ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
+ ins_encode %{
+ __ movl($dst$$Register, Address(rsp, $src$$disp));
+ %}
ins_pipe(ialu_reg_mem);
%}
@@ -11237,8 +10335,9 @@ instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
ins_cost(125);
format %{ "movss $dst, $src\t# MoveI2F_stack_reg" %}
- opcode(0xF3, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe(pipe_slow);
%}
@@ -11248,8 +10347,9 @@ instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
ins_cost(125);
format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %}
- opcode(0x8B);
- ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
+ ins_encode %{
+ __ movq($dst$$Register, Address(rsp, $src$$disp));
+ %}
ins_pipe(ialu_reg_mem);
%}
@@ -11260,8 +10360,9 @@ instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
ins_cost(125);
format %{ "movlpd $dst, $src\t# MoveL2D_stack_reg" %}
- opcode(0x66, 0x0F, 0x12);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe(pipe_slow);
%}
@@ -11272,8 +10373,9 @@ instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
ins_cost(125);
format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %}
- opcode(0xF2, 0x0F, 0x10);
- ins_encode(OpcP, REX_reg_mem(dst, src), OpcS, OpcT, reg_mem(dst, src));
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
+ %}
ins_pipe(pipe_slow);
%}
@@ -11284,8 +10386,9 @@ instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
ins_cost(95); // XXX
format %{ "movss $dst, $src\t# MoveF2I_reg_stack" %}
- opcode(0xF3, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+ ins_encode %{
+ __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11295,8 +10398,9 @@ instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
ins_cost(100);
format %{ "movl $dst, $src\t# MoveI2F_reg_stack" %}
- opcode(0x89);
- ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
+ ins_encode %{
+ __ movl(Address(rsp, $dst$$disp), $src$$Register);
+ %}
ins_pipe( ialu_mem_reg );
%}
@@ -11306,8 +10410,9 @@ instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
ins_cost(95); // XXX
format %{ "movsd $dst, $src\t# MoveL2D_reg_stack" %}
- opcode(0xF2, 0x0F, 0x11);
- ins_encode(OpcP, REX_reg_mem(src, dst), OpcS, OpcT, reg_mem(src, dst));
+ ins_encode %{
+ __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
+ %}
ins_pipe(pipe_slow);
%}
@@ -11317,8 +10422,9 @@ instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
ins_cost(100);
format %{ "movq $dst, $src\t# MoveL2D_reg_stack" %}
- opcode(0x89);
- ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
+ ins_encode %{
+ __ movq(Address(rsp, $dst$$disp), $src$$Register);
+ %}
ins_pipe(ialu_mem_reg);
%}
@@ -11327,7 +10433,9 @@ instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
effect(DEF dst, USE src);
ins_cost(85);
format %{ "movd $dst,$src\t# MoveF2I" %}
- ins_encode %{ __ movdl($dst$$Register, $src$$XMMRegister); %}
+ ins_encode %{
+ __ movdl($dst$$Register, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11336,7 +10444,9 @@ instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
effect(DEF dst, USE src);
ins_cost(85);
format %{ "movd $dst,$src\t# MoveD2L" %}
- ins_encode %{ __ movdq($dst$$Register, $src$$XMMRegister); %}
+ ins_encode %{
+ __ movdq($dst$$Register, $src$$XMMRegister);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11346,7 +10456,9 @@ instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
effect(DEF dst, USE src);
ins_cost(300);
format %{ "movd $dst,$src\t# MoveI2F" %}
- ins_encode %{ __ movdl($dst$$XMMRegister, $src$$Register); %}
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11355,7 +10467,9 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
effect(DEF dst, USE src);
ins_cost(300);
format %{ "movd $dst,$src\t# MoveL2D" %}
- ins_encode %{ __ movdq($dst$$XMMRegister, $src$$Register); %}
+ ins_encode %{
+ __ movdq($dst$$XMMRegister, $src$$Register);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11365,7 +10479,13 @@ instruct Repl8B_reg(regD dst, regD src) %{
format %{ "MOVDQA $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode( pshufd_8x8(dst, src));
+ ins_encode %{
+ if ($dst$$reg != $src$$reg) {
+ __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
+ }
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11375,7 +10495,11 @@ instruct Repl8B_rRegI(regD dst, rRegI src) %{
format %{ "MOVD $dst,$src\n\t"
"PUNPCKLBW $dst,$dst\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
- ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( pipe_slow );
%}
@@ -11383,7 +10507,9 @@ instruct Repl8B_rRegI(regD dst, rRegI src) %{
instruct Repl8B_immI0(regD dst, immI0 zero) %{
match(Set dst (Replicate8B zero));
format %{ "PXOR $dst,$dst\t! replicate8B" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11391,7 +10517,9 @@ instruct Repl8B_immI0(regD dst, immI0 zero) %{
instruct Repl4S_reg(regD dst, regD src) %{
match(Set dst (Replicate4S src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
- ins_encode( pshufd_4x16(dst, src));
+ ins_encode %{
+ __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11400,7 +10528,10 @@ instruct Repl4S_rRegI(regD dst, rRegI src) %{
match(Set dst (Replicate4S src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
- ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11408,7 +10539,9 @@ instruct Repl4S_rRegI(regD dst, rRegI src) %{
instruct Repl4S_immI0(regD dst, immI0 zero) %{
match(Set dst (Replicate4S zero));
format %{ "PXOR $dst,$dst\t! replicate4S" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11416,7 +10549,9 @@ instruct Repl4S_immI0(regD dst, immI0 zero) %{
instruct Repl4C_reg(regD dst, regD src) %{
match(Set dst (Replicate4C src));
format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
- ins_encode( pshufd_4x16(dst, src));
+ ins_encode %{
+ __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11425,7 +10560,10 @@ instruct Repl4C_rRegI(regD dst, rRegI src) %{
match(Set dst (Replicate4C src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
- ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11433,7 +10571,9 @@ instruct Repl4C_rRegI(regD dst, rRegI src) %{
instruct Repl4C_immI0(regD dst, immI0 zero) %{
match(Set dst (Replicate4C zero));
format %{ "PXOR $dst,$dst\t! replicate4C" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11441,7 +10581,9 @@ instruct Repl4C_immI0(regD dst, immI0 zero) %{
instruct Repl2I_reg(regD dst, regD src) %{
match(Set dst (Replicate2I src));
format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
- ins_encode( pshufd(dst, src, 0x00));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11450,7 +10592,10 @@ instruct Repl2I_rRegI(regD dst, rRegI src) %{
match(Set dst (Replicate2I src));
format %{ "MOVD $dst,$src\n\t"
"PSHUFD $dst,$dst,0x00\t! replicate2I" %}
- ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
+ ins_encode %{
+ __ movdl($dst$$XMMRegister, $src$$Register);
+ __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11458,7 +10603,9 @@ instruct Repl2I_rRegI(regD dst, rRegI src) %{
instruct Repl2I_immI0(regD dst, immI0 zero) %{
match(Set dst (Replicate2I zero));
format %{ "PXOR $dst,$dst\t! replicate2I" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11466,7 +10613,9 @@ instruct Repl2I_immI0(regD dst, immI0 zero) %{
instruct Repl2F_reg(regD dst, regD src) %{
match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode( pshufd(dst, src, 0xe0));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11474,7 +10623,9 @@ instruct Repl2F_reg(regD dst, regD src) %{
instruct Repl2F_regF(regD dst, regF src) %{
match(Set dst (Replicate2F src));
format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
- ins_encode( pshufd(dst, src, 0xe0));
+ ins_encode %{
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -11482,7 +10633,9 @@ instruct Repl2F_regF(regD dst, regF src) %{
instruct Repl2F_immF0(regD dst, immF0 zero) %{
match(Set dst (Replicate2F zero));
format %{ "PXOR $dst,$dst\t! replicate2F" %}
- ins_encode( pxor(dst, dst));
+ ins_encode %{
+ __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+ %}
ins_pipe( fpu_reg_reg );
%}
@@ -12162,12 +11315,12 @@ instruct partialSubtypeCheck(rdi_RegP result,
effect(KILL rcx, KILL cr);
ins_cost(1100); // slightly larger than the next version
- format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+ format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
"movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
"addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
"repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
"jne,s miss\t\t# Missed: rdi not-zero\n\t"
- "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
+ "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
"xorq $result, $result\t\t Hit: rdi zero\n\t"
"miss:\t" %}
@@ -12185,12 +11338,12 @@ instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
effect(KILL rcx, KILL result);
ins_cost(1000);
- format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
+ format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
"movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
"addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
"repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
"jne,s miss\t\t# Missed: flags nz\n\t"
- "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
+ "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
"miss:\t" %}
opcode(0x0); // No need to XOR RDI
@@ -12358,13 +11511,13 @@ instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
// inlined locking and unlocking
instruct cmpFastLock(rFlagsReg cr,
- rRegP object, rRegP box, rax_RegI tmp, rRegP scr)
+ rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr)
%{
match(Set cr (FastLock object box));
- effect(TEMP tmp, TEMP scr);
+ effect(TEMP tmp, TEMP scr, USE_KILL box);
ins_cost(300);
- format %{ "fastlock $object,$box,$tmp,$scr" %}
+ format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
ins_encode(Fast_Lock(object, box, tmp, scr));
ins_pipe(pipe_slow);
%}
@@ -12373,10 +11526,10 @@ instruct cmpFastUnlock(rFlagsReg cr,
rRegP object, rax_RegP box, rRegP tmp)
%{
match(Set cr (FastUnlock object box));
- effect(TEMP tmp);
+ effect(TEMP tmp, USE_KILL box);
ins_cost(300);
- format %{ "fastunlock $object, $box, $tmp" %}
+ format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
ins_encode(Fast_Unlock(object, box, tmp));
ins_pipe(pipe_slow);
%}
diff --git a/hotspot/src/os/bsd/vm/os_bsd.cpp b/hotspot/src/os/bsd/vm/os_bsd.cpp
index c4af904a338..c6e27b273fd 100644
--- a/hotspot/src/os/bsd/vm/os_bsd.cpp
+++ b/hotspot/src/os/bsd/vm/os_bsd.cpp
@@ -2835,7 +2835,7 @@ void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
#endif
}
-void os::free_memory(char *addr, size_t bytes) {
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) {
::madvise(addr, bytes, MADV_DONTNEED);
}
diff --git a/hotspot/src/os/linux/vm/os_linux.cpp b/hotspot/src/os/linux/vm/os_linux.cpp
index a141f6a665c..0946b753eab 100644
--- a/hotspot/src/os/linux/vm/os_linux.cpp
+++ b/hotspot/src/os/linux/vm/os_linux.cpp
@@ -2546,8 +2546,8 @@ void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
}
}
-void os::free_memory(char *addr, size_t bytes) {
- commit_memory(addr, bytes, false);
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) {
+ commit_memory(addr, bytes, alignment_hint, false);
}
void os::numa_make_global(char *addr, size_t bytes) {
diff --git a/hotspot/src/os/posix/vm/os_posix.cpp b/hotspot/src/os/posix/vm/os_posix.cpp
index 4795d06fbeb..42078342ec9 100644
--- a/hotspot/src/os/posix/vm/os_posix.cpp
+++ b/hotspot/src/os/posix/vm/os_posix.cpp
@@ -59,6 +59,10 @@ void os::check_or_create_dump(void* exceptionRecord, void* contextRecord, char*
VMError::report_coredump_status(buffer, success);
}
+int os::get_last_error() { // POSIX implementation: the last error is simply the C library's errno
+ return errno; // returned unchanged; errno is neither cleared nor translated here
+}
+
bool os::is_debugger_attached() {
// not implemented
return false;
diff --git a/hotspot/src/os/solaris/vm/os_solaris.cpp b/hotspot/src/os/solaris/vm/os_solaris.cpp
index f4043e19595..925152c3bb1 100644
--- a/hotspot/src/os/solaris/vm/os_solaris.cpp
+++ b/hotspot/src/os/solaris/vm/os_solaris.cpp
@@ -2821,7 +2821,7 @@ bool os::commit_memory(char* addr, size_t bytes, size_t alignment_hint,
}
// Uncommit the pages in a specified region.
-void os::free_memory(char* addr, size_t bytes) {
+void os::free_memory(char* addr, size_t bytes, size_t alignment_hint) {
if (madvise(addr, bytes, MADV_FREE) < 0) {
debug_only(warning("MADV_FREE failed."));
return;
diff --git a/hotspot/src/os/windows/vm/os_windows.cpp b/hotspot/src/os/windows/vm/os_windows.cpp
index 01ccc179e3b..889faf54010 100644
--- a/hotspot/src/os/windows/vm/os_windows.cpp
+++ b/hotspot/src/os/windows/vm/os_windows.cpp
@@ -132,7 +132,6 @@ PVOID topLevelVectoredExceptionHandler = NULL;
// save DLL module handle, used by GetModuleFileName
HINSTANCE vm_lib_handle;
-static int getLastErrorString(char *buf, size_t len);
BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID reserved) {
switch (reason) {
@@ -1452,7 +1451,7 @@ void * os::dll_load(const char *name, char *ebuf, int ebuflen)
return result;
}
- long errcode = GetLastError();
+ DWORD errcode = GetLastError();
if (errcode == ERROR_MOD_NOT_FOUND) {
strncpy(ebuf, "Can't find dependent libraries", ebuflen-1);
ebuf[ebuflen-1]='\0';
@@ -1463,11 +1462,11 @@ void * os::dll_load(const char *name, char *ebuf, int ebuflen)
// If we can read dll-info and find that dll was built
// for an architecture other than Hotspot is running in
// - then print to buffer "DLL was built for a different architecture"
- // else call getLastErrorString to obtain system error message
+ // else call os::lasterror to obtain system error message
// Read system error message into ebuf
// It may or may not be overwritten below (in the for loop and just above)
- getLastErrorString(ebuf, (size_t) ebuflen);
+ lasterror(ebuf, (size_t) ebuflen);
ebuf[ebuflen-1]='\0';
int file_descriptor=::open(name, O_RDONLY | O_BINARY, 0);
if (file_descriptor<0)
@@ -1500,7 +1499,7 @@ void * os::dll_load(const char *name, char *ebuf, int ebuflen)
::close(file_descriptor);
if (failed_to_get_lib_arch)
{
- // file i/o error - report getLastErrorString(...) msg
+ // file i/o error - report os::lasterror(...) msg
return NULL;
}
@@ -1543,7 +1542,7 @@ void * os::dll_load(const char *name, char *ebuf, int ebuflen)
"Didn't find runing architecture code in arch_array");
// If the architure is right
- // but some other error took place - report getLastErrorString(...) msg
+ // but some other error took place - report os::lasterror(...) msg
if (lib_arch == running_arch)
{
return NULL;
@@ -1775,12 +1774,12 @@ void os::print_jni_name_suffix_on(outputStream* st, int args_size) {
// This method is a copy of JDK's sysGetLastErrorString
// from src/windows/hpi/src/system_md.c
-size_t os::lasterror(char *buf, size_t len) {
- long errval;
+size_t os::lasterror(char* buf, size_t len) {
+ DWORD errval;
if ((errval = GetLastError()) != 0) {
- /* DOS error */
- int n = (int)FormatMessage(
+ // DOS error
+ size_t n = (size_t)FormatMessage(
FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
errval,
@@ -1789,7 +1788,7 @@ size_t os::lasterror(char *buf, size_t len) {
(DWORD)len,
NULL);
if (n > 3) {
- /* Drop final '.', CR, LF */
+ // Drop final '.', CR, LF
if (buf[n - 1] == '\n') n--;
if (buf[n - 1] == '\r') n--;
if (buf[n - 1] == '.') n--;
@@ -1799,17 +1798,25 @@ size_t os::lasterror(char *buf, size_t len) {
}
if (errno != 0) {
- /* C runtime error that has no corresponding DOS error code */
- const char *s = strerror(errno);
+ // C runtime error that has no corresponding DOS error code
+ const char* s = strerror(errno);
size_t n = strlen(s);
if (n >= len) n = len - 1;
strncpy(buf, s, n);
buf[n] = '\0';
return n;
}
+
return 0;
}
+int os::get_last_error() { // Windows implementation: prefer the Win32 error code, else the C runtime's errno
+ DWORD error = GetLastError(); // Win32 last-error code is consulted first
+ if (error == 0)
+ error = errno; // no Win32 error recorded: fall back to errno
+ return (int)error; // narrowed to int to match the platform-independent signature
+}
+
// sun.misc.Signal
// NOTE that this is a workaround for an apparent kernel bug where if
// a signal handler for SIGBREAK is installed then that signal handler
@@ -3130,7 +3137,7 @@ bool os::unguard_memory(char* addr, size_t bytes) {
}
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
-void os::free_memory(char *addr, size_t bytes) { }
+void os::free_memory(char *addr, size_t bytes, size_t alignment_hint) { }
void os::numa_make_global(char *addr, size_t bytes) { }
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { }
bool os::numa_topology_changed() { return false; }
@@ -4746,7 +4753,7 @@ bool os::check_heap(bool force) {
fatal("corrupted C heap");
}
}
- int err = GetLastError();
+ DWORD err = GetLastError();
if (err != ERROR_NO_MORE_ITEMS && err != ERROR_CALL_NOT_IMPLEMENTED) {
fatal(err_msg("heap walk aborted with error %d", err));
}
@@ -4778,45 +4785,6 @@ LONG WINAPI os::win32::serialize_fault_filter(struct _EXCEPTION_POINTERS* e) {
return EXCEPTION_CONTINUE_SEARCH;
}
-static int getLastErrorString(char *buf, size_t len)
-{
- long errval;
-
- if ((errval = GetLastError()) != 0)
- {
- /* DOS error */
- size_t n = (size_t)FormatMessage(
- FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL,
- errval,
- 0,
- buf,
- (DWORD)len,
- NULL);
- if (n > 3) {
- /* Drop final '.', CR, LF */
- if (buf[n - 1] == '\n') n--;
- if (buf[n - 1] == '\r') n--;
- if (buf[n - 1] == '.') n--;
- buf[n] = '\0';
- }
- return (int)n;
- }
-
- if (errno != 0)
- {
- /* C runtime error that has no corresponding DOS error code */
- const char *s = strerror(errno);
- size_t n = strlen(s);
- if (n >= len) n = len - 1;
- strncpy(buf, s, n);
- buf[n] = '\0';
- return (int)n;
- }
- return 0;
-}
-
-
// We don't build a headless jre for Windows
bool os::is_headless_jre() { return false; }
diff --git a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.hpp b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.hpp
index 7a4e71081cf..f109e246fae 100644
--- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.hpp
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.hpp
@@ -28,6 +28,8 @@
static void setup_fpu();
static bool supports_sse();
+ static jlong rdtsc();
+
static bool is_allocatable(size_t bytes);
// Used to register dynamic code cache area with the OS
diff --git a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.inline.hpp b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.inline.hpp
new file mode 100644
index 00000000000..67b2482aaf3
--- /dev/null
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.inline.hpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_BSD_X86_VM_OS_BSD_X86_INLINE_HPP
+#define OS_CPU_BSD_X86_VM_OS_BSD_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+// Reads the CPU time-stamp counter via RDTSC. See http://www.technovelty.org/code/c/reading-rdtsc.html for details
+inline jlong os::rdtsc() {
+#ifndef AMD64
+ // 32-bit build: 64 bit result in edx:eax; the "=A" constraint binds that register pair to res
+ uint64_t res;
+ __asm__ __volatile__ ("rdtsc" : "=A" (res));
+ return (jlong)res;
+#else
+ uint64_t res;
+ uint32_t ts1, ts2; // ts1 receives eax ("=a", low half), ts2 receives edx ("=d", high half)
+ __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2));
+ res = ((uint64_t)ts1 | (uint64_t)ts2 << 32); // recombine: low | (high << 32)
+ return (jlong)res;
+#endif // AMD64
+}
+
+#endif // OS_CPU_BSD_X86_VM_OS_BSD_X86_INLINE_HPP
diff --git a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.hpp b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.hpp
index 64954d480f8..9bb22f8e6ba 100644
--- a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.hpp
+++ b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,8 @@
static void setup_fpu();
static bool supports_sse();
+ static jlong rdtsc();
+
static bool is_allocatable(size_t bytes);
// Used to register dynamic code cache area with the OS
diff --git a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.inline.hpp b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.inline.hpp
new file mode 100644
index 00000000000..fee719b01b8
--- /dev/null
+++ b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.inline.hpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
+#define OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+// Reads the CPU time-stamp counter via RDTSC. See http://www.technovelty.org/code/c/reading-rdtsc.html for details
+inline jlong os::rdtsc() {
+#ifndef AMD64
+ // 32-bit build: 64 bit result in edx:eax; the "=A" constraint binds that register pair to res
+ uint64_t res;
+ __asm__ __volatile__ ("rdtsc" : "=A" (res));
+ return (jlong)res;
+#else
+ uint64_t res;
+ uint32_t ts1, ts2; // ts1 receives eax ("=a", low half), ts2 receives edx ("=d", high half)
+ __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2));
+ res = ((uint64_t)ts1 | (uint64_t)ts2 << 32); // recombine: low | (high << 32)
+ return (jlong)res;
+#endif // AMD64
+}
+
+#endif // OS_CPU_LINUX_X86_VM_OS_LINUX_X86_INLINE_HPP
diff --git a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp
index 5841fb3eb59..fd4f15282b4 100644
--- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp
@@ -46,6 +46,8 @@
static bool supports_sse();
+ static jlong rdtsc();
+
static bool is_allocatable(size_t bytes);
// Used to register dynamic code cache area with the OS
diff --git a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.inline.hpp b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.inline.hpp
new file mode 100644
index 00000000000..986884e031f
--- /dev/null
+++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.inline.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_SOLARIS_X86_VM_OS_SOLARIS_X86_INLINE_HPP
+#define OS_CPU_SOLARIS_X86_VM_OS_SOLARIS_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+inline jlong os::rdtsc() { return _raw_rdtsc(); } // _raw_rdtsc is the inline-assembly template in solaris_x86_32.il / solaris_x86_64.il
+
+#endif // OS_CPU_SOLARIS_X86_VM_OS_SOLARIS_X86_INLINE_HPP
diff --git a/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_32.il b/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_32.il
index 056cb140293..b635a8292e6 100644
--- a/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_32.il
+++ b/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_32.il
@@ -43,6 +43,11 @@
movl %ebp, %eax
.end
+ // Support for os::rdtsc()
+ .inline _raw_rdtsc,0
+ rdtsc // leaves the 64-bit time-stamp counter in %edx:%eax
+ .end
+
// Support for jint Atomic::add(jint inc, volatile jint* dest)
// An additional bool (os::is_MP()) is passed as the last argument.
.inline _Atomic_add,3
@@ -113,7 +118,6 @@
fistpll (%eax)
.end
-
// Support for OrderAccess::acquire()
.inline _OrderAccess_acquire,0
movl 0(%esp), %eax
diff --git a/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_64.il b/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_64.il
index b260375b164..fb7946b8c5c 100644
--- a/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_64.il
+++ b/hotspot/src/os_cpu/solaris_x86/vm/solaris_x86_64.il
@@ -30,12 +30,19 @@
movq %fs:0, %rax
.end
- // Get the frame pointer from current frame.
+ // Get current fp
.inline _get_current_fp,0
.volatile
movq %rbp, %rax
.end
+ // Support for os::rdtsc()
+ .inline _raw_rdtsc,0
+ rdtsc // low 32 bits of the counter in %eax, high 32 bits in %edx
+ salq $32, %rdx // shift the high half up into bits 63..32
+ orq %rdx, %rax // merge both halves so %rax holds the full 64-bit value
+ .end
+
// Support for jint Atomic::add(jint add_value, volatile jint* dest)
.inline _Atomic_add,2
movl %edi, %eax // save add_value for return
diff --git a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp
index e7c3303c0f7..74e3519c901 100644
--- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp
@@ -58,6 +58,8 @@
static void setup_fpu();
static bool supports_sse() { return true; }
+ static jlong rdtsc();
+
static bool register_code_area(char *low, char *high);
#endif // OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_HPP
diff --git a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.inline.hpp b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.inline.hpp
new file mode 100644
index 00000000000..d108e3e4202
--- /dev/null
+++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.inline.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_INLINE_HPP
+#define OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+inline jlong os::rdtsc() {
+ // 32 bit: 64 bit result in edx:eax
+ // 64 bit: 64 bit value in rax
+ uint64_t res;
+ res = (uint64_t)__rdtsc(); // compiler intrinsic that emits the RDTSC instruction
+ return (jlong)res; // reinterpreted as signed jlong for the shared os:: interface
+}
+
+#endif // OS_CPU_WINDOWS_X86_VM_OS_WINDOWS_X86_INLINE_HPP
diff --git a/hotspot/src/share/vm/adlc/formssel.cpp b/hotspot/src/share/vm/adlc/formssel.cpp
index c1d39849bee..3c69d6a0da8 100644
--- a/hotspot/src/share/vm/adlc/formssel.cpp
+++ b/hotspot/src/share/vm/adlc/formssel.cpp
@@ -627,6 +627,7 @@ bool InstructForm::is_wide_memory_kill(FormDict &globals) const {
if( strcmp(_matrule->_opType,"MemBarAcquire") == 0 ) return true;
if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true;
if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true;
+ if( strcmp(_matrule->_opType,"MemBarStoreStore") == 0 ) return true;
return false;
}
@@ -3978,7 +3979,8 @@ bool MatchRule::is_ideal_membar() const {
!strcmp(_opType,"MemBarAcquireLock") ||
!strcmp(_opType,"MemBarReleaseLock") ||
!strcmp(_opType,"MemBarVolatile" ) ||
- !strcmp(_opType,"MemBarCPUOrder" ) ;
+ !strcmp(_opType,"MemBarCPUOrder" ) ||
+ !strcmp(_opType,"MemBarStoreStore" );
}
bool MatchRule::is_ideal_loadPC() const {
diff --git a/hotspot/src/share/vm/asm/assembler.cpp b/hotspot/src/share/vm/asm/assembler.cpp
index 86011e97496..2bcdcbc884d 100644
--- a/hotspot/src/share/vm/asm/assembler.cpp
+++ b/hotspot/src/share/vm/asm/assembler.cpp
@@ -61,6 +61,7 @@ AbstractAssembler::AbstractAssembler(CodeBuffer* code) {
_code_limit = cs->limit();
_code_pos = cs->end();
_oop_recorder= code->oop_recorder();
+ DEBUG_ONLY( _short_branch_delta = 0; )
if (_code_begin == NULL) {
vm_exit_out_of_memory(0, err_msg("CodeCache: no room for %s",
code->name()));
diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp
index 8db7eef2ea5..c25aa3fca3f 100644
--- a/hotspot/src/share/vm/asm/assembler.hpp
+++ b/hotspot/src/share/vm/asm/assembler.hpp
@@ -241,6 +241,33 @@ class AbstractAssembler : public ResourceObj {
// Make it return true on platforms which need to verify
// instruction boundaries for some operations.
inline static bool pd_check_instruction_mark();
+
+ // Add delta to short branch distance to verify that it still fits into imm8.
+ int _short_branch_delta;
+
+ int short_branch_delta() const { return _short_branch_delta; }
+ void set_short_branch_delta() { _short_branch_delta = 32; }
+ void clear_short_branch_delta() { _short_branch_delta = 0; }
+
+ class ShortBranchVerifier: public StackObj {
+ private:
+ AbstractAssembler* _assm;
+
+ public:
+ ShortBranchVerifier(AbstractAssembler* assm) : _assm(assm) {
+ assert(assm->short_branch_delta() == 0, "overlapping instructions");
+ _assm->set_short_branch_delta();
+ }
+ ~ShortBranchVerifier() {
+ _assm->clear_short_branch_delta();
+ }
+ };
+ #else
+ // Dummy in product.
+ class ShortBranchVerifier: public StackObj {
+ public:
+ ShortBranchVerifier(AbstractAssembler* assm) {}
+ };
#endif
// Label functions
diff --git a/hotspot/src/share/vm/c1/c1_LIR.cpp b/hotspot/src/share/vm/c1/c1_LIR.cpp
index 267b966641c..629d849bc60 100644
--- a/hotspot/src/share/vm/c1/c1_LIR.cpp
+++ b/hotspot/src/share/vm/c1/c1_LIR.cpp
@@ -854,6 +854,9 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
if (opTypeCheck->_info_for_exception) do_info(opTypeCheck->_info_for_exception);
if (opTypeCheck->_info_for_patch) do_info(opTypeCheck->_info_for_patch);
if (opTypeCheck->_object->is_valid()) do_input(opTypeCheck->_object);
+ if (op->code() == lir_store_check && opTypeCheck->_object->is_valid()) {
+ do_temp(opTypeCheck->_object);
+ }
if (opTypeCheck->_array->is_valid()) do_input(opTypeCheck->_array);
if (opTypeCheck->_tmp1->is_valid()) do_temp(opTypeCheck->_tmp1);
if (opTypeCheck->_tmp2->is_valid()) do_temp(opTypeCheck->_tmp2);
diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
index 0491d71565e..5f3155d412a 100644
--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
+++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp
@@ -1256,8 +1256,7 @@ void LIRGenerator::do_getClass(Intrinsic* x) {
info = state_for(x);
}
__ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), result, info);
- __ move_wide(new LIR_Address(result, Klass::java_mirror_offset_in_bytes() +
- klassOopDesc::klass_part_offset_in_bytes(), T_OBJECT), result);
+ __ move_wide(new LIR_Address(result, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
}
diff --git a/hotspot/src/share/vm/c1/c1_Optimizer.cpp b/hotspot/src/share/vm/c1/c1_Optimizer.cpp
index 5c3640b1ad7..e1a0ef7bfdd 100644
--- a/hotspot/src/share/vm/c1/c1_Optimizer.cpp
+++ b/hotspot/src/share/vm/c1/c1_Optimizer.cpp
@@ -122,18 +122,32 @@ void CE_Eliminator::block_do(BlockBegin* block) {
if (sux != f_goto->default_sux()) return;
// check if at least one word was pushed on sux_state
+ // inlining depths must match
+ ValueStack* if_state = if_->state();
ValueStack* sux_state = sux->state();
- if (sux_state->stack_size() <= if_->state()->stack_size()) return;
+ if (if_state->scope()->level() > sux_state->scope()->level()) {
+ while (sux_state->scope() != if_state->scope()) {
+ if_state = if_state->caller_state();
+ assert(if_state != NULL, "states do not match up");
+ }
+ } else if (if_state->scope()->level() < sux_state->scope()->level()) {
+ while (sux_state->scope() != if_state->scope()) {
+ sux_state = sux_state->caller_state();
+ assert(sux_state != NULL, "states do not match up");
+ }
+ }
+
+ if (sux_state->stack_size() <= if_state->stack_size()) return;
// check if phi function is present at end of successor stack and that
// only this phi was pushed on the stack
- Value sux_phi = sux_state->stack_at(if_->state()->stack_size());
+ Value sux_phi = sux_state->stack_at(if_state->stack_size());
if (sux_phi == NULL || sux_phi->as_Phi() == NULL || sux_phi->as_Phi()->block() != sux) return;
- if (sux_phi->type()->size() != sux_state->stack_size() - if_->state()->stack_size()) return;
+ if (sux_phi->type()->size() != sux_state->stack_size() - if_state->stack_size()) return;
// get the values that were pushed in the true- and false-branch
- Value t_value = t_goto->state()->stack_at(if_->state()->stack_size());
- Value f_value = f_goto->state()->stack_at(if_->state()->stack_size());
+ Value t_value = t_goto->state()->stack_at(if_state->stack_size());
+ Value f_value = f_goto->state()->stack_at(if_state->stack_size());
// backend does not support floats
assert(t_value->type()->base() == f_value->type()->base(), "incompatible types");
@@ -180,11 +194,7 @@ void CE_Eliminator::block_do(BlockBegin* block) {
Goto* goto_ = new Goto(sux, state_before, if_->is_safepoint() || t_goto->is_safepoint() || f_goto->is_safepoint());
// prepare state for Goto
- ValueStack* goto_state = if_->state();
- while (sux_state->scope() != goto_state->scope()) {
- goto_state = goto_state->caller_state();
- assert(goto_state != NULL, "states do not match up");
- }
+ ValueStack* goto_state = if_state;
goto_state = goto_state->copy(ValueStack::StateAfter, goto_state->bci());
goto_state->push(result->type(), result);
assert(goto_state->is_same(sux_state), "states must match now");
diff --git a/hotspot/src/share/vm/ci/ciInstanceKlass.cpp b/hotspot/src/share/vm/ci/ciInstanceKlass.cpp
index f10b92fa33d..b7c2ab75850 100644
--- a/hotspot/src/share/vm/ci/ciInstanceKlass.cpp
+++ b/hotspot/src/share/vm/ci/ciInstanceKlass.cpp
@@ -54,7 +54,7 @@ ciInstanceKlass::ciInstanceKlass(KlassHandle h_k) :
_flags = ciFlags(access_flags);
_has_finalizer = access_flags.has_finalizer();
_has_subklass = ik->subklass() != NULL;
- _init_state = (instanceKlass::ClassState)ik->get_init_state();
+ _init_state = ik->init_state();
_nonstatic_field_size = ik->nonstatic_field_size();
_has_nonstatic_fields = ik->has_nonstatic_fields();
_nonstatic_fields = NULL; // initialized lazily by compute_nonstatic_fields:
@@ -118,7 +118,7 @@ ciInstanceKlass::ciInstanceKlass(ciSymbol* name,
void ciInstanceKlass::compute_shared_init_state() {
GUARDED_VM_ENTRY(
instanceKlass* ik = get_instanceKlass();
- _init_state = (instanceKlass::ClassState)ik->get_init_state();
+ _init_state = ik->init_state();
)
}
diff --git a/hotspot/src/share/vm/ci/ciTypeFlow.cpp b/hotspot/src/share/vm/ci/ciTypeFlow.cpp
index a5042d9676f..6678b4c4b3a 100644
--- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp
@@ -1589,7 +1589,7 @@ ciTypeFlow::Block::Block(ciTypeFlow* outer,
_next = NULL;
_on_work_list = false;
_backedge_copy = false;
- _exception_entry = false;
+ _has_monitorenter = false;
_trap_bci = -1;
_trap_index = 0;
df_init();
@@ -2182,6 +2182,10 @@ bool ciTypeFlow::clone_loop_heads(Loop* lp, StateVector* temp_vector, JsrSet* te
!head->is_clonable_exit(lp))
continue;
+ // Avoid BoxLock merge.
+ if (EliminateNestedLocks && head->has_monitorenter())
+ continue;
+
// check not already cloned
if (head->backedge_copy_count() != 0)
continue;
@@ -2322,6 +2326,10 @@ void ciTypeFlow::flow_block(ciTypeFlow::Block* block,
// Watch for bailouts.
if (failing()) return;
+ if (str.cur_bc() == Bytecodes::_monitorenter) {
+ block->set_has_monitorenter();
+ }
+
if (res) {
// We have encountered a trap. Record it in this block.
diff --git a/hotspot/src/share/vm/ci/ciTypeFlow.hpp b/hotspot/src/share/vm/ci/ciTypeFlow.hpp
index 5cc5c90c891..8a8d241dac8 100644
--- a/hotspot/src/share/vm/ci/ciTypeFlow.hpp
+++ b/hotspot/src/share/vm/ci/ciTypeFlow.hpp
@@ -544,15 +544,19 @@ public:
// Has this block been cloned for a loop backedge?
bool _backedge_copy;
+ // This block is an entry to an irreducible loop.
+ bool _irreducible_entry;
+
+ // This block contains a monitorenter bytecode.
+ bool _has_monitorenter;
+
// A pointer used for our internal work list
- Block* _next;
bool _on_work_list; // on the work list
+ Block* _next;
Block* _rpo_next; // Reverse post order list
// Loop info
Loop* _loop; // nearest loop
- bool _irreducible_entry; // entry to irreducible loop
- bool _exception_entry; // entry to exception handler
ciBlock* ciblock() const { return _ciblock; }
StateVector* state() const { return _state; }
@@ -689,6 +693,8 @@ public:
bool is_loop_head() const { return _loop && _loop->head() == this; }
void set_irreducible_entry(bool c) { _irreducible_entry = c; }
bool is_irreducible_entry() const { return _irreducible_entry; }
+ void set_has_monitorenter() { _has_monitorenter = true; }
+ bool has_monitorenter() const { return _has_monitorenter; }
bool is_visited() const { return has_pre_order(); }
bool is_post_visited() const { return has_post_order(); }
bool is_clonable_exit(Loop* lp);
diff --git a/hotspot/src/share/vm/classfile/classFileParser.cpp b/hotspot/src/share/vm/classfile/classFileParser.cpp
index b28b8ad7e19..83bd0381ce2 100644
--- a/hotspot/src/share/vm/classfile/classFileParser.cpp
+++ b/hotspot/src/share/vm/classfile/classFileParser.cpp
@@ -45,6 +45,7 @@
#include "oops/methodOop.hpp"
#include "oops/symbol.hpp"
#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/perfData.hpp"
#include "runtime/reflection.hpp"
@@ -1050,7 +1051,7 @@ static FieldAllocationType basic_type_to_atype(bool is_static, BasicType type) {
class FieldAllocationCount: public ResourceObj {
public:
- unsigned int count[MAX_FIELD_ALLOCATION_TYPE];
+ u2 count[MAX_FIELD_ALLOCATION_TYPE];
FieldAllocationCount() {
for (int i = 0; i < MAX_FIELD_ALLOCATION_TYPE; i++) {
@@ -1060,6 +1061,8 @@ class FieldAllocationCount: public ResourceObj {
FieldAllocationType update(bool is_static, BasicType type) {
FieldAllocationType atype = basic_type_to_atype(is_static, type);
+ // Make sure there is no overflow with injected fields.
+ assert(count[atype] < 0xFFFF, "More than 65535 fields");
count[atype]++;
return atype;
}
@@ -1070,7 +1073,7 @@ typeArrayHandle ClassFileParser::parse_fields(Symbol* class_name,
constantPoolHandle cp, bool is_interface,
FieldAllocationCount *fac,
objArrayHandle* fields_annotations,
- int* java_fields_count_ptr, TRAPS) {
+ u2* java_fields_count_ptr, TRAPS) {
ClassFileStream* cfs = stream();
typeArrayHandle nullHandle;
cfs->guarantee_more(2, CHECK_(nullHandle)); // length
@@ -2639,8 +2642,11 @@ instanceKlassHandle ClassFileParser::parseClassFile(Symbol* name,
TempNewSymbol& parsed_name,
bool verify,
TRAPS) {
- // So that JVMTI can cache class file in the state before retransformable agents
- // have modified it
+ // When a retransformable agent is attached, JVMTI caches the
+ // class bytes that existed before the first retransformation.
+ // If RedefineClasses() was used before the retransformable
+ // agent attached, then the cached class bytes may not be the
+ // original class bytes.
unsigned char *cached_class_file_bytes = NULL;
jint cached_class_file_length;
@@ -2660,6 +2666,25 @@ instanceKlassHandle ClassFileParser::parseClassFile(Symbol* name,
_max_bootstrap_specifier_index = -1;
if (JvmtiExport::should_post_class_file_load_hook()) {
+ // Get the cached class file bytes (if any) from the class that
+ // is being redefined or retransformed. We use jvmti_thread_state()
+ // instead of JvmtiThreadState::state_for(jt) so we don't allocate
+ // a JvmtiThreadState any earlier than necessary. This will help
+ // avoid the bug described by 7126851.
+ JvmtiThreadState *state = jt->jvmti_thread_state();
+ if (state != NULL) {
+ KlassHandle *h_class_being_redefined =
+ state->get_class_being_redefined();
+ if (h_class_being_redefined != NULL) {
+ instanceKlassHandle ikh_class_being_redefined =
+ instanceKlassHandle(THREAD, (*h_class_being_redefined)());
+ cached_class_file_bytes =
+ ikh_class_being_redefined->get_cached_class_file_bytes();
+ cached_class_file_length =
+ ikh_class_being_redefined->get_cached_class_file_len();
+ }
+ }
+
unsigned char* ptr = cfs->buffer();
unsigned char* end_ptr = cfs->buffer() + cfs->length();
@@ -2843,7 +2868,7 @@ instanceKlassHandle ClassFileParser::parseClassFile(Symbol* name,
local_interfaces = parse_interfaces(cp, itfs_len, class_loader, protection_domain, _class_name, CHECK_(nullHandle));
}
- int java_fields_count = 0;
+ u2 java_fields_count = 0;
// Fields (offsets are filled in later)
FieldAllocationCount fac;
objArrayHandle fields_annotations;
diff --git a/hotspot/src/share/vm/classfile/classFileParser.hpp b/hotspot/src/share/vm/classfile/classFileParser.hpp
index 8e0db5651c1..fef48eb61f4 100644
--- a/hotspot/src/share/vm/classfile/classFileParser.hpp
+++ b/hotspot/src/share/vm/classfile/classFileParser.hpp
@@ -91,7 +91,7 @@ class ClassFileParser VALUE_OBJ_CLASS_SPEC {
constantPoolHandle cp, bool is_interface,
FieldAllocationCount *fac,
objArrayHandle* fields_annotations,
- int* java_fields_count_ptr, TRAPS);
+ u2* java_fields_count_ptr, TRAPS);
// Method parsing
methodHandle parse_method(constantPoolHandle cp, bool is_interface,
diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp
index 16652e5efb7..40f36ad66cf 100644
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
@@ -296,6 +296,7 @@
template(finalize_method_name, "finalize") \
template(reference_lock_name, "lock") \
template(reference_discovered_name, "discovered") \
+ template(run_finalization_name, "runFinalization") \
template(run_finalizers_on_exit_name, "runFinalizersOnExit") \
template(uncaughtException_name, "uncaughtException") \
template(dispatchUncaughtException_name, "dispatchUncaughtException") \
diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
index 2f960435043..2cb5e2f3cef 100644
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
@@ -2598,7 +2598,7 @@ void CompactibleFreeListSpace::printFLCensus(size_t sweep_count) const {
AdaptiveWeightedAverage CFLS_LAB::_blocks_to_claim[] =
VECTOR_257(AdaptiveWeightedAverage(OldPLABWeight, (float)CMSParPromoteBlocksToClaim));
size_t CFLS_LAB::_global_num_blocks[] = VECTOR_257(0);
-int CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
+uint CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
CFLS_LAB::CFLS_LAB(CompactibleFreeListSpace* cfls) :
_cfls(cfls)
@@ -2732,7 +2732,7 @@ void CFLS_LAB::retire(int tid) {
// Update globals stats for num_blocks used
_global_num_blocks[i] += (_num_blocks[i] - num_retire);
_global_num_workers[i]++;
- assert(_global_num_workers[i] <= (ssize_t)ParallelGCThreads, "Too big");
+ assert(_global_num_workers[i] <= ParallelGCThreads, "Too big");
if (num_retire > 0) {
_cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
// Reset this list.
diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
index 8b7e1077283..90d2f5f3918 100644
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
@@ -631,7 +631,7 @@ class CFLS_LAB : public CHeapObj {
static AdaptiveWeightedAverage
_blocks_to_claim [CompactibleFreeListSpace::IndexSetSize];
static size_t _global_num_blocks [CompactibleFreeListSpace::IndexSetSize];
- static int _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
+ static uint _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
size_t _num_blocks [CompactibleFreeListSpace::IndexSetSize];
// Internal work method
diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
index bff5b40ab9a..39b57341ee8 100644
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@@ -3779,7 +3779,7 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
terminator()->reset_for_reuse(active_workers);
}
- void work(int i);
+ void work(uint worker_id);
bool should_yield() {
return ConcurrentMarkSweepThread::should_yield()
&& !_collector->foregroundGCIsActive()
@@ -3852,7 +3852,7 @@ void CMSConcMarkingTerminator::yield() {
// . if neither is available, offer termination
// -- Terminate and return result
//
-void CMSConcMarkingTask::work(int i) {
+void CMSConcMarkingTask::work(uint worker_id) {
elapsedTimer _timer;
ResourceMark rm;
HandleMark hm;
@@ -3860,37 +3860,40 @@ void CMSConcMarkingTask::work(int i) {
DEBUG_ONLY(_collector->verify_overflow_empty();)
// Before we begin work, our work queue should be empty
- assert(work_queue(i)->size() == 0, "Expected to be empty");
+ assert(work_queue(worker_id)->size() == 0, "Expected to be empty");
// Scan the bitmap covering _cms_space, tracing through grey objects.
_timer.start();
- do_scan_and_mark(i, _cms_space);
+ do_scan_and_mark(worker_id, _cms_space);
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr("Finished cms space scanning in %dth thread: %3.3f sec",
- i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
+ worker_id, _timer.seconds());
+ // XXX: need xxx/xxx type of notation, two timers
}
// ... do the same for the _perm_space
_timer.reset();
_timer.start();
- do_scan_and_mark(i, _perm_space);
+ do_scan_and_mark(worker_id, _perm_space);
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr("Finished perm space scanning in %dth thread: %3.3f sec",
- i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
+ worker_id, _timer.seconds());
+ // XXX: need xxx/xxx type of notation, two timers
}
// ... do work stealing
_timer.reset();
_timer.start();
- do_work_steal(i);
+ do_work_steal(worker_id);
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr("Finished work stealing in %dth thread: %3.3f sec",
- i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
+ worker_id, _timer.seconds());
+ // XXX: need xxx/xxx type of notation, two timers
}
assert(_collector->_markStack.isEmpty(), "Should have been emptied");
- assert(work_queue(i)->size() == 0, "Should have been emptied");
+ assert(work_queue(worker_id)->size() == 0, "Should have been emptied");
// Note that under the current task protocol, the
// following assertion is true even of the spaces
// expanded since the completion of the concurrent
@@ -3946,7 +3949,7 @@ void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) {
// We allow that there may be no tasks to do here because
// we are restarting after a stack overflow.
assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
- int nth_task = 0;
+ uint nth_task = 0;
HeapWord* aligned_start = sp->bottom();
if (sp->used_region().contains(_restart_addr)) {
@@ -5075,7 +5078,7 @@ class CMSParRemarkTask: public AbstractGangTask {
ParallelTaskTerminator* terminator() { return &_term; }
int n_workers() { return _n_workers; }
- void work(int i);
+ void work(uint worker_id);
private:
// Work method in support of parallel rescan ... of young gen spaces
@@ -5096,7 +5099,7 @@ class CMSParRemarkTask: public AbstractGangTask {
// also is passed to do_dirty_card_rescan_tasks() and to
// do_work_steal() to select the i-th task_queue.
-void CMSParRemarkTask::work(int i) {
+void CMSParRemarkTask::work(uint worker_id) {
elapsedTimer _timer;
ResourceMark rm;
HandleMark hm;
@@ -5107,7 +5110,7 @@ void CMSParRemarkTask::work(int i) {
Par_MarkRefsIntoAndScanClosure par_mrias_cl(_collector,
_collector->_span, _collector->ref_processor(),
&(_collector->_markBitMap),
- work_queue(i), &(_collector->_revisitStack));
+ work_queue(worker_id), &(_collector->_revisitStack));
// Rescan young gen roots first since these are likely
// coarsely partitioned and may, on that account, constitute
@@ -5128,15 +5131,15 @@ void CMSParRemarkTask::work(int i) {
assert(ect <= _collector->_eden_chunk_capacity, "out of bounds");
assert(sct <= _collector->_survivor_chunk_capacity, "out of bounds");
- do_young_space_rescan(i, &par_mrias_cl, to_space, NULL, 0);
- do_young_space_rescan(i, &par_mrias_cl, from_space, sca, sct);
- do_young_space_rescan(i, &par_mrias_cl, eden_space, eca, ect);
+ do_young_space_rescan(worker_id, &par_mrias_cl, to_space, NULL, 0);
+ do_young_space_rescan(worker_id, &par_mrias_cl, from_space, sca, sct);
+ do_young_space_rescan(worker_id, &par_mrias_cl, eden_space, eca, ect);
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr(
"Finished young gen rescan work in %dth thread: %3.3f sec",
- i, _timer.seconds());
+ worker_id, _timer.seconds());
}
}
@@ -5158,7 +5161,7 @@ void CMSParRemarkTask::work(int i) {
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr(
"Finished remaining root rescan work in %dth thread: %3.3f sec",
- i, _timer.seconds());
+ worker_id, _timer.seconds());
}
// ---------- rescan dirty cards ------------
@@ -5167,26 +5170,26 @@ void CMSParRemarkTask::work(int i) {
// Do the rescan tasks for each of the two spaces
// (cms_space and perm_space) in turn.
- // "i" is passed to select the "i-th" task_queue
- do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
- do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
+ // "worker_id" is passed to select the task_queue for "worker_id"
+ do_dirty_card_rescan_tasks(_cms_space, worker_id, &par_mrias_cl);
+ do_dirty_card_rescan_tasks(_perm_space, worker_id, &par_mrias_cl);
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr(
"Finished dirty card rescan work in %dth thread: %3.3f sec",
- i, _timer.seconds());
+ worker_id, _timer.seconds());
}
// ---------- steal work from other threads ...
// ---------- ... and drain overflow list.
_timer.reset();
_timer.start();
- do_work_steal(i, &par_mrias_cl, _collector->hash_seed(i));
+ do_work_steal(worker_id, &par_mrias_cl, _collector->hash_seed(worker_id));
_timer.stop();
if (PrintCMSStatistics != 0) {
gclog_or_tty->print_cr(
"Finished work stealing in %dth thread: %3.3f sec",
- i, _timer.seconds());
+ worker_id, _timer.seconds());
}
}
@@ -5207,8 +5210,8 @@ CMSParRemarkTask::do_young_space_rescan(int i,
SequentialSubTasksDone* pst = space->par_seq_tasks();
assert(pst->valid(), "Uninitialized use?");
- int nth_task = 0;
- int n_tasks = pst->n_tasks();
+ uint nth_task = 0;
+ uint n_tasks = pst->n_tasks();
HeapWord *start, *end;
while (!pst->is_task_claimed(/* reference */ nth_task)) {
@@ -5220,12 +5223,12 @@ CMSParRemarkTask::do_young_space_rescan(int i,
} else if (nth_task == 0) {
start = space->bottom();
end = chunk_array[nth_task];
- } else if (nth_task < (jint)chunk_top) {
+ } else if (nth_task < (uint)chunk_top) {
assert(nth_task >= 1, "Control point invariant");
start = chunk_array[nth_task - 1];
end = chunk_array[nth_task];
} else {
- assert(nth_task == (jint)chunk_top, "Control point invariant");
+ assert(nth_task == (uint)chunk_top, "Control point invariant");
start = chunk_array[chunk_top - 1];
end = space->top();
}
@@ -5288,7 +5291,7 @@ CMSParRemarkTask::do_dirty_card_rescan_tasks(
SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
assert(pst->valid(), "Uninitialized use?");
- int nth_task = 0;
+ uint nth_task = 0;
const int alignment = CardTableModRefBS::card_size * BitsPerWord;
MemRegion span = sp->used_region();
HeapWord* start_addr = span.start();
@@ -5736,26 +5739,26 @@ public:
CMSParKeepAliveClosure* keep_alive,
int* seed);
- virtual void work(int i);
+ virtual void work(uint worker_id);
};
-void CMSRefProcTaskProxy::work(int i) {
+void CMSRefProcTaskProxy::work(uint worker_id) {
assert(_collector->_span.equals(_span), "Inconsistency in _span");
CMSParKeepAliveClosure par_keep_alive(_collector, _span,
_mark_bit_map,
&_collector->_revisitStack,
- work_queue(i));
+ work_queue(worker_id));
CMSParDrainMarkingStackClosure par_drain_stack(_collector, _span,
_mark_bit_map,
&_collector->_revisitStack,
- work_queue(i));
+ work_queue(worker_id));
CMSIsAliveClosure is_alive_closure(_span, _mark_bit_map);
- _task.work(i, is_alive_closure, par_keep_alive, par_drain_stack);
+ _task.work(worker_id, is_alive_closure, par_keep_alive, par_drain_stack);
if (_task.marks_oops_alive()) {
- do_work_steal(i, &par_drain_stack, &par_keep_alive,
- _collector->hash_seed(i));
+ do_work_steal(worker_id, &par_drain_stack, &par_keep_alive,
+ _collector->hash_seed(worker_id));
}
- assert(work_queue(i)->size() == 0, "work_queue should be empty");
+ assert(work_queue(worker_id)->size() == 0, "work_queue should be empty");
assert(_collector->_overflow_list == NULL, "non-empty _overflow_list");
}
@@ -5769,9 +5772,9 @@ public:
_task(task)
{ }
- virtual void work(int i)
+ virtual void work(uint worker_id)
{
- _task.work(i);
+ _task.work(worker_id);
}
};
diff --git a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp
index 544b5a8676c..354fefbf71f 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp
@@ -264,7 +264,7 @@ prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
// or some improperly initialized variable with leads to no
// active threads, protect against that in a product build.
n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
- 1);
+ 1U);
}
size_t max_waste = n_threads * chunkSize;
// it should be aligned with respect to chunkSize
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
index e208929a2a1..4712d803542 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
@@ -183,12 +184,11 @@ CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
void CMMarkStack::allocate(size_t size) {
_base = NEW_C_HEAP_ARRAY(oop, size);
if (_base == NULL) {
- vm_exit_during_initialization("Failed to allocate "
- "CM region mark stack");
+ vm_exit_during_initialization("Failed to allocate CM region mark stack");
}
_index = 0;
_capacity = (jint) size;
- _oops_do_bound = -1;
+ _saved_index = -1;
NOT_PRODUCT(_max_depth = 0);
}
@@ -283,7 +283,6 @@ bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
}
}
-
CMRegionStack::CMRegionStack() : _base(NULL) {}
void CMRegionStack::allocate(size_t size) {
@@ -302,6 +301,8 @@ CMRegionStack::~CMRegionStack() {
}
void CMRegionStack::push_lock_free(MemRegion mr) {
+ guarantee(false, "push_lock_free(): don't call this any more");
+
assert(mr.word_size() > 0, "Precondition");
while (true) {
jint index = _index;
@@ -325,6 +326,8 @@ void CMRegionStack::push_lock_free(MemRegion mr) {
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
+ guarantee(false, "pop_lock_free(): don't call this any more");
+
while (true) {
jint index = _index;
@@ -390,6 +393,8 @@ MemRegion CMRegionStack::pop_with_lock() {
#endif
bool CMRegionStack::invalidate_entries_into_cset() {
+ guarantee(false, "invalidate_entries_into_cset(): don't call this any more");
+
bool result = false;
G1CollectedHeap* g1h = G1CollectedHeap::heap();
for (int i = 0; i < _oops_do_bound; ++i) {
@@ -438,14 +443,29 @@ bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
return res;
}
+void CMMarkStack::note_start_of_gc() {
+ assert(_saved_index == -1,
+ "note_start_of_gc()/end_of_gc() bracketed incorrectly");
+ _saved_index = _index;
+}
+
+void CMMarkStack::note_end_of_gc() {
+ // This is intentionally a guarantee, instead of an assert. If we
+ // accidentally add something to the mark stack during GC, it
+ // will be a correctness issue so it's better if we crash. We'll
+ // only check this once per GC anyway, so it won't be a performance
+ // issue in any way.
+ guarantee(_saved_index == _index,
+ err_msg("saved index: %d index: %d", _saved_index, _index));
+ _saved_index = -1;
+}
+
void CMMarkStack::oops_do(OopClosure* f) {
- if (_index == 0) return;
- assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
- "Bound must be set.");
- for (int i = 0; i < _oops_do_bound; i++) {
+ assert(_saved_index == _index,
+ err_msg("saved index: %d index: %d", _saved_index, _index));
+ for (int i = 0; i < _index; i += 1) {
f->do_oop(&_base[i]);
}
- _oops_do_bound = -1;
}
bool ConcurrentMark::not_yet_marked(oop obj) const {
@@ -458,8 +478,8 @@ bool ConcurrentMark::not_yet_marked(oop obj) const {
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER
-size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
- return MAX2((n_par_threads + 2) / 4, (size_t)1);
+uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
+ return MAX2((n_par_threads + 2) / 4, 1U);
}
ConcurrentMark::ConcurrentMark(ReservedSpace rs,
@@ -486,7 +506,7 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs,
_regionStack(),
// _finger set in set_non_marking_state
- _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
+ _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
// _active_tasks set in set_non_marking_state
// _tasks set inside the constructor
_task_queues(new CMTaskQueueSet((int) _max_task_num)),
@@ -506,7 +526,6 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs,
_cleanup_times(),
_total_counting_time(0.0),
_total_rs_scrub_time(0.0),
-
_parallel_workers(NULL) {
CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
if (verbose_level < no_verbose) {
@@ -568,7 +587,7 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs,
// notice that ConcGCThreads overwrites G1MarkingOverheadPercent
// if both are set
- _parallel_marking_threads = ConcGCThreads;
+ _parallel_marking_threads = (uint) ConcGCThreads;
_max_parallel_marking_threads = _parallel_marking_threads;
_sleep_factor = 0.0;
_marking_task_overhead = 1.0;
@@ -589,12 +608,12 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs,
double sleep_factor =
(1.0 - marking_task_overhead) / marking_task_overhead;
- _parallel_marking_threads = (size_t) marking_thread_num;
+ _parallel_marking_threads = (uint) marking_thread_num;
_max_parallel_marking_threads = _parallel_marking_threads;
_sleep_factor = sleep_factor;
_marking_task_overhead = marking_task_overhead;
} else {
- _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
+ _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
_max_parallel_marking_threads = _parallel_marking_threads;
_sleep_factor = 0.0;
_marking_task_overhead = 1.0;
@@ -618,7 +637,7 @@ ConcurrentMark::ConcurrentMark(ReservedSpace rs,
guarantee(parallel_marking_threads() > 0, "peace of mind");
_parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
- (int) _max_parallel_marking_threads, false, true);
+ _max_parallel_marking_threads, false, true);
if (_parallel_workers == NULL) {
vm_exit_during_initialization("Failed necessary allocation.");
} else {
@@ -691,7 +710,7 @@ void ConcurrentMark::reset() {
set_concurrent_marking_in_progress();
}
-void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
+void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
assert(active_tasks <= _max_task_num, "we should not have more");
_active_tasks = active_tasks;
@@ -727,12 +746,8 @@ void ConcurrentMark::set_non_marking_state() {
}
ConcurrentMark::~ConcurrentMark() {
- for (int i = 0; i < (int) _max_task_num; ++i) {
- delete _task_queues->queue(i);
- delete _tasks[i];
- }
- delete _task_queues;
- FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
+ // The ConcurrentMark instance is never freed.
+ ShouldNotReachHere();
}
// This closure is used to mark refs into the g1 generation
@@ -788,7 +803,7 @@ class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
bool doHeapRegion(HeapRegion* r) {
if (!r->continuesHumongous()) {
- r->note_start_of_marking(true);
+ r->note_start_of_marking();
}
return false;
}
@@ -809,6 +824,10 @@ void ConcurrentMark::checkpointRootsInitialPre() {
// Initialise marking structures. This has to be done in a STW phase.
reset();
+
+ // For each region note start of marking.
+ NoteStartOfMarkHRClosure startcl;
+ g1h->heap_region_iterate(&startcl);
}
@@ -823,10 +842,6 @@ void ConcurrentMark::checkpointRootsInitialPost() {
// every remark and we'll eventually not need to cause one.
force_overflow_stw()->init();
- // For each region note start of marking.
- NoteStartOfMarkHRClosure startcl;
- g1h->heap_region_iterate(&startcl);
-
// Start Concurrent Marking weak-reference discovery.
ReferenceProcessor* rp = g1h->ref_processor_cm();
// enable ("weak") refs discovery
@@ -951,22 +966,9 @@ bool ForceOverflowSettings::should_force() {
}
#endif // !PRODUCT
-void ConcurrentMark::grayRoot(oop p) {
- HeapWord* addr = (HeapWord*) p;
- // We can't really check against _heap_start and _heap_end, since it
- // is possible during an evacuation pause with piggy-backed
- // initial-mark that the committed space is expanded during the
- // pause without CM observing this change. So the assertions below
- // is a bit conservative; but better than nothing.
- assert(_g1h->g1_committed().contains(addr),
- "address should be within the heap bounds");
-
- if (!_nextMarkBitMap->isMarked(addr)) {
- _nextMarkBitMap->parMark(addr);
- }
-}
-
void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
+ guarantee(false, "grayRegionIfNecessary(): don't call this any more");
+
// The objects on the region have already been marked "in bulk" by
// the caller. We only need to decide whether to push the region on
// the region stack or not.
@@ -1012,6 +1014,8 @@ void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
}
void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
+ guarantee(false, "markAndGrayObjectIfNecessary(): don't call this any more");
+
// The object is not marked by the caller. We need to at least mark
// it and maybe push in on the stack.
@@ -1048,7 +1052,7 @@ private:
ConcurrentMarkThread* _cmt;
public:
- void work(int worker_i) {
+ void work(uint worker_id) {
assert(Thread::current()->is_ConcurrentGC_thread(),
"this should only be done by a conc GC thread");
ResourceMark rm;
@@ -1057,8 +1061,8 @@ public:
ConcurrentGCThread::stsJoin();
- assert((size_t) worker_i < _cm->active_tasks(), "invariant");
- CMTask* the_task = _cm->task(worker_i);
+ assert(worker_id < _cm->active_tasks(), "invariant");
+ CMTask* the_task = _cm->task(worker_id);
the_task->record_start_time();
if (!_cm->has_aborted()) {
do {
@@ -1076,7 +1080,7 @@ public:
double elapsed_time_sec = end_time_sec - start_time_sec;
_cm->clear_has_overflown();
- bool ret = _cm->do_yield_check(worker_i);
+ bool ret = _cm->do_yield_check(worker_id);
jlong sleep_time_ms;
if (!_cm->has_aborted() && the_task->has_aborted()) {
@@ -1105,7 +1109,7 @@ public:
ConcurrentGCThread::stsLeave();
double end_vtime = os::elapsedVTime();
- _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
+ _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
}
CMConcurrentMarkingTask(ConcurrentMark* cm,
@@ -1117,9 +1121,9 @@ public:
// Calculates the number of active workers for a concurrent
// phase.
-size_t ConcurrentMark::calc_parallel_marking_threads() {
+uint ConcurrentMark::calc_parallel_marking_threads() {
if (G1CollectedHeap::use_parallel_gc_threads()) {
- size_t n_conc_workers = 0;
+ uint n_conc_workers = 0;
if (!UseDynamicNumberOfGCThreads ||
(!FLAG_IS_DEFAULT(ConcGCThreads) &&
!ForceDynamicNumberOfGCThreads)) {
@@ -1159,7 +1163,7 @@ void ConcurrentMark::markFromRoots() {
assert(parallel_marking_threads() <= max_parallel_marking_threads(),
"Maximum number of marking threads exceeded");
- size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
+ uint active_workers = MAX2(1U, parallel_marking_threads());
// Parallel task terminator is set in "set_phase()"
set_phase(active_workers, true /* concurrent */);
@@ -1229,7 +1233,6 @@ void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
true /* expected_active */);
if (VerifyDuringGC) {
-
HandleMark hm; // handle scope
gclog_or_tty->print(" VerifyDuringGC:(after)");
Universe::heap()->prepare_for_verify();
@@ -1503,7 +1506,7 @@ class G1ParFinalCountTask: public AbstractGangTask {
protected:
G1CollectedHeap* _g1h;
CMBitMap* _bm;
- size_t _n_workers;
+ uint _n_workers;
size_t *_live_bytes;
size_t *_used_bytes;
BitMap* _region_bm;
@@ -1535,13 +1538,13 @@ public:
FREE_C_HEAP_ARRAY(size_t, _used_bytes);
}
- void work(int i) {
+ void work(uint worker_id) {
CalcLiveObjectsClosure calccl(true /*final*/,
_bm, _g1h->concurrent_mark(),
_region_bm, _card_bm);
calccl.no_yield();
if (G1CollectedHeap::use_parallel_gc_threads()) {
- _g1h->heap_region_par_iterate_chunked(&calccl, i,
+ _g1h->heap_region_par_iterate_chunked(&calccl, worker_id,
(int) _n_workers,
HeapRegion::FinalCountClaimValue);
} else {
@@ -1549,19 +1552,19 @@ public:
}
assert(calccl.complete(), "Shouldn't have yielded!");
- assert((size_t) i < _n_workers, "invariant");
- _live_bytes[i] = calccl.tot_live();
- _used_bytes[i] = calccl.tot_used();
+ assert(worker_id < _n_workers, "invariant");
+ _live_bytes[worker_id] = calccl.tot_live();
+ _used_bytes[worker_id] = calccl.tot_used();
}
size_t live_bytes() {
size_t live_bytes = 0;
- for (size_t i = 0; i < _n_workers; ++i)
+ for (uint i = 0; i < _n_workers; ++i)
live_bytes += _live_bytes[i];
return live_bytes;
}
size_t used_bytes() {
size_t used_bytes = 0;
- for (size_t i = 0; i < _n_workers; ++i)
+ for (uint i = 0; i < _n_workers; ++i)
used_bytes += _used_bytes[i];
return used_bytes;
}
@@ -1646,18 +1649,18 @@ public:
AbstractGangTask("G1 note end"), _g1h(g1h),
_max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
- void work(int i) {
+ void work(uint worker_id) {
double start = os::elapsedTime();
FreeRegionList local_cleanup_list("Local Cleanup List");
OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
HRRSCleanupTask hrrs_cleanup_task;
- G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
+ G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
&old_proxy_set,
&humongous_proxy_set,
&hrrs_cleanup_task);
if (G1CollectedHeap::use_parallel_gc_threads()) {
- _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
+ _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
_g1h->workers()->active_workers(),
HeapRegion::NoteEndClaimValue);
} else {
@@ -1701,8 +1704,8 @@ public:
double end = os::elapsedTime();
if (G1PrintParCleanupStats) {
- gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
- "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
- i, start, end, (end-start)*1000.0,
+ gclog_or_tty->print(" Worker thread %u [%8.3f..%8.3f = %8.3f ms] "
+ "claimed %u regions (tot = %8.3f ms, max = %8.3f ms).\n",
+ worker_id, start, end, (end-start)*1000.0,
g1_note_end.regions_claimed(),
g1_note_end.claimed_region_time_sec()*1000.0,
g1_note_end.max_region_time_sec()*1000.0);
@@ -1724,9 +1727,9 @@ public:
_region_bm(region_bm), _card_bm(card_bm)
{}
- void work(int i) {
+ void work(uint worker_id) {
if (G1CollectedHeap::use_parallel_gc_threads()) {
- _g1rs->scrub_par(_region_bm, _card_bm, i,
+ _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
HeapRegion::ScrubRemSetClaimValue);
} else {
_g1rs->scrub(_region_bm, _card_bm);
@@ -1766,7 +1769,7 @@ void ConcurrentMark::cleanup() {
HeapRegionRemSet::reset_for_cleanup_tasks();
- size_t n_workers;
+ uint n_workers;
// Do counting once more with the world stopped for good measure.
G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
@@ -1778,7 +1781,7 @@ void ConcurrentMark::cleanup() {
g1h->set_par_threads();
n_workers = g1h->n_par_threads();
- assert(g1h->n_par_threads() == (int) n_workers,
+ assert(g1h->n_par_threads() == n_workers,
"Should not have been reset");
g1h->workers()->run_task(&g1_par_count_task);
// Done with the parallel phase so reset to 0.
@@ -1884,10 +1887,6 @@ void ConcurrentMark::cleanup() {
double end = os::elapsedTime();
_cleanup_times.add((end - start) * 1000.0);
- // G1CollectedHeap::heap()->print();
- // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
- // G1CollectedHeap::heap()->get_gc_time_stamp());
-
if (PrintGC || PrintGCDetails) {
g1h->print_size_transition(gclog_or_tty,
start_used_bytes,
@@ -2169,13 +2168,13 @@ public:
AbstractGangTask("Process reference objects in parallel"),
_proc_task(proc_task), _g1h(g1h), _cm(cm) { }
- virtual void work(int i) {
- CMTask* marking_task = _cm->task(i);
+ virtual void work(uint worker_id) {
+ CMTask* marking_task = _cm->task(worker_id);
G1CMIsAliveClosure g1_is_alive(_g1h);
G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
- _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
+ _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
}
};
@@ -2201,8 +2200,8 @@ public:
AbstractGangTask("Enqueue reference objects in parallel"),
_enq_task(enq_task) { }
- virtual void work(int i) {
- _enq_task.work(i);
+ virtual void work(uint worker_id) {
+ _enq_task.work(worker_id);
}
};
@@ -2249,8 +2248,8 @@ void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
// We use the work gang from the G1CollectedHeap and we utilize all
// the worker threads.
- int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
- active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
+ uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
+ active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
G1CMRefProcTaskExecutor par_task_executor(g1h, this,
g1h->workers(), active_workers);
@@ -2314,11 +2313,11 @@ private:
ConcurrentMark *_cm;
public:
- void work(int worker_i) {
+ void work(uint worker_id) {
// Since all available tasks are actually started, we should
// only proceed if we're supposed to be actived.
- if ((size_t)worker_i < _cm->active_tasks()) {
- CMTask* task = _cm->task(worker_i);
+ if (worker_id < _cm->active_tasks()) {
+ CMTask* task = _cm->task(worker_id);
task->record_start_time();
do {
task->do_marking_step(1000000000.0 /* something very large */,
@@ -2347,10 +2346,10 @@ void ConcurrentMark::checkpointRootsFinalWork() {
if (G1CollectedHeap::use_parallel_gc_threads()) {
G1CollectedHeap::StrongRootsScope srs(g1h);
// this is remark, so we'll use up all active threads
- int active_workers = g1h->workers()->active_workers();
+ uint active_workers = g1h->workers()->active_workers();
if (active_workers == 0) {
assert(active_workers > 0, "Should have been set earlier");
- active_workers = ParallelGCThreads;
+ active_workers = (uint) ParallelGCThreads;
g1h->workers()->set_active_workers(active_workers);
}
set_phase(active_workers, false /* concurrent */);
@@ -2366,7 +2365,7 @@ void ConcurrentMark::checkpointRootsFinalWork() {
} else {
G1CollectedHeap::StrongRootsScope srs(g1h);
// this is remark, so we'll use up all available threads
- int active_workers = 1;
+ uint active_workers = 1;
set_phase(active_workers, false /* concurrent */);
CMRemarkTask remarkTask(this, active_workers);
@@ -2674,6 +2673,8 @@ void ConcurrentMark::deal_with_reference(oop obj) {
}
void ConcurrentMark::drainAllSATBBuffers() {
+ guarantee(false, "drainAllSATBBuffers(): don't call this any more");
+
CMGlobalObjectClosure oc(this);
SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
satb_mq_set.set_closure(&oc);
@@ -2692,12 +2693,6 @@ void ConcurrentMark::drainAllSATBBuffers() {
assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
}
-void ConcurrentMark::markPrev(oop p) {
- // Note we are overriding the read-only view of the prev map here, via
- // the cast.
- ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
-}
-
void ConcurrentMark::clear(oop p) {
assert(p != NULL && p->is_oop(), "expected an oop");
HeapWord* addr = (HeapWord*)p;
@@ -2707,13 +2702,21 @@ void ConcurrentMark::clear(oop p) {
_nextMarkBitMap->clear(addr);
}
-void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
+void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
// Note we are overriding the read-only view of the prev map here, via
// the cast.
((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+}
+
+void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
_nextMarkBitMap->clearRange(mr);
}
+void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
+ clearRangePrevBitmap(mr);
+ clearRangeNextBitmap(mr);
+}
+
HeapRegion*
ConcurrentMark::claim_region(int task_num) {
// "checkpoint" the finger
@@ -2808,6 +2811,9 @@ ConcurrentMark::claim_region(int task_num) {
}
bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
+ guarantee(false, "invalidate_aborted_regions_in_cset(): "
+ "don't call this any more");
+
bool result = false;
for (int i = 0; i < (int)_max_task_num; ++i) {
CMTask* the_task = _tasks[i];
@@ -2859,25 +2865,136 @@ void ConcurrentMark::oops_do(OopClosure* cl) {
// ...then over the contents of the all the task queues.
queue->oops_do(cl);
}
-
- // Invalidate any entries, that are in the region stack, that
- // point into the collection set
- if (_regionStack.invalidate_entries_into_cset()) {
- // otherwise, any gray objects copied during the evacuation pause
- // might not be visited.
- assert(_should_gray_objects, "invariant");
- }
-
- // Invalidate any aborted regions, recorded in the individual CM
- // tasks, that point into the collection set.
- if (invalidate_aborted_regions_in_cset()) {
- // otherwise, any gray objects copied during the evacuation pause
- // might not be visited.
- assert(_should_gray_objects, "invariant");
- }
-
}
+#ifndef PRODUCT
+enum VerifyNoCSetOopsPhase {
+ VerifyNoCSetOopsStack,
+ VerifyNoCSetOopsQueues,
+ VerifyNoCSetOopsSATBCompleted,
+ VerifyNoCSetOopsSATBThread
+};
+
+class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
+private:
+  G1CollectedHeap* _g1h;
+  VerifyNoCSetOopsPhase _phase;  // which data structure is being scanned
+  int _info;                     // extra detail for the failure message; -1 by default
+
+  const char* phase_str() {      // printable name of _phase
+    switch (_phase) {
+    case VerifyNoCSetOopsStack: return "Stack";
+    case VerifyNoCSetOopsQueues: return "Queue";
+    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
+    case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
+    default: ShouldNotReachHere();
+    }
+    return NULL;
+  }
+
+  void do_object_work(oop obj) { // fails hard if obj is in the collection set
+    guarantee(!_g1h->obj_in_cs(obj),
+              err_msg("obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
+                      (void*) obj, phase_str(), _info));
+  }
+
+public:
+  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()), _phase(VerifyNoCSetOopsStack), _info(-1) { }
+
+  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
+    _phase = phase;
+    _info = info;
+  }
+
+  virtual void do_oop(oop* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
+    do_object_work(obj);
+  }
+
+  virtual void do_oop(narrowOop* p) {
+    // We should not come across narrow oops while scanning marking
+    // stacks and SATB buffers.
+    ShouldNotReachHere();
+  }
+
+  virtual void do_object(oop obj) {
+    do_object_work(obj);
+  }
+};
+
+void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
+                                         bool verify_enqueued_buffers,
+                                         bool verify_thread_buffers,
+                                         bool verify_fingers) {
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
+  if (!G1CollectedHeap::heap()->mark_in_progress()) {
+    return; // nothing to verify when marking is not in progress
+  }
+
+  VerifyNoCSetOopsClosure cl;
+
+  if (verify_stacks) {
+    // Verify entries on the global mark stack
+    cl.set_phase(VerifyNoCSetOopsStack);
+    _markStack.oops_do(&cl);
+
+    // Verify entries on the task queues
+    for (int i = 0; i < (int) _max_task_num; i += 1) {
+      cl.set_phase(VerifyNoCSetOopsQueues, i); // i is reported on failure
+      OopTaskQueue* queue = _task_queues->queue(i);
+      queue->oops_do(&cl);
+    }
+  }
+
+  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
+
+  // Verify entries on the enqueued SATB buffers
+  if (verify_enqueued_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
+    satb_qs.iterate_completed_buffers_read_only(&cl);
+  }
+
+  // Verify entries on the per-thread SATB buffers
+  if (verify_thread_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBThread);
+    satb_qs.iterate_thread_buffers_read_only(&cl);
+  }
+
+  if (verify_fingers) {
+    // Verify the global finger
+    HeapWord* global_finger = finger();
+    if (global_finger != NULL && global_finger < _heap_end) {
+      // The global finger always points to a heap region boundary. We
+      // use heap_region_containing_raw() to get the containing region
+      // given that the global finger could be pointing to a free region
+      // which subsequently becomes continues humongous. If that
+      // happens, heap_region_containing() will return the bottom of the
+      // corresponding starts humongous region and the check below will
+      // not hold any more.
+      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
+      guarantee(global_finger == global_hr->bottom(),
+                err_msg("global finger: " PTR_FORMAT " region: " HR_FORMAT,
+                        global_finger, HR_FORMAT_PARAMS(global_hr)));
+    }
+
+    // Verify the task fingers
+    assert(parallel_marking_threads() <= _max_task_num, "sanity");
+    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
+      CMTask* task = _tasks[i];
+      HeapWord* task_finger = task->finger();
+      if (task_finger != NULL && task_finger < _heap_end) {
+        // See above note on the global finger verification.
+        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
+        guarantee(task_finger == task_hr->bottom() ||
+                  !task_hr->in_collection_set(),
+                  err_msg("task finger: " PTR_FORMAT " region: " HR_FORMAT,
+                          task_finger, HR_FORMAT_PARAMS(task_hr)));
+      }
+    }
+  }
+}
+#endif // !PRODUCT
+
void ConcurrentMark::clear_marking_state(bool clear_overflow) {
_markStack.setEmpty();
_markStack.clear_overflow();
@@ -2921,7 +3038,7 @@ class CSetMarkOopClosure: public OopClosure {
int _ms_size;
int _ms_ind;
int _array_increment;
- int _worker_i;
+ uint _worker_id;
bool push(oop obj, int arr_ind = 0) {
if (_ms_ind == _ms_size) {
@@ -2971,7 +3088,7 @@ class CSetMarkOopClosure: public OopClosure {
}
public:
- CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
+ CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, uint worker_id) :
_g1h(G1CollectedHeap::heap()),
_cm(cm),
_bm(cm->nextMarkBitMap()),
@@ -2979,7 +3096,7 @@ public:
_ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
_array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
_array_increment(MAX2(ms_size/8, 16)),
- _worker_i(worker_i) { }
+ _worker_id(worker_id) { }
~CSetMarkOopClosure() {
FREE_C_HEAP_ARRAY(oop, _ms);
@@ -3024,14 +3141,14 @@ class CSetMarkBitMapClosure: public BitMapClosure {
CMBitMap* _bitMap;
ConcurrentMark* _cm;
CSetMarkOopClosure _oop_cl;
- int _worker_i;
+ uint _worker_id;
public:
- CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
+ CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, uint worker_id) :
_g1h(G1CollectedHeap::heap()),
_bitMap(cm->nextMarkBitMap()),
- _oop_cl(cm, ms_size, worker_i),
- _worker_i(worker_i) { }
+ _oop_cl(cm, ms_size, worker_id),
+ _worker_id(worker_id) { }
bool do_bit(size_t offset) {
// convert offset into a HeapWord*
@@ -3056,17 +3173,17 @@ public:
class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
CMBitMap* _bm;
CSetMarkBitMapClosure _bit_cl;
- int _worker_i;
+ uint _worker_id;
enum SomePrivateConstants {
MSSize = 1000
};
public:
- CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
+ CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, uint worker_id) :
_bm(cm->nextMarkBitMap()),
- _bit_cl(cm, MSSize, worker_i),
- _worker_i(worker_i) { }
+ _bit_cl(cm, MSSize, worker_id),
+ _worker_id(worker_id) { }
bool doHeapRegion(HeapRegion* hr) {
if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
@@ -3085,19 +3202,6 @@ public:
}
};
-class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
- jint _claim_value;
-
-public:
- SetClaimValuesInCSetHRClosure(jint claim_value) :
- _claim_value(claim_value) { }
-
- bool doHeapRegion(HeapRegion* hr) {
- hr->set_claim_value(_claim_value);
- return false;
- }
-};
-
class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
protected:
G1CollectedHeap* _g1h;
@@ -3109,14 +3213,17 @@ public:
AbstractGangTask("Complete Mark in CSet"),
_g1h(g1h), _cm(cm) { }
- void work(int worker_i) {
- CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
- HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
+ void work(uint worker_id) {
+ CompleteMarkingInCSetHRClosure cmplt(_cm, worker_id);
+ HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
_g1h->collection_set_iterate_from(hr, &cmplt);
}
};
void ConcurrentMark::complete_marking_in_collection_set() {
+ guarantee(false, "complete_marking_in_collection_set(): "
+ "don't call this any more");
+
G1CollectedHeap* g1h = G1CollectedHeap::heap();
if (!g1h->mark_in_progress()) {
@@ -3140,9 +3247,8 @@ void ConcurrentMark::complete_marking_in_collection_set() {
assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
- // Now reset the claim values in the regions in the collection set.
- SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
- g1h->collection_set_iterate(&set_cv_cl);
+ // Reset the claim values in the regions in the collection set.
+ g1h->reset_cset_heap_region_claim_values();
assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
@@ -3165,6 +3271,8 @@ void ConcurrentMark::complete_marking_in_collection_set() {
// newCSet().
void ConcurrentMark::newCSet() {
+ guarantee(false, "newCSet(): don't call this any more");
+
if (!concurrent_marking_in_progress()) {
// nothing to do if marking is not in progress
return;
@@ -3203,6 +3311,8 @@ void ConcurrentMark::newCSet() {
}
void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+ guarantee(false, "registerCSetRegion(): don't call this any more");
+
if (!concurrent_marking_in_progress()) return;
HeapWord* region_end = hr->end();
@@ -3214,6 +3324,9 @@ void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
// Resets the region fields of active CMTasks whose values point
// into the collection set.
void ConcurrentMark::reset_active_task_region_fields_in_cset() {
+ guarantee(false, "reset_active_task_region_fields_in_cset(): "
+ "don't call this any more");
+
assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
assert(parallel_marking_threads() <= _max_task_num, "sanity");
@@ -3307,13 +3420,13 @@ void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
// the CMS bit map. Called at the first checkpoint.
// We take a break if someone is trying to stop the world.
-bool ConcurrentMark::do_yield_check(int worker_i) {
+bool ConcurrentMark::do_yield_check(uint worker_id) {
if (should_yield()) {
- if (worker_i == 0) {
+ if (worker_id == 0) {
_g1h->g1_policy()->record_concurrent_pause();
}
cmThread()->yield();
- if (worker_i == 0) {
+ if (worker_id == 0) {
_g1h->g1_policy()->record_concurrent_pause_end();
}
return true;
@@ -3924,6 +4037,10 @@ void CMTask::drain_satb_buffers() {
}
void CMTask::drain_region_stack(BitMapClosure* bc) {
+ assert(_cm->region_stack_empty(), "region stack should be empty");
+ assert(_aborted_region.is_empty(), "aborted region should be empty");
+ return;
+
if (has_aborted()) return;
assert(_region_finger == NULL,
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
index 6383227d90e..1a407848499 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -166,10 +166,10 @@ class CMBitMap : public CMBitMapRO {
// Ideally this should be GrowableArray<> just like MSC's marking stack(s).
class CMMarkStack VALUE_OBJ_CLASS_SPEC {
ConcurrentMark* _cm;
- oop* _base; // bottom of stack
- jint _index; // one more than last occupied index
- jint _capacity; // max #elements
- jint _oops_do_bound; // Number of elements to include in next iteration.
+ oop* _base; // bottom of stack
+ jint _index; // one more than last occupied index
+ jint _capacity; // max #elements
+ jint _saved_index; // value of _index saved at start of GC
NOT_PRODUCT(jint _max_depth;) // max depth plumbed during run
bool _overflow;
@@ -247,16 +247,12 @@ class CMMarkStack VALUE_OBJ_CLASS_SPEC {
void setEmpty() { _index = 0; clear_overflow(); }
- // Record the current size; a subsequent "oops_do" will iterate only over
- // indices valid at the time of this call.
- void set_oops_do_bound(jint bound = -1) {
- if (bound == -1) {
- _oops_do_bound = _index;
- } else {
- _oops_do_bound = bound;
- }
- }
- jint oops_do_bound() { return _oops_do_bound; }
+ // Record the current index.
+ void note_start_of_gc();
+
+ // Make sure that we have not added any entries to the stack during GC.
+ void note_end_of_gc();
+
// iterate over the oops in the mark stack, up to the bound recorded via
// the call above.
void oops_do(OopClosure* f);
@@ -374,9 +370,9 @@ class ConcurrentMark: public CHeapObj {
protected:
ConcurrentMarkThread* _cmThread; // the thread doing the work
G1CollectedHeap* _g1h; // the heap.
- size_t _parallel_marking_threads; // the number of marking
+ uint _parallel_marking_threads; // the number of marking
// threads we're use
- size_t _max_parallel_marking_threads; // max number of marking
+ uint _max_parallel_marking_threads; // max number of marking
// threads we'll ever use
double _sleep_factor; // how much we have to sleep, with
// respect to the work we just did, to
@@ -412,8 +408,8 @@ protected:
// last claimed region
// marking tasks
- size_t _max_task_num; // maximum task number
- size_t _active_tasks; // task num currently active
+ uint _max_task_num; // maximum task number
+ uint _active_tasks; // task num currently active
CMTask** _tasks; // task queue array (max_task_num len)
CMTaskQueueSet* _task_queues; // task queue set
ParallelTaskTerminator _terminator; // for termination
@@ -492,7 +488,7 @@ protected:
// It should be called to indicate which phase we're in (concurrent
// mark or remark) and how many threads are currently active.
- void set_phase(size_t active_tasks, bool concurrent);
+ void set_phase(uint active_tasks, bool concurrent);
// We do this after we're done with marking so that the marking data
// structures are initialised to a sensible and predictable state.
void set_non_marking_state();
@@ -505,8 +501,8 @@ protected:
}
// accessor methods
- size_t parallel_marking_threads() { return _parallel_marking_threads; }
- size_t max_parallel_marking_threads() { return _max_parallel_marking_threads;}
+ uint parallel_marking_threads() { return _parallel_marking_threads; }
+ uint max_parallel_marking_threads() { return _max_parallel_marking_threads;}
double sleep_factor() { return _sleep_factor; }
double marking_task_overhead() { return _marking_task_overhead;}
double cleanup_sleep_factor() { return _cleanup_sleep_factor; }
@@ -514,7 +510,7 @@ protected:
HeapWord* finger() { return _finger; }
bool concurrent() { return _concurrent; }
- size_t active_tasks() { return _active_tasks; }
+ uint active_tasks() { return _active_tasks; }
ParallelTaskTerminator* terminator() { return &_terminator; }
// It claims the next available region to be scanned by a marking
@@ -715,19 +711,18 @@ public:
// Returns the number of GC threads to be used in a concurrent
// phase based on the number of GC threads being used in a STW
// phase.
- size_t scale_parallel_threads(size_t n_par_threads);
+ uint scale_parallel_threads(uint n_par_threads);
// Calculates the number of GC threads to be used in a concurrent phase.
- size_t calc_parallel_marking_threads();
+ uint calc_parallel_marking_threads();
// The following three are interaction between CM and
// G1CollectedHeap
// This notifies CM that a root during initial-mark needs to be
- // grayed and it's MT-safe. Currently, we just mark it. But, in the
- // future, we can experiment with pushing it on the stack and we can
- // do this without changing G1CollectedHeap.
- void grayRoot(oop p);
+ // grayed. It is MT-safe.
+ inline void grayRoot(oop obj, size_t word_size);
+
// It's used during evacuation pauses to gray a region, if
// necessary, and it's MT-safe. It assumes that the caller has
// marked any objects on that region. If _should_gray_objects is
@@ -735,6 +730,7 @@ public:
// pushed on the region stack, if it is located below the global
// finger, otherwise we do nothing.
void grayRegionIfNecessary(MemRegion mr);
+
// It's used during evacuation pauses to mark and, if necessary,
// gray a single object and it's MT-safe. It assumes the caller did
// not mark the object. If _should_gray_objects is true and we're
@@ -791,24 +787,40 @@ public:
// Mark in the previous bitmap. NB: this is usually read-only, so use
// this carefully!
- void markPrev(oop p);
+ inline void markPrev(oop p);
+ inline void markNext(oop p);
void clear(oop p);
- // Clears marks for all objects in the given range, for both prev and
- // next bitmaps. NB: the previous bitmap is usually read-only, so use
- // this carefully!
- void clearRangeBothMaps(MemRegion mr);
+ // Clears marks for all objects in the given range, for the prev,
+ // next, or both bitmaps. NB: the previous bitmap is usually
+ // read-only, so use this carefully!
+ void clearRangePrevBitmap(MemRegion mr);
+ void clearRangeNextBitmap(MemRegion mr);
+ void clearRangeBothBitmaps(MemRegion mr);
- // Record the current top of the mark and region stacks; a
- // subsequent oops_do() on the mark stack and
- // invalidate_entries_into_cset() on the region stack will iterate
- // only over indices valid at the time of this call.
- void set_oops_do_bound() {
- _markStack.set_oops_do_bound();
- _regionStack.set_oops_do_bound();
+ // Notify data structures that a GC has started.
+ void note_start_of_gc() {
+ _markStack.note_start_of_gc();
}
+
+ // Notify data structures that a GC is finished.
+ void note_end_of_gc() {
+ _markStack.note_end_of_gc();
+ }
+
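- // Iterate over the oops in the mark stack and all local queues. It
- // also calls invalidate_entries_into_cset() on the region stack.
+ // Iterate over the oops in the mark stack and all local queues. It no
+ // longer invalidates region stack / aborted region entries into the CSet.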
void oops_do(OopClosure* f);
+
+ // Verify that there are no CSet oops on the stacks (taskqueues /
+ // global mark stack), enqueued SATB buffers, per-thread SATB
+ // buffers, and fingers (global / per-task). The boolean parameters
+ // decide which of the above data structures to verify. If marking
+ // is not in progress, it's a no-op.
+ void verify_no_cset_oops(bool verify_stacks,
+ bool verify_enqueued_buffers,
+ bool verify_thread_buffers,
+ bool verify_fingers) PRODUCT_RETURN;
+
// It is called at the end of an evacuation pause during marking so
// that CM is notified of where the new end of the heap is. It
// doesn't do anything if concurrent_marking_in_progress() is false,
@@ -873,7 +885,7 @@ public:
return _prevMarkBitMap->isMarked(addr);
}
- inline bool do_yield_check(int worker_i = 0);
+ inline bool do_yield_check(uint worker_i = 0);
inline bool should_yield();
// Called to abort the marking cycle after a Full GC takes palce.
@@ -1166,6 +1178,7 @@ public:
// It keeps picking SATB buffers and processing them until no SATB
// buffers are available.
void drain_satb_buffers();
+
// It keeps popping regions from the region stack and processing
// them until the region stack is empty.
void drain_region_stack(BitMapClosure* closure);
diff --git a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
index fdba22a0ebc..d72db9ea78f 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -153,4 +153,46 @@ inline void CMTask::deal_with_reference(oop obj) {
}
}
+inline void ConcurrentMark::markPrev(oop p) {
+ assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
+ // Note we are overriding the read-only view of the prev map here, via
+ // the cast.
+ ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::markNext(oop p) {
+ assert(!_nextMarkBitMap->isMarked((HeapWord*) p), "sanity");
+ _nextMarkBitMap->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::grayRoot(oop obj, size_t word_size) {
+ HeapWord* addr = (HeapWord*) obj;
+
+ // Currently we don't do anything with word_size but we will use it
+ // in the very near future in the liveness calculation piggy-backing
+ // changes.
+
+#ifdef ASSERT
+ HeapRegion* hr = _g1h->heap_region_containing(addr);
+ assert(hr != NULL, "sanity");
+ assert(!hr->is_survivor(), "should not allocate survivors during IM");
+ assert(addr < hr->next_top_at_mark_start(),
+ err_msg("addr: "PTR_FORMAT" hr: "HR_FORMAT" NTAMS: "PTR_FORMAT,
+ addr, HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start()));
+ // We cannot assert that word_size == obj->size() given that obj
+ // might not be in a consistent state (another thread might be in
+ // the process of copying it). So the best thing we can do is to
+ // assert that word_size is under an upper bound which is its
+ // containing region's capacity.
+ assert(word_size * HeapWordSize <= hr->capacity(),
+ err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
+ word_size * HeapWordSize, hr->capacity(),
+ HR_FORMAT_PARAMS(hr)));
+#endif // ASSERT
+
+ if (!_nextMarkBitMap->isMarked(addr)) {
+ _nextMarkBitMap->parMark(addr);
+ }
+}
+
#endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
index 03280b448f9..f1645f32b0b 100644
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,11 @@
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
+#include "gc_implementation/g1/g1EvacFailure.hpp"
#include "gc_implementation/g1/g1MarkSweep.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/g1/vm_operations_g1.hpp"
@@ -591,17 +593,29 @@ HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool do_expand) {
}
res = new_region_try_secondary_free_list();
}
- if (res == NULL && do_expand) {
+ if (res == NULL && do_expand && _expand_heap_after_alloc_failure) {
+ // Currently, only attempts to allocate GC alloc regions set
+ // do_expand to true. So, we should only reach here during a
+ // safepoint. If this assumption changes we might have to
+ // reconsider the use of _expand_heap_after_alloc_failure.
+ assert(SafepointSynchronize::is_at_safepoint(), "invariant");
+
ergo_verbose1(ErgoHeapSizing,
"attempt heap expansion",
ergo_format_reason("region allocation request failed")
ergo_format_byte("allocation request"),
word_size * HeapWordSize);
if (expand(word_size * HeapWordSize)) {
- // Even though the heap was expanded, it might not have reached
- // the desired size. So, we cannot assume that the allocation
- // will succeed.
+ // Given that expand() succeeded in expanding the heap, and we
+ // always expand the heap by an amount aligned to the heap
+ // region size, the free list should in theory not be empty. So
+ // it would probably be OK to use remove_head(). But the extra
+ // check for NULL is unlikely to be a performance issue here (we
+ // just expanded the heap!) so let's just be conservative and
+ // use remove_head_or_null().
res = _free_list.remove_head_or_null();
+ } else {
+ _expand_heap_after_alloc_failure = false;
}
}
return res;
@@ -1165,9 +1179,9 @@ public:
_g1(g1)
{ }
- void work(int i) {
- RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
- _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
+ void work(uint worker_id) {
+ RebuildRSOutOfRegionClosure rebuild_rs(_g1, worker_id);
+ _g1->heap_region_par_iterate_chunked(&rebuild_rs, worker_id,
_g1->workers()->active_workers(),
HeapRegion::RebuildRSClaimValue);
}
@@ -1374,7 +1388,7 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
// Rebuild remembered sets of all regions.
if (G1CollectedHeap::use_parallel_gc_threads()) {
- int n_workers =
+ uint n_workers =
AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
workers()->active_workers(),
Threads::number_of_non_daemon_threads());
@@ -1838,6 +1852,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
_young_list(new YoungList(this)),
_gc_time_stamp(0),
_retained_old_gc_alloc_region(NULL),
+ _expand_heap_after_alloc_failure(true),
_surviving_young_words(NULL),
_full_collections_completed(0),
_in_cset_fast_test(NULL),
@@ -2519,11 +2534,11 @@ void G1CollectedHeap::heap_region_iterate_from(HeapRegion* r,
void
G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
- int worker,
- int no_of_par_workers,
+ uint worker,
+ uint no_of_par_workers,
jint claim_value) {
const size_t regions = n_regions();
- const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+ const uint max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
no_of_par_workers :
1);
assert(UseDynamicNumberOfGCThreads ||
@@ -2605,12 +2620,16 @@ public:
}
};
-void
-G1CollectedHeap::reset_heap_region_claim_values() {
+void G1CollectedHeap::reset_heap_region_claim_values() {
ResetClaimValuesClosure blk;
heap_region_iterate(&blk);
}
+void G1CollectedHeap::reset_cset_heap_region_claim_values() {
+ ResetClaimValuesClosure blk;
+ collection_set_iterate(&blk);
+}
+
#ifdef ASSERT
// This checks whether all regions in the heap have the correct claim
// value. I also piggy-backed on this a check to ensure that the
@@ -2739,7 +2758,7 @@ HeapRegion* G1CollectedHeap::start_cset_region_for_worker(int worker_i) {
result = g1_policy()->collection_set();
if (G1CollectedHeap::use_parallel_gc_threads()) {
size_t cs_size = g1_policy()->cset_region_length();
- int active_workers = workers()->active_workers();
+ uint active_workers = workers()->active_workers();
assert(UseDynamicNumberOfGCThreads ||
active_workers == workers()->total_workers(),
"Unless dynamic should use total workers");
@@ -3000,14 +3019,20 @@ public:
} else {
VerifyObjsInRegionClosure not_dead_yet_cl(r, _vo);
r->object_iterate(&not_dead_yet_cl);
- if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
- gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
- "max_live_bytes "SIZE_FORMAT" "
- "< calculated "SIZE_FORMAT,
- r->bottom(), r->end(),
- r->max_live_bytes(),
+ if (_vo != VerifyOption_G1UseNextMarking) {
+ if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
+ gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
+ "max_live_bytes "SIZE_FORMAT" "
+ "< calculated "SIZE_FORMAT,
+ r->bottom(), r->end(),
+ r->max_live_bytes(),
not_dead_yet_cl.live_bytes());
- _failures = true;
+ _failures = true;
+ }
+ } else {
+ // When vo == UseNextMarking we cannot currently do a sanity
+ // check on the live bytes as the calculation has not been
+ // finalized yet.
}
}
}
@@ -3075,10 +3100,10 @@ public:
return _failures;
}
- void work(int worker_i) {
+ void work(uint worker_id) {
HandleMark hm;
VerifyRegionClosure blk(_allow_dirty, true, _vo);
- _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
+ _g1h->heap_region_par_iterate_chunked(&blk, worker_id,
_g1h->workers()->active_workers(),
HeapRegion::ParVerifyClaimValue);
if (blk.failures()) {
@@ -3641,25 +3666,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
}
perm_gen()->save_marks();
- // We must do this before any possible evacuation that should propagate
- // marks.
- if (mark_in_progress()) {
- double start_time_sec = os::elapsedTime();
-
- _cm->drainAllSATBBuffers();
- double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
- g1_policy()->record_satb_drain_time(finish_mark_ms);
- }
- // Record the number of elements currently on the mark stack, so we
- // only iterate over these. (Since evacuation may add to the mark
- // stack, doing more exposes race conditions.) If no mark is in
- // progress, this will be zero.
- _cm->set_oops_do_bound();
-
- if (mark_in_progress()) {
- concurrent_mark()->newCSet();
- }
-
#if YOUNG_LIST_VERBOSE
gclog_or_tty->print_cr("\nBefore choosing collection set.\nYoung_list:");
_young_list->print();
@@ -3668,6 +3674,16 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
g1_policy()->choose_collection_set(target_pause_time_ms);
+ _cm->note_start_of_gc();
+ // We should not verify the per-thread SATB buffers given that
+ // we have not filtered them yet (we'll do so during the
+ // GC). We also call this after choose_collection_set() to
+ // ensure that the CSet has been finalized.
+ _cm->verify_no_cset_oops(true /* verify_stacks */,
+ true /* verify_enqueued_buffers */,
+ false /* verify_thread_buffers */,
+ true /* verify_fingers */);
+
if (_hr_printer.is_active()) {
HeapRegion* hr = g1_policy()->collection_set();
while (hr != NULL) {
@@ -3684,16 +3700,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
}
}
- // We have chosen the complete collection set. If marking is
- // active then, we clear the region fields of any of the
- // concurrent marking tasks whose region fields point into
- // the collection set as these values will become stale. This
- // will cause the owning marking threads to claim a new region
- // when marking restarts.
- if (mark_in_progress()) {
- concurrent_mark()->reset_active_task_region_fields_in_cset();
- }
-
#ifdef ASSERT
VerifyCSetClosure cl;
collection_set_iterate(&cl);
@@ -3707,6 +3713,16 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
// Actually do the work...
evacuate_collection_set();
+ // We do this mainly to verify the per-thread SATB buffers
+ // (which have been filtered by now) since we didn't verify
+ // them earlier. No point in re-checking the stacks / enqueued
+ // buffers given that the CSet has not changed since last time
+ // we checked.
+ _cm->verify_no_cset_oops(false /* verify_stacks */,
+ false /* verify_enqueued_buffers */,
+ true /* verify_thread_buffers */,
+ true /* verify_fingers */);
+
free_collection_set(g1_policy()->collection_set());
g1_policy()->clear_collection_set();
@@ -3775,6 +3791,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
size_t expand_bytes = g1_policy()->expansion_amount();
if (expand_bytes > 0) {
size_t bytes_before = capacity();
+ // No need for an ergo verbose message here,
+ // expansion_amount() does this when it returns a value > 0.
if (!expand(expand_bytes)) {
// We failed to expand the heap so let's verify that
// committed/uncommitted amount match the backing store
@@ -3784,6 +3802,14 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
}
}
+ // We redo the verification but now wrt the new CSet which
+ // has just got initialized after the previous CSet was freed.
+ _cm->verify_no_cset_oops(true /* verify_stacks */,
+ true /* verify_enqueued_buffers */,
+ true /* verify_thread_buffers */,
+ true /* verify_fingers */);
+ _cm->note_end_of_gc();
+
double end_time_sec = os::elapsedTime();
double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
g1_policy()->record_pause_time_ms(pause_time_ms);
@@ -3831,21 +3857,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
// CM reference discovery will be re-enabled if necessary.
}
- {
- size_t expand_bytes = g1_policy()->expansion_amount();
- if (expand_bytes > 0) {
- size_t bytes_before = capacity();
- // No need for an ergo verbose message here,
- // expansion_amount() does this when it returns a value > 0.
- if (!expand(expand_bytes)) {
- // We failed to expand the heap so let's verify that
- // committed/uncommitted amount match the backing store
- assert(capacity() == _g1_storage.committed_size(), "committed size mismatch");
- assert(max_capacity() == _g1_storage.reserved_size(), "reserved size mismatch");
- }
- }
- }
-
// We should do this after we potentially expand the heap so
// that all the COMMIT events are generated before the end GC
// event, and after we retire the GC alloc regions so that all
@@ -3949,6 +3960,8 @@ void G1CollectedHeap::init_gc_alloc_regions() {
// we allocate to in the region sets. We'll re-add it later, when
// it's retired again.
_old_set.remove(retained_region);
+ bool during_im = g1_policy()->during_initial_mark_pause();
+ retained_region->note_start_of_copying(during_im);
_old_gc_alloc_region.set(retained_region);
_hr_printer.reuse(retained_region);
}
@@ -3985,157 +3998,26 @@ void G1CollectedHeap::finalize_for_evac_failure() {
_evac_failure_scan_stack = NULL;
}
-class UpdateRSetDeferred : public OopsInHeapRegionClosure {
-private:
- G1CollectedHeap* _g1;
- DirtyCardQueue *_dcq;
- CardTableModRefBS* _ct_bs;
-
-public:
- UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
- _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
-
- virtual void do_oop(narrowOop* p) { do_oop_work(p); }
- virtual void do_oop( oop* p) { do_oop_work(p); }
- template