diff --git a/hotspot/.mx.jvmci/.pydevproject b/hotspot/.mx.jvmci/.pydevproject index 93dc745f1db..b127d241c79 100644 --- a/hotspot/.mx.jvmci/.pydevproject +++ b/hotspot/.mx.jvmci/.pydevproject @@ -1,11 +1,9 @@ - - - + Default python 2.7 -/mx.jvmci +/.mx.jvmci /mx diff --git a/hotspot/make/lib/Lib-jdk.hotspot.agent.gmk b/hotspot/make/lib/Lib-jdk.hotspot.agent.gmk index 9f78ce46152..8942652cce7 100644 --- a/hotspot/make/lib/Lib-jdk.hotspot.agent.gmk +++ b/hotspot/make/lib/Lib-jdk.hotspot.agent.gmk @@ -61,9 +61,8 @@ ifeq ($(OPENJDK_TARGET_OS), linux) else ifeq ($(OPENJDK_TARGET_OS), solaris) SA_TOOLCHAIN := TOOLCHAIN_LINK_CXX - COMMON_CFLAGS := -DSOLARIS_11_B159_OR_LATER - SA_CFLAGS := $(CFLAGS_JDKLIB) $(COMMON_CFLAGS) - SA_CXXFLAGS := $(CXXFLAGS_JDKLIB) $(COMMON_CFLAGS) + SA_CFLAGS := $(CFLAGS_JDKLIB) + SA_CXXFLAGS := $(CXXFLAGS_JDKLIB) SA_LDFLAGS := $(subst -Wl$(COMMA)-z$(COMMA)defs,, $(LDFLAGS_JDKLIB)) \ -mt $(LDFLAGS_CXX_JDK) SA_LIBS := -ldl -ldemangle -lthread -lc @@ -75,7 +74,7 @@ else ifeq ($(OPENJDK_TARGET_OS), macosx) -mstack-alignment=16 -fPIC SA_LDFLAGS := $(LDFLAGS_JDKLIB) SA_LIBS := -framework Foundation -framework JavaNativeFoundation \ - -framework Security -framework CoreFoundation + -framework JavaRuntimeSupport -framework Security -framework CoreFoundation else ifeq ($(OPENJDK_TARGET_OS), windows) SA_NAME := sawindbg diff --git a/hotspot/make/test/JtregNative.gmk b/hotspot/make/test/JtregNative.gmk index a1ede807462..78e78d774e5 100644 --- a/hotspot/make/test/JtregNative.gmk +++ b/hotspot/make/test/JtregNative.gmk @@ -47,11 +47,13 @@ BUILD_HOTSPOT_JTREG_NATIVE_SRC := \ $(HOTSPOT_TOPDIR)/test/runtime/jni/checked \ $(HOTSPOT_TOPDIR)/test/runtime/jni/PrivateInterfaceMethods \ $(HOTSPOT_TOPDIR)/test/runtime/jni/ToStringInInterfaceTest \ + $(HOTSPOT_TOPDIR)/test/runtime/jni/CalleeSavedRegisters \ $(HOTSPOT_TOPDIR)/test/runtime/modules/getModuleJNI \ $(HOTSPOT_TOPDIR)/test/runtime/SameObject \ $(HOTSPOT_TOPDIR)/test/runtime/BoolReturn \ 
$(HOTSPOT_TOPDIR)/test/compiler/floatingpoint/ \ $(HOTSPOT_TOPDIR)/test/compiler/calls \ + $(HOTSPOT_TOPDIR)/test/compiler/native \ $(HOTSPOT_TOPDIR)/test/serviceability/jvmti/GetNamedModule \ $(HOTSPOT_TOPDIR)/test/testlibrary/jvmti \ $(HOTSPOT_TOPDIR)/test/compiler/jvmci/jdk.vm.ci.code.test \ @@ -89,6 +91,11 @@ ifeq ($(OPENJDK_TARGET_OS), linux) BUILD_HOTSPOT_JTREG_LIBRARIES_LDFLAGS_libtest-rwx := -z execstack BUILD_HOTSPOT_JTREG_EXECUTABLES_LIBS_exeinvoke := -ljvm -lpthread BUILD_TEST_invoke_exeinvoke.c_OPTIMIZATION := NONE + BUILD_HOTSPOT_JTREG_EXECUTABLES_LDFLAGS_exeFPRegs := -ldl +endif + +ifeq ($(OPENJDK_TARGET_OS), windows) + BUILD_HOTSPOT_JTREG_EXECUTABLES_CFLAGS_exeFPRegs := -MT endif BUILD_HOTSPOT_JTREG_OUTPUT_DIR := $(BUILD_OUTPUT)/support/test/hotspot/jtreg/native diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp index a2d1631bf54..f4131ab5098 100644 --- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp @@ -2277,14 +2277,6 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { __ br(Assembler::HI, *stub->entry()); } - // FIXME: The logic in LIRGenerator::arraycopy_helper clears - // length_positive_check if the source of our length operand is an - // arraylength. However, that arraylength might be zero, and the - // stub that we're about to call contains an assertion that count != - // 0 . So we make this check purely in order not to trigger an - // assertion failure. 
- __ cbzw(length, *stub->continuation()); - if (flags & LIR_OpArrayCopy::type_check) { // We don't know the array types are compatible if (basic_type != T_OBJECT) { diff --git a/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp index 865ee69d1f7..9a16dd76967 100644 --- a/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -72,6 +72,7 @@ define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoRegScheduling, false); define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); +define_pd_global(bool, IdealizeClearArrayNode, true); define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); diff --git a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp index 8963353ede3..b74bce2d415 100644 --- a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp @@ -142,6 +142,10 @@ bool frame::safe_for_sender(JavaThread *thread) { } sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? 
+ if ((address)sender_sp >= thread->stack_base()) { + return false; + } sender_unextended_sp = sender_sp; sender_pc = (address) *(sender_sp-1); // Note: frame::sender_sp_offset is only valid for compiled frame @@ -200,8 +204,15 @@ bool frame::safe_for_sender(JavaThread *thread) { } // construct the potential sender + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); - return sender.is_entry_frame_valid(thread); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); + + return jcw_safe; } CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); diff --git a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp index 6fb7e5f11d1..3226ef5cf72 100644 --- a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp @@ -39,6 +39,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im define_pd_global(bool, TrapBasedNullChecks, false); define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast +define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. define_pd_global(intx, CodeEntryAlignment, 64); define_pd_global(intx, OptoLoopAlignment, 16); define_pd_global(intx, InlineFrequencyCount, 100); diff --git a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp index 7cd7e95c258..8f173648f32 100644 --- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp @@ -1962,6 +1962,8 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // due to cache line collision. 
__ serialize_memory(rthread, r2); } + } else { + __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); } // check for safepoint operation in progress and/or pending suspend requests diff --git a/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp index 9750ba1b409..8ff0bc6b962 100644 --- a/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp @@ -476,6 +476,7 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, } #endif } +#endif // handle exceptions { Label L; diff --git a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp index 9c684e4aca2..a0530449a71 100644 --- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp @@ -2102,7 +2102,9 @@ class Assembler : public AbstractAssembler { inline void mfvscr( VectorRegister d); // Vector-Scalar (VSX) instructions. + inline void lxvd2x( VectorSRegister d, Register a); inline void lxvd2x( VectorSRegister d, Register a, Register b); + inline void stxvd2x( VectorSRegister d, Register a); inline void stxvd2x( VectorSRegister d, Register a, Register b); inline void mtvrd( VectorRegister d, Register a); inline void mfvrd( Register a, VectorRegister d); diff --git a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp index f46f47dc311..45f4ec57e3e 100644 --- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp +++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp @@ -734,8 +734,10 @@ inline void Assembler::lvsl( VectorRegister d, Register s1, Register s2) { emit inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } // Vector-Scalar (VSX) instructions. 
-inline void Assembler::lxvd2x (VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(s1) | rb(s2)); } -inline void Assembler::stxvd2x(VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(s1) | rb(s2)); } +inline void Assembler::lxvd2x (VectorSRegister d, Register s1) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); } +inline void Assembler::lxvd2x (VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stxvd2x(VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); } +inline void Assembler::stxvd2x(VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); } inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vrt(d) | ra(a) | 1u); } // 1u: d is treated as Vector (VMX/Altivec). inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vrt(d) | ra(a) | 1u); } // 1u: d is treated as Vector (VMX/Altivec). diff --git a/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp b/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp index 0f52feff1f1..d9e17216563 100644 --- a/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp @@ -1894,15 +1894,18 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { __ beq(combined_check, slow); } + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. 
if (flags & LIR_OpArrayCopy::type_check) { - if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + if (!(flags & LIR_OpArrayCopy::dst_objarray)) { __ load_klass(tmp, dst); __ lwz(tmp2, in_bytes(Klass::layout_helper_offset()), tmp); __ cmpwi(CCR0, tmp2, Klass::_lh_neutral_value); __ bge(CCR0, slow); } - if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + if (!(flags & LIR_OpArrayCopy::src_objarray)) { __ load_klass(tmp, src); __ lwz(tmp2, in_bytes(Klass::layout_helper_offset()), tmp); __ cmpwi(CCR0, tmp2, Klass::_lh_neutral_value); diff --git a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp index 0dc6714f7e4..e755e982b9e 100644 --- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 SAP SE. All rights reserved. + * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -80,6 +80,7 @@ define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); // loc = x.f // NullCheck loc define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, IdealizeClearArrayNode, true); define_pd_global(intx, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize define_pd_global(intx, ReservedCodeCacheSize, 256*M); diff --git a/hotspot/src/cpu/ppc/vm/frame_ppc.cpp b/hotspot/src/cpu/ppc/vm/frame_ppc.cpp index 08463758370..131a931c2c1 100644 --- a/hotspot/src/cpu/ppc/vm/frame_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/frame_ppc.cpp @@ -221,6 +221,7 @@ void frame::describe_pd(FrameValues& values, int frame_no) { values.describe(frame_no, (intptr_t*)&(get_ijava_state()->name), #name); DESCRIBE_ADDRESS(method); + DESCRIBE_ADDRESS(mirror); DESCRIBE_ADDRESS(locals); DESCRIBE_ADDRESS(monitors); DESCRIBE_ADDRESS(cpoolCache); diff --git a/hotspot/src/cpu/ppc/vm/frame_ppc.hpp b/hotspot/src/cpu/ppc/vm/frame_ppc.hpp index ae6847ecab2..ccec598c1ad 100644 --- a/hotspot/src/cpu/ppc/vm/frame_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/frame_ppc.hpp @@ -257,8 +257,7 @@ struct ijava_state { #ifdef ASSERT - uint64_t ijava_reserved; // Used for assertion. - uint64_t ijava_reserved2; // Inserted for alignment. + uint64_t ijava_reserved; // Used for assertion. #endif uint64_t method; uint64_t mirror; @@ -274,7 +273,6 @@ uint64_t oop_tmp; uint64_t lresult; uint64_t fresult; - // Aligned to frame::alignment_in_bytes (16). }; enum { diff --git a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp index 662dc3c6bf1..953a4d2134f 100644 --- a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp @@ -56,10 +56,11 @@ define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); // Use large code-entry alignment. 
-define_pd_global(intx, CodeEntryAlignment, 128); -define_pd_global(intx, OptoLoopAlignment, 16); -define_pd_global(intx, InlineFrequencyCount, 100); -define_pd_global(intx, InlineSmallCode, 1500); +define_pd_global(uintx, CodeCacheSegmentSize, 128); +define_pd_global(intx, CodeEntryAlignment, 128); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 1500); // Flags for template interpreter. define_pd_global(bool, RewriteBytecodes, true); diff --git a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp index 417e6a3a27a..110ed85e12e 100644 --- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp @@ -1922,7 +1922,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, // Check the supertype display: if (must_load_sco) { // The super check offset is always positive... - lwz(check_cache_offset, sco_offset, super_klass); + lwz(check_cache_offset, sco_offset, super_klass); super_check_offset = RegisterOrConstant(check_cache_offset); // super_check_offset is register. 
assert_different_registers(sub_klass, super_klass, cached_super, super_check_offset.as_register()); @@ -3325,12 +3325,10 @@ void MacroAssembler::load_klass(Register dst, Register src) { } } -void MacroAssembler::load_mirror(Register mirror, Register method) { - const int mirror_offset = in_bytes(Klass::java_mirror_offset()); - ld(mirror, in_bytes(Method::const_offset()), method); - ld(mirror, in_bytes(ConstMethod::constants_offset()), mirror); +void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) { + ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method); ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror); - ld(mirror, mirror_offset, mirror); + ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror); } // Clear Array @@ -4345,8 +4343,8 @@ void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len * @param t3 volatile register */ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table, - Register constants, Register barretConstants, - Register t0, Register t1, Register t2, Register t3, Register t4) { + Register constants, Register barretConstants, + Register t0, Register t1, Register t2, Register t3, Register t4) { assert_different_registers(crc, buf, len, table); Label L_alignedHead, L_tail, L_alignTail, L_start, L_end; diff --git a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp index 3e481d84cf9..7763c86a1dd 100644 --- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp @@ -723,7 +723,7 @@ class MacroAssembler: public Assembler { void store_klass(Register dst_oop, Register klass, Register tmp = R0); void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified. 
- void load_mirror(Register mirror, Register method); + void load_mirror_from_const_method(Register mirror, Register const_method); static int instr_size_for_decode_klass_not_null(); void decode_klass_not_null(Register dst, Register src = noreg); diff --git a/hotspot/src/cpu/ppc/vm/ppc.ad b/hotspot/src/cpu/ppc/vm/ppc.ad index 8bacecaecb2..4c50b9b0448 100644 --- a/hotspot/src/cpu/ppc/vm/ppc.ad +++ b/hotspot/src/cpu/ppc/vm/ppc.ad @@ -11237,6 +11237,17 @@ instruct cmpP_reg_reg(flagsReg crx, iRegP_N2P src1, iRegP_N2P src2) %{ ins_pipe(pipe_class_compare); %} +instruct cmpP_reg_null(flagsReg crx, iRegP_N2P src1, immP_0or1 src2) %{ + match(Set crx (CmpP src1 src2)); + format %{ "CMPLDI $crx, $src1, $src2 \t// ptr" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_cmpl); + __ cmpldi($crx$$CondRegister, $src1$$Register, (int)((short)($src2$$constant & 0xFFFF))); + %} + ins_pipe(pipe_class_compare); +%} + // Used in postalloc expand. instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{ // This match rule prevents reordering of node before a safepoint. diff --git a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp index e638c5a6773..4bd0c833ab8 100644 --- a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp @@ -1220,8 +1220,8 @@ class StubGenerator: public StubCodeGenerator { __ bind(l_10); // Use loop with VSX load/store instructions to // copy 32 elements a time. 
- __ lxvd2x(tmp_vsr1, 0, R3_ARG1); // Load src - __ stxvd2x(tmp_vsr1, 0, R4_ARG2); // Store to dst + __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src + 16 __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16 __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32 @@ -1486,8 +1486,8 @@ class StubGenerator: public StubCodeGenerator { __ bind(l_9); // Use loop with VSX load/store instructions to // copy 16 elements a time. - __ lxvd2x(tmp_vsr1, 0, R3_ARG1); // Load from src. - __ stxvd2x(tmp_vsr1, 0, R4_ARG2); // Store to dst. + __ lxvd2x(tmp_vsr1, R3_ARG1); // Load from src. + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst. __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1); // Load from src + 16. __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16. __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32. @@ -1677,8 +1677,8 @@ class StubGenerator: public StubCodeGenerator { __ bind(l_7); // Use loop with VSX load/store instructions to // copy 8 elements a time. - __ lxvd2x(tmp_vsr1, 0, R3_ARG1); // Load src - __ stxvd2x(tmp_vsr1, 0, R4_ARG2); // Store to dst + __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src + 16 __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16 __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32 @@ -1745,13 +1745,16 @@ class StubGenerator: public StubCodeGenerator { // Do reverse copy. We assume the case of actual overlap is rare enough // that we don't have to optimize it. 
- Label l_1, l_2, l_3, l_4, l_5, l_6; + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7; Register tmp1 = R6_ARG4; Register tmp2 = R7_ARG5; Register tmp3 = R8_ARG6; Register tmp4 = R0; + VectorSRegister tmp_vsr1 = VSR1; + VectorSRegister tmp_vsr2 = VSR2; + { // FasterArrayCopy __ cmpwi(CCR0, R5_ARG3, 0); __ beq(CCR0, l_6); @@ -1761,6 +1764,25 @@ class StubGenerator: public StubCodeGenerator { __ add(R4_ARG2, R4_ARG2, R5_ARG3); __ srdi(R5_ARG3, R5_ARG3, 2); + if (!aligned) { + // check if arrays have same alignment mod 8. + __ xorr(tmp1, R3_ARG1, R4_ARG2); + __ andi_(R0, tmp1, 7); + // Not the same alignment, but ld and std just need to be 4 byte aligned. + __ bne(CCR0, l_7); // to OR from is 8 byte aligned -> copy 2 at a time + + // copy 1 element to align to and from on an 8 byte boundary + __ andi_(R0, R3_ARG1, 7); + __ beq(CCR0, l_7); + + __ addi(R3_ARG1, R3_ARG1, -4); + __ addi(R4_ARG2, R4_ARG2, -4); + __ addi(R5_ARG3, R5_ARG3, -1); + __ lwzx(tmp2, R3_ARG1); + __ stwx(tmp2, R4_ARG2); + __ bind(l_7); + } + __ cmpwi(CCR0, R5_ARG3, 7); __ ble(CCR0, l_5); // copy 1 at a time if less than 8 elements remain @@ -1768,6 +1790,7 @@ class StubGenerator: public StubCodeGenerator { __ andi(R5_ARG3, R5_ARG3, 7); __ mtctr(tmp1); + if (!VM_Version::has_vsx()) { __ bind(l_4); // Use unrolled version for mass copying (copy 4 elements a time). // Load feeding store gets zero latency on Power6, however not on Power5. @@ -1783,6 +1806,40 @@ class StubGenerator: public StubCodeGenerator { __ std(tmp2, 8, R4_ARG2); __ std(tmp1, 0, R4_ARG2); __ bdnz(l_4); + } else { // Processor supports VSX, so use it to mass copy. + // Prefetch the data into the L2 cache. + __ dcbt(R3_ARG1, 0); + + // If supported set DSCR pre-fetch to deepest. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); + } + + __ li(tmp1, 16); + + // Backbranch target aligned to 32-byte. 
Not 16-byte align as + // loop contains < 8 instructions that fit inside a single + // i-cache sector. + __ align(32); + + __ bind(l_4); + // Use loop with VSX load/store instructions to + // copy 8 elements a time. + __ addi(R3_ARG1, R3_ARG1, -32); // Update src-=32 + __ addi(R4_ARG2, R4_ARG2, -32); // Update dst-=32 + __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src+16 + __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src + __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst+16 + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst + __ bdnz(l_4); + + // Restore DSCR pre-fetch value. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); + } + } __ cmpwi(CCR0, R5_ARG3, 0); __ beq(CCR0, l_6); @@ -1892,8 +1949,8 @@ class StubGenerator: public StubCodeGenerator { __ bind(l_5); // Use loop with VSX load/store instructions to // copy 4 elements a time. - __ lxvd2x(tmp_vsr1, 0, R3_ARG1); // Load src - __ stxvd2x(tmp_vsr1, 0, R4_ARG2); // Store to dst + __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src + 16 __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16 __ addi(R3_ARG1, R3_ARG1, 32); // Update src+=32 @@ -1962,6 +2019,9 @@ class StubGenerator: public StubCodeGenerator { Register tmp3 = R8_ARG6; Register tmp4 = R0; + VectorSRegister tmp_vsr1 = VSR1; + VectorSRegister tmp_vsr2 = VSR2; + Label l_1, l_2, l_3, l_4, l_5; __ cmpwi(CCR0, R5_ARG3, 0); @@ -1980,6 +2040,7 @@ class StubGenerator: public StubCodeGenerator { __ andi(R5_ARG3, R5_ARG3, 3); __ mtctr(tmp1); + if (!VM_Version::has_vsx()) { __ bind(l_4); // Use unrolled version for mass copying (copy 4 elements a time). // Load feeding store gets zero latency on Power6, however not on Power5. @@ -1995,6 +2056,40 @@ class StubGenerator: public StubCodeGenerator { __ std(tmp2, 8, R4_ARG2); __ std(tmp1, 0, R4_ARG2); __ bdnz(l_4); + } else { // Processor supports VSX, so use it to mass copy. 
+ // Prefetch the data into the L2 cache. + __ dcbt(R3_ARG1, 0); + + // If supported set DSCR pre-fetch to deepest. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7); + __ mtdscr(tmp2); + } + + __ li(tmp1, 16); + + // Backbranch target aligned to 32-byte. Not 16-byte align as + // loop contains < 8 instructions that fit inside a single + // i-cache sector. + __ align(32); + + __ bind(l_4); + // Use loop with VSX load/store instructions to + // copy 4 elements a time. + __ addi(R3_ARG1, R3_ARG1, -32); // Update src-=32 + __ addi(R4_ARG2, R4_ARG2, -32); // Update dst-=32 + __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1); // Load src+16 + __ lxvd2x(tmp_vsr1, R3_ARG1); // Load src + __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst+16 + __ stxvd2x(tmp_vsr1, R4_ARG2); // Store to dst + __ bdnz(l_4); + + // Restore DSCR pre-fetch value. + if (VM_Version::has_mfdscr()) { + __ load_const_optimized(tmp2, VM_Version::_dscr_val); + __ mtdscr(tmp2); + } + } __ cmpwi(CCR0, R5_ARG3, 0); __ beq(CCR0, l_1); diff --git a/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp b/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp index 959ee898504..cadde26b5be 100644 --- a/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp @@ -915,7 +915,9 @@ void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratc __ b(Ldone); __ bind(Lstatic); // Static case: Lock the java mirror - __ load_mirror(Robj_to_lock, R19_method); + // Load mirror from interpreter frame. 
+ __ ld(Robj_to_lock, _abi(callers_sp), R1_SP); + __ ld(Robj_to_lock, _ijava_state_neg(mirror), Robj_to_lock); __ bind(Ldone); __ verify_oop(Robj_to_lock); @@ -1077,12 +1079,12 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist __ resize_frame(parent_frame_resize, R11_scratch1); __ std(R12_scratch2, _abi(lr), R1_SP); + // Get mirror and store it in the frame as GC root for this Method*. + __ load_mirror_from_const_method(R12_scratch2, Rconst_method); + __ addi(R26_monitor, R1_SP, - frame::ijava_state_size); __ addi(R15_esp, R26_monitor, - Interpreter::stackElementSize); - // Get mirror and store it in the frame as GC root for this Method*. - __ load_mirror(R12_scratch2, R19_method); - // Store values. // R15_esp, R14_bcp, R26_monitor, R28_mdx are saved at java calls // in InterpreterMacroAssembler::call_from_interpreter. @@ -1380,13 +1382,12 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT); __ bfalse(CCR0, method_is_not_static); - __ load_mirror(R12_scratch2, R19_method); - // state->_native_mirror = mirror; - - __ ld(R11_scratch1, 0, R1_SP); - __ std(R12_scratch2/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1); + __ ld(R11_scratch1, _abi(callers_sp), R1_SP); + // Load mirror from interpreter frame. + __ ld(R12_scratch2, _ijava_state_neg(mirror), R11_scratch1); // R4_ARG2 = &state->_oop_temp; __ addi(R4_ARG2, R11_scratch1, _ijava_state_neg(oop_tmp)); + __ std(R12_scratch2/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1); BIND(method_is_not_static); } @@ -2157,12 +2158,12 @@ address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state // Restoration of lr done by remove_activation. switch (state) { // Narrow result if state is itos but result type is smaller. 
- case itos: __ narrow(R17_tos); /* fall through */ - case ltos: case btos: case ztos: case ctos: case stos: + case itos: __ narrow(R17_tos); /* fall through */ + case ltos: case atos: __ mr(R3_RET, R17_tos); break; case ftos: case dtos: __ fmr(F1_RET, F15_ftos); break; diff --git a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp index 610cf637575..517a304d4e3 100644 --- a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp +++ b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp @@ -2133,10 +2133,6 @@ void TemplateTable::_return(TosState state) { // since compiled code callers expect the result to already be narrowed. case itos: __ narrow(R17_tos); /* fall through */ case ltos: - case btos: - case ztos: - case ctos: - case stos: case atos: __ mr(R3_RET, R17_tos); break; case ftos: case dtos: __ fmr(F1_RET, F15_ftos); break; @@ -2548,7 +2544,6 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr assert(branch_table[ztos] == 0, "can't compute twice"); branch_table[ztos] = __ pc(); // non-volatile_entry point __ lbzx(R17_tos, Rclass_or_obj, Roffset); - __ extsb(R17_tos, R17_tos); __ push(ztos); if (!is_static && rc == may_rewrite) { // use btos rewriting, no truncating to t/f bit is needed for getfield. diff --git a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp index 6162bafd1ab..190d83d09ff 100644 --- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp @@ -656,7 +656,7 @@ void VM_Version::determine_features() { a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb a->tcheck(0); // code[12] -> tcheck a->mfdscr(R0); // code[13] -> mfdscr - a->lxvd2x(VSR0, 0, R3_ARG1); // code[14] -> vsx + a->lxvd2x(VSR0, R3_ARG1); // code[14] -> vsx a->blr(); // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. 
diff --git a/hotspot/src/cpu/s390/vm/abstractInterpreter_s390.cpp b/hotspot/src/cpu/s390/vm/abstractInterpreter_s390.cpp new file mode 100644 index 00000000000..2140ac711a6 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/abstractInterpreter_s390.cpp @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/constMethod.hpp"
+#include "oops/method.hpp"
+#include "runtime/frame.inline.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/macros.hpp"
+
+// Maps a BasicType to its slot in the range [0, number_of_result_handlers).
+// T_OBJECT and T_ARRAY share slot 9. Any other type without a case is a
+// programming error.
+int AbstractInterpreter::BasicType_as_index(BasicType type) {
+  int handler_index = 0;
+  switch (type) {
+    case T_BOOLEAN: handler_index = 0; break;
+    case T_CHAR   : handler_index = 1; break;
+    case T_BYTE   : handler_index = 2; break;
+    case T_SHORT  : handler_index = 3; break;
+    case T_INT    : handler_index = 4; break;
+    case T_LONG   : handler_index = 5; break;
+    case T_VOID   : handler_index = 6; break;
+    case T_FLOAT  : handler_index = 7; break;
+    case T_DOUBLE : handler_index = 8; break;
+    case T_OBJECT : // Fall through: arrays use the same slot as objects.
+    case T_ARRAY  : handler_index = 9; break;
+    default       : ShouldNotReachHere();
+  }
+  assert(0 <= handler_index && handler_index < AbstractInterpreter::number_of_result_handlers, "index out of bounds");
+  return handler_index;
+}
+
+// Always answers true for this port.
+bool AbstractInterpreter::can_be_compiled(methodHandle m) {
+  return true; // No special entry points that preclude compilation.
+}
+
+// How much stack a method top interpreter activation needs in words.
+int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
+
+  // Two frames have to be sized:
+  //
+  //   [TOP_IJAVA_FRAME_ABI]
+  //   [ENTRY_FRAME]
+  //
+  // which expands to (see frame_s390.hpp):
+  //
+  //   [TOP_IJAVA_FRAME_ABI]
+  //   [operand stack]                 > stack
+  //   [monitors]      (optional)      > monitors
+  //   [IJAVA_STATE]                   > interpreter_state
+  //   [PARENT_IJAVA_FRAME_ABI]
+  //   [callee's locals w/o arguments] \ locals
+  //   [outgoing arguments]            /
+  //   [ENTRY_FRAME_LOCALS]
+
+  const int locals_bytes = method->max_locals() * BytesPerWord;
+  const int state_bytes  = frame::z_ijava_state_size;
+  const int stack_bytes  = method->max_stack() * BytesPerWord;
+
+  // Only a synchronized method needs room for a monitor.
+  int monitor_bytes = 0;
+  if (method->is_synchronized()) {
+    monitor_bytes = frame::interpreter_frame_monitor_size_in_bytes();
+  }
+
+  // Accumulate the sections in the order of the picture above.
+  int total_bytes = frame::z_top_ijava_frame_abi_size;
+  total_bytes += stack_bytes;
+  total_bytes += monitor_bytes;
+  total_bytes += state_bytes;
+  total_bytes += frame::z_parent_ijava_frame_abi_size;
+  total_bytes += locals_bytes;
+  total_bytes += frame::z_entry_frame_locals_size;
+
+  return (total_bytes/BytesPerWord);
+}
+
+// Returns number of stackElementWords needed for the interpreter frame with the
+// given sections.
+// This overestimates the stack by one slot in case of alignments.
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int temps,
+                                         int extra_args,
+                                         int monitors,
+                                         int callee_params,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  // Note: This calculation must exactly parallel the frame setup
+  // in AbstractInterpreterGenerator::generate_method_entry.
+
+  assert((Interpreter::stackElementSize == frame::alignment_in_bytes), "must align frame size");
+
+  // A top frame carries the larger TOP ABI area, every other frame the PARENT one.
+  int abi_scratch;
+  if (is_top_frame) {
+    abi_scratch = frame::z_top_ijava_frame_abi_size / Interpreter::stackElementSize;
+  } else {
+    abi_scratch = frame::z_parent_ijava_frame_abi_size / Interpreter::stackElementSize;
+  }
+
+  // The callee's parameters are already counted in max_stack().
+  const int callee_extra_locals = callee_locals - callee_params;
+  const int monitor_slots       = monitors * frame::interpreter_frame_monitor_size();
+  const int state_slots         = frame::z_ijava_state_size / Interpreter::stackElementSize;
+
+  // Fixed size of an interpreter frame.
+  return max_stack + callee_extra_locals + monitor_slots + abi_scratch + state_slots;
+}
+
+// Fills a skeletal interpreter frame generated during deoptimizations.
+//
+// Parameters:
+//
+// interpreter_frame != NULL:
+//   set up the method, locals, and monitors.
+//   The frame interpreter_frame, if not NULL, is guaranteed to be the
+//   right size, as determined by a previous call to this method.
+//   It is also guaranteed to be walkable even though it is in a skeletal state
+//
+// is_top_frame == true:
+//   We're processing the *oldest* interpreter frame!
+//
+// popframe_extra_args:
+//   If this is != 0 we are returning to a deoptimized frame by popping
+//   off the callee frame.
We want to re-execute the call that called the
+// callee interpreted, but since the return to the interpreter would pop
+// the arguments off advance the esp by dummy popframe_extra_args slots.
+// Popping off those will establish the stack layout as it was before the call.
+//
+// Parameters not referenced by this implementation: callee_param_count,
+// callee_locals_count and is_top_frame. is_bottom_frame is only consulted
+// by the assertion on the compiled-caller path.
+
+void AbstractInterpreter::layout_activation(Method* method,
+                                            int tempcount,
+                                            int popframe_extra_args,
+                                            int moncount,
+                                            int caller_actual_parameters,
+                                            int callee_param_count,
+                                            int callee_locals_count,
+                                            frame* caller,
+                                            frame* interpreter_frame,
+                                            bool is_top_frame,
+                                            bool is_bottom_frame) {
+  // TOP_IJAVA_FRAME:
+  //
+  //    0 [TOP_IJAVA_FRAME_ABI]         -+
+  //   16 [operand stack]                | size
+  //      [monitors]      (optional)     |
+  //      [IJAVA_STATE]                 -+
+  //      Note: own locals are located in the caller frame.
+  //
+  // PARENT_IJAVA_FRAME:
+  //
+  //    0 [PARENT_IJAVA_FRAME_ABI]                    -+
+  //      [callee's locals w/o arguments]              |
+  //      [outgoing arguments]                         | size
+  //      [used part of operand stack w/o arguments]   |
+  //      [monitors]      (optional)                   |
+  //      [IJAVA_STATE]                               -+
+  //
+
+  // Now we know our caller, calc the exact frame layout and size
+  // z_ijava_state->locals - i*BytesPerWord points to i-th Java local (i starts at 0).
+  // Interpreted caller: local 0 is caller_actual_parameters - 1 slots above the
+  // caller's tos address. Any other caller: it is max_locals - 1 slots plus the
+  // parent ABI area above the caller's sp.
+  intptr_t* locals_base = (caller->is_interpreted_frame())
+    ? (caller->interpreter_frame_tos_address() + caller_actual_parameters - 1)
+    : (caller->sp() + method->max_locals() - 1 +
+       frame::z_parent_ijava_frame_abi_size / Interpreter::stackElementSize);
+
+  // The IJAVA_STATE area ends at fp; the monitors lie directly below it.
+  intptr_t* monitor_base = (intptr_t*)((address)interpreter_frame->fp() - frame::z_ijava_state_size);
+  intptr_t* monitor = monitor_base - (moncount * frame::interpreter_frame_monitor_size());
+  // The operand stack starts where the monitor area ends.
+  intptr_t* operand_stack_base = monitor;
+  // tos lies tempcount + popframe_extra_args slots below the operand stack base.
+  intptr_t* tos = operand_stack_base - tempcount - popframe_extra_args;
+  // Leave room for a full operand stack (max_stack) plus the top frame ABI area.
+  intptr_t* top_frame_sp =
+    operand_stack_base - method->max_stack() - frame::z_top_ijava_frame_abi_size / Interpreter::stackElementSize;
+  // The sender_sp to record depends on the kind of caller frame.
+  intptr_t* sender_sp;
+  if (caller->is_interpreted_frame()) {
+    sender_sp = caller->interpreter_frame_top_frame_sp();
+  } else if (caller->is_compiled_frame()) {
+    sender_sp = caller->fp() - caller->cb()->frame_size();
+    // The bottom frame's sender_sp is its caller's unextended_sp.
+    // It was already set when its skeleton was pushed (see push_skeleton_frames()).
+    // Note: the unextended_sp is required by nmethod::orig_pc_addr().
+    assert(is_bottom_frame && (sender_sp == caller->unextended_sp()),
+           "must initialize sender_sp of bottom skeleton frame when pushing it");
+  } else {
+    assert(caller->is_entry_frame(), "is there a new frame type??");
+    sender_sp = caller->sp(); // Call_stub only uses its fp.
+  }
+
+  // Publish the computed layout in the skeletal frame's interpreter state.
+  interpreter_frame->interpreter_frame_set_method(method);
+  interpreter_frame->interpreter_frame_set_mirror(method->method_holder()->java_mirror());
+  interpreter_frame->interpreter_frame_set_locals(locals_base);
+  interpreter_frame->interpreter_frame_set_monitor_end((BasicObjectLock *)monitor);
+  *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache();
+  interpreter_frame->interpreter_frame_set_tos_address(tos);
+  interpreter_frame->interpreter_frame_set_sender_sp(sender_sp);
+  interpreter_frame->interpreter_frame_set_top_frame_sp(top_frame_sp);
+}
diff --git a/hotspot/src/cpu/s390/vm/assembler_s390.cpp b/hotspot/src/cpu/s390/vm/assembler_s390.cpp
new file mode 100644
index 00000000000..7b9d1d4dd1d
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/assembler_s390.cpp
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif
+
+// Convention: Use Z_R0 and Z_R1 instead of Z_scratch_* in all
+// assembler_s390.* files.
+
+// Builds an Address from the raw (integer) operand encoding. The call is
+// emitted by adlc generated code. Scaling and displacement relocations
+// are rejected by assertion.
+Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) {
+  assert(scale == 0, "Scale should not be used on z/Architecture. The call to make_raw is "
+         "generated by adlc and this must mirror all features of Operands from machnode.hpp.");
+  assert(disp_reloc == relocInfo::none, "not implemented on z/Architecture.");
+
+  const Register base_reg  = as_Register(base);
+  const Register index_reg = as_Register(index);
+  return Address(base_reg, index_reg, in_ByteSize(disp));
+}
+
+// Byte value reported as the code fill pattern.
+int AbstractAssembler::code_fill_byte() {
+  const int illegal_instruction_byte = 0x00; // Illegal instruction 0x00000000.
+  return illegal_instruction_byte;
+}
+
+// Condition code masks. Details see enum branch_condition.
+// Although this method is meant for INT CCs, the Overflow/Ordered
+// bit in the masks has to be considered. The CC might have been set
+// by a float operation, but is evaluated while calculating an integer
+// result. See elementary test TestFloat.isNotEqual(FF)Z for example.
+// Returns the branch condition mask that is taken exactly when cc is not.
+// The unordered (odd) bit of cc is flipped as well: if it is clear in cc
+// it is set in the result.
+Assembler::branch_condition Assembler::inverse_condition(Assembler::branch_condition cc) {
+  Assembler::branch_condition unordered_bit = (Assembler::branch_condition)(cc & bcondNotOrdered);
+  Assembler::branch_condition inverse_cc;
+
+  // Some are commented out to avoid duplicate labels.
+  switch (cc) {
+    case bcondNever       : inverse_cc = bcondAlways;      break;  //  0 -> 15
+    case bcondAlways      : inverse_cc = bcondNever;       break;  // 15 ->  0
+
+    case bcondOverflow    : inverse_cc = bcondNotOverflow; break;  //  1 -> 14
+    case bcondNotOverflow : inverse_cc = bcondOverflow;    break;  // 14 ->  1
+
+    default :
+      // Invert on the ordered part of the mask only; the unordered bit
+      // is handled separately below.
+      switch ((Assembler::branch_condition)(cc & bcondOrdered)) {
+        case bcondEqual       : inverse_cc = bcondNotEqual;  break;  //  8 ->  6
+        // case bcondZero         :
+        // case bcondAllZero      :
+
+        case bcondNotEqual    : inverse_cc = bcondEqual;     break;  //  6 ->  8
+        // case bcondNotZero      :
+        // case bcondMixed        :
+
+        case bcondLow         : inverse_cc = bcondNotLow;    break;  //  4 -> 10
+        // case bcondNegative     :
+
+        case bcondNotLow      : inverse_cc = bcondLow;       break;  // 10 ->  4
+        // case bcondNotNegative  :
+
+        case bcondHigh        : inverse_cc = bcondNotHigh;   break;  //  2 -> 12
+        // case bcondPositive     :
+
+        case bcondNotHigh     : inverse_cc = bcondHigh;      break;  // 12 ->  2
+        // case bcondNotPositive  :
+
+        default :
+          fprintf(stderr, "inverse_condition(%d)\n", (int)cc);
+          fflush(stderr);
+          ShouldNotReachHere();
+          return bcondNever;
+      }
+      // If cc is even, inverse_cc must be odd.
+      if (!unordered_bit) {
+        inverse_cc = (Assembler::branch_condition)(inverse_cc | bcondNotOrdered);
+      }
+      break;
+  }
+  return inverse_cc;
+}
+
+// Returns the inverse of a float branch condition. Only the masks listed
+// below are accepted; anything else is reported and treated as a
+// programming error. Trailing comments give "input mask -> result mask".
+Assembler::branch_condition Assembler::inverse_float_condition(Assembler::branch_condition cc) {
+  Assembler::branch_condition inverse_cc;
+
+  switch (cc) {
+    case bcondNever       : inverse_cc = bcondAlways;      break;  //  0 -> 15
+    case bcondAlways      : inverse_cc = bcondNever;       break;  // 15 ->  0
+
+    case bcondNotOrdered  : inverse_cc = bcondOrdered;     break;  //  1 -> 14
+    case bcondOrdered     : inverse_cc = bcondNotOrdered;  break;  // 14 ->  1
+
+    case bcondEqual                      : inverse_cc = (branch_condition)(bcondNotEqual + bcondNotOrdered); break;  //  8 ->  7
+    case bcondNotEqual + bcondNotOrdered : inverse_cc = bcondEqual;  break;                                          //  7 ->  8
+
+    case bcondLow + bcondNotOrdered      : inverse_cc = (branch_condition)(bcondHigh + bcondEqual);      break;     //  5 -> 10
+    case bcondNotLow                     : inverse_cc = (branch_condition)(bcondLow + bcondNotOrdered);  break;     // 10 ->  5
+
+    case bcondHigh                       : inverse_cc = (branch_condition)(bcondLow + bcondNotOrdered + bcondEqual); break;  //  2 -> 13
+    case bcondNotHigh + bcondNotOrdered  : inverse_cc = bcondHigh;   break;                                                  // 13 ->  2
+
+    default :
+      fprintf(stderr, "inverse_float_condition(%d)\n", (int)cc);
+      fflush(stderr);
+      ShouldNotReachHere();
+      return bcondNever;
+  }
+  return inverse_cc;
+}
+
+#ifdef ASSERT
+// Prints inst in hex followed by msg. The number of hex digits is twice
+// ilen (2, 4 or 6); any other ilen prints 16 digits.
+// NOTE(review): all forms except the 2-byte one carry an extra "\n" in
+// the format although print_cr is used - presumably intentional spacing;
+// left unchanged.
+void Assembler::print_dbg_msg(outputStream* out, unsigned long inst, const char* msg, int ilen) {
+  out->flush();
+  switch (ilen) {
+    case 2:  out->print_cr("inst = %4.4x, %s",    (unsigned short)inst, msg); break;
+    case 4:  out->print_cr("inst = %8.8x, %s\n",    (unsigned int)inst, msg); break;
+    case 6:  out->print_cr("inst = %12.12lx, %s\n",               inst, msg); break;
+    default: out->print_cr("inst = %16.16lx, %s\n",               inst, msg); break;
+  }
+  out->flush();
+}
+
+// Prints a banner with msg, a hex dump of [pc-range, pc+range) and a
+// disassembly of [pc, pc+range) to out.
+void Assembler::dump_code_range(outputStream* out, address pc, const unsigned int range, const char* msg) {
+  out->cr();
+  out->print_cr("-------------------------------");
+  out->print_cr("-- %s", msg);
+  out->print_cr("-------------------------------");
+  // range is unsigned, so it is printed with %u.
+  out->print_cr("Hex dump of +/-%u bytes around %p, interval [%p,%p)", range, pc, pc-range, pc+range);
+  os::print_hex_dump(out, pc-range, pc+range, 2);
+
+  out->cr();
+  // Decoding starts at pc, not at pc-range, so announce the interval
+  // that is actually disassembled.
+  out->print_cr("Disassembly of %u bytes starting at %p, interval [%p,%p)", range, pc, pc, pc+range);
+  Disassembler::decode(pc, pc + range, out);
+}
+#endif
diff --git a/hotspot/src/cpu/s390/vm/assembler_s390.hpp b/hotspot/src/cpu/s390/vm/assembler_s390.hpp
new file mode 100644
index 00000000000..dee38875e50
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/assembler_s390.hpp
@@ -0,0 +1,2530 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_ASSEMBLER_S390_HPP
+#define CPU_S390_VM_ASSEMBLER_S390_HPP
+
+#undef LUCY_DBG
+
+#define NearLabel Label
+
+// Immediate is an abstraction to represent the various immediate
+// operands which exist on z/Architecture.
Neither this class nor
+// instances hereof have an own state. It consists of methods only.
+class Immediate VALUE_OBJ_CLASS_SPEC {
+
+ public:
+  // Test if x is within signed immediate range for nbits,
+  // i.e. -2**(nbits-1) <= x < 2**(nbits-1).
+  static bool is_simm(int64_t x, unsigned int nbits) {
+    // nbits < 2   --> false
+    // nbits >= 64 --> true
+    assert(2 <= nbits && nbits < 64, "Don't call, use statically known result.");
+    const int64_t limit = (1L << (nbits-1));
+    return -limit <= x && x < limit;
+  }
+  static bool is_simm32(int64_t x) { return is_simm(x, 32); }
+  static bool is_simm20(int64_t x) { return is_simm(x, 20); }
+  static bool is_simm16(int64_t x) { return is_simm(x, 16); }
+  static bool is_simm8(int64_t x)  { return is_simm(x,  8); }
+
+  // Test if x is within unsigned immediate range for nbits,
+  // i.e. 0 <= x < 2**nbits.
+  static bool is_uimm(int64_t x, unsigned int nbits) {
+    // nbits == 0  --> false
+    // nbits >= 64 --> true
+    assert(1 <= nbits && nbits < 64, "don't call, use statically known result");
+    const uint64_t upper_bound = 1UL << nbits;
+    const uint64_t as_unsigned = (unsigned long)x;
+    // Unsigned comparison. Negative inputs appear to be very large.
+    return as_unsigned < upper_bound;
+  }
+  static bool is_uimm32(int64_t x) { return is_uimm(x, 32); }
+  static bool is_uimm16(int64_t x) { return is_uimm(x, 16); }
+  static bool is_uimm12(int64_t x) { return is_uimm(x, 12); }
+  static bool is_uimm8(int64_t x)  { return is_uimm(x,  8); }
+};
+
+// Displacement is an abstraction to represent the various
+// displacements which exist with addresses on z/Architecture.
+// Neither this class nor instances hereof have an own state. It
+// consists of methods only.
+class Displacement VALUE_OBJ_CLASS_SPEC {
+
+ public: // These tests are used outside the (Macro)Assembler world, e.g. in ad-file.
+
+  // Fits in a 20-bit displacement field.
+  static bool is_longDisp(int64_t x)  { return Immediate::is_simm20(x); }
+  // Fits in a 12-bit displacement field.
+  static bool is_shortDisp(int64_t x) { return Immediate::is_uimm12(x); }
+  // Is a valid displacement, regardless of length constraints.
+  static bool is_validDisp(int64_t x) { return is_longDisp(x); }
+};
+
+// RelAddr is an abstraction to represent relative addresses in the
+// form they are used on z/Architecture for instructions which access
+// their operand with pc-relative addresses. Neither this class nor
+// instances hereof have an own state. It consists of methods only.
+class RelAddr VALUE_OBJ_CLASS_SPEC {
+
+ private: // No public use at all. Solely for (Macro)Assembler.
+
+  // True if target can be reached from pc with a short (shortForm) or
+  // long relative address.
+  static bool is_in_range_of_RelAddr(address target, address pc, bool shortForm) {
+    // Guard against illegal branch targets, e.g. -1. Occurrences in
+    // CompiledStaticCall and ad-file. Do not assert (it's a test
+    // function!). Just return false in case of illegal operands.
+    const bool target_is_odd = (((uint64_t)target) & 0x0001L) != 0;
+    if (target_is_odd) return false;
+    const bool pc_is_odd = (((uint64_t)pc) & 0x0001L) != 0;
+    if (pc_is_odd) return false;
+
+    // Relative short addresses can reach +/- 2**16 bytes,
+    // relative long addresses can reach +/- 2**32 bytes.
+    const unsigned int distance_bits = shortForm ? 17 : 33;
+    return Immediate::is_simm((int64_t)(target-pc), distance_bits);
+  }
+
+  static bool is_in_range_of_RelAddr16(address target, address pc) {
+    return is_in_range_of_RelAddr(target, pc, true);
+  }
+  static bool is_in_range_of_RelAddr16(ptrdiff_t distance) {
+    return is_in_range_of_RelAddr((address)distance, 0, true);
+  }
+
+  static bool is_in_range_of_RelAddr32(address target, address pc) {
+    return is_in_range_of_RelAddr(target, pc, false);
+  }
+  static bool is_in_range_of_RelAddr32(ptrdiff_t distance) {
+    return is_in_range_of_RelAddr((address)distance, 0, false);
+  }
+
+  // Distance from pc to target, shifted right by one bit, i.e. counted
+  // in 2-byte units. Returns 0 while the destination is still unknown.
+  static int pcrel_off(address target, address pc, bool shortForm) {
+    assert(((uint64_t)target & 0x0001L) == 0, "target of a relative address must be aligned");
+    assert(((uint64_t)pc     & 0x0001L) == 0, "origin of a relative address must be aligned");
+
+    // Yet unknown branch destination.
+    if ((target == NULL) || (target == pc)) {
+      return 0;
+    }
+
+    guarantee(is_in_range_of_RelAddr(target, pc, shortForm), "target not within reach");
+    return (int)((target - pc)>>1);
+  }
+
+  static int pcrel_off16(address target, address pc) {
+    return pcrel_off(target, pc, true);
+  }
+  static int pcrel_off16(ptrdiff_t distance) {
+    return pcrel_off((address)distance, 0, true);
+  }
+
+  static int pcrel_off32(address target, address pc) {
+    return pcrel_off(target, pc, false);
+  }
+  static int pcrel_off32(ptrdiff_t distance) {
+    return pcrel_off((address)distance, 0, false);
+  }
+
+  // Inverse of pcrel_off16: the offset shifted left by one bit.
+  static ptrdiff_t inv_pcrel_off16(int offset) {
+    const ptrdiff_t wide_offset = (ptrdiff_t)offset;
+    return wide_offset<<1;
+  }
+
+  // Inverse of pcrel_off32: the offset shifted left by one bit.
+  static ptrdiff_t inv_pcrel_off32(int offset) {
+    const ptrdiff_t wide_offset = (ptrdiff_t)offset;
+    return wide_offset<<1;
+  }
+
+  friend class Assembler;
+  friend class MacroAssembler;
+  friend class NativeGeneralJump;
+};
+
+// Address is an abstraction used to represent a memory location
+// as passed to Z assembler instructions.
+//
+// Note: A register location is represented via a Register, not
+// via an address for efficiency & simplicity reasons.
+class Address VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register _base;  // Base register.
+  Register _index; // Index register
+  intptr_t _disp;  // Constant displacement.
+
+ public:
+  // No base, no index, displacement 0.
+  Address() :
+    _base(noreg),
+    _index(noreg),
+    _disp(0) {}
+
+  Address(Register base, Register index, intptr_t disp = 0) :
+    _base(base),
+    _index(index),
+    _disp(disp) {}
+
+  Address(Register base, intptr_t disp = 0) :
+    _base(base),
+    _index(noreg),
+    _disp(disp) {}
+
+  // A constant roc is folded into the displacement, a register roc
+  // becomes the index register.
+  Address(Register base, RegisterOrConstant roc, intptr_t disp = 0) :
+    _base(base),
+    _index(noreg),
+    _disp(disp) {
+    if (roc.is_constant()) _disp += roc.as_constant(); else _index = roc.as_register();
+  }
+
+#ifdef ASSERT
+  // ByteSize is only a class when ASSERT is defined, otherwise it's an int.
+  Address(Register base, ByteSize disp) :
+    _base(base),
+    _index(noreg),
+    _disp(in_bytes(disp)) {}
+
+  Address(Register base, Register index, ByteSize disp) :
+    _base(base),
+    _index(index),
+    _disp(in_bytes(disp)) {}
+#endif
+
+  // Returns a copy of this address with disp added. A constant is added
+  // to the displacement; a register takes the free index slot or, if
+  // the index is taken, the free base slot.
+  // Aborts if disp is a register and base and index are set already.
+  Address plus_disp(RegisterOrConstant disp) const {
+    Address a = (*this);
+    a._disp += disp.constant_or_zero();
+    if (disp.is_register()) {
+      if (a._index == noreg) {
+        a._index = disp.as_register();
+      } else {
+        guarantee(_base == noreg, "can not encode");
+        a._base = disp.as_register();
+      }
+    }
+    return a;
+  }
+
+  // A call to this is generated by adlc for replacement variable $xxx$$Address.
+  static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
+
+  bool is_same_address(Address a) const {
+    return _base == a._base && _index == a._index && _disp == a._disp;
+  }
+
+  // testers
+  bool has_base()  const { return _base  != noreg; }
+  bool has_index() const { return _index != noreg; }
+  bool has_disp()  const { return true; } // There is no "invalid" value.
+
+  bool is_disp12() const { return Immediate::is_uimm12(disp()); }
+  bool is_disp20() const { return Immediate::is_simm20(disp()); }
+  // The form testers only read state, so they are const like the other
+  // testers and can be used on a const Address.
+  bool is_RSform()  const { return has_base() && !has_index() && is_disp12(); }
+  bool is_RSYform() const { return has_base() && !has_index() && is_disp20(); }
+  bool is_RXform()  const { return has_base() &&  has_index() && is_disp12(); }
+  bool is_RXEform() const { return has_base() &&  has_index() && is_disp12(); }
+  bool is_RXYform() const { return has_base() &&  has_index() && is_disp20(); }
+
+  // True if r is the base or the index register of this address.
+  bool uses(Register r) const { return _base == r || _index == r; }
+
+  // accessors
+  Register base()      const { return _base; }
+  Register baseOrR0()  const { assert(_base  != Z_R0, ""); return _base  == noreg ? Z_R0 : _base; }
+  Register index()     const { return _index; }
+  Register indexOrR0() const { assert(_index != Z_R0, ""); return _index == noreg ? Z_R0 : _index; }
+  intptr_t disp()      const { return _disp; }
+  // Specific version for short displacement instructions.
+  int disp12() const {
+    assert(is_disp12(), "displacement out of range for uimm12");
+    return _disp;
+  }
+  // Specific version for long displacement instructions.
+  int disp20() const {
+    assert(is_disp20(), "displacement out of range for simm20");
+    return _disp;
+  }
+  intptr_t value() const { return _disp; }
+
+  friend class Assembler;
+};
+
+class AddressLiteral VALUE_OBJ_CLASS_SPEC {
+ private:
+  address          _address;
+  RelocationHolder _rspec;
+
+  // Builds the relocation spec for rtype. addr is only used by the
+  // external_word and internal_word types. Types not listed here are a
+  // programming error.
+  RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) {
+    switch (rtype) {
+      case relocInfo::external_word_type:
+        return external_word_Relocation::spec(addr);
+      case relocInfo::internal_word_type:
+        return internal_word_Relocation::spec(addr);
+      case relocInfo::opt_virtual_call_type:
+        return opt_virtual_call_Relocation::spec();
+      case relocInfo::static_call_type:
+        return static_call_Relocation::spec();
+      case relocInfo::runtime_call_w_cp_type:
+        return runtime_call_w_cp_Relocation::spec();
+      case relocInfo::none:
+        return RelocationHolder();
+      default:
+        ShouldNotReachHere();
+        return RelocationHolder();
+    }
+  }
+
+ protected:
+  // creation
+  AddressLiteral() : _address(NULL), _rspec(NULL) {}
+
+ public:
+  AddressLiteral(address addr, RelocationHolder const& rspec)
+    : _address(addr),
+      _rspec(rspec) {}
+
+  // Some constructors to avoid casting at the call site.
+  AddressLiteral(jobject obj, RelocationHolder const& rspec)
+    : _address((address) obj),
+      _rspec(rspec) {}
+
+  AddressLiteral(intptr_t value, RelocationHolder const& rspec)
+    : _address((address) value),
+      _rspec(rspec) {}
+
+  AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  // Some constructors to avoid casting at the call site.
+  AddressLiteral(address* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(bool* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(const bool* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(signed char* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(int* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(intptr_t addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(oop addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(oop* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(float* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(double* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  // The literal address as an integer.
+  intptr_t value() const { return (intptr_t) _address; }
+
+  const relocInfo::relocType rtype() const { return _rspec.type(); }
+  const RelocationHolder&    rspec() const { return _rspec; }
+
+  // Relocation spec adjusted by offset; offset 0 yields the stored spec.
+  RelocationHolder rspec(int offset) const {
+    return offset == 0 ? _rspec : _rspec.plus(offset);
+  }
+};
+
+// Convenience classes
+class ExternalAddress: public AddressLiteral {
+ private:
+  static relocInfo::relocType reloc_for_target(address target) {
+    // Sometimes ExternalAddress is used for values which aren't
+    // exactly addresses, like the card table base.
+    // External_word_type can't be used for values in the first page
+    // so just skip the reloc in that case.
+    return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
+  }
+
+ public:
+  ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}
+  ExternalAddress(oop* target)    : AddressLiteral(target, reloc_for_target((address) target)) {}
+};
+
+// Argument is an abstraction used to represent an outgoing actual
+// argument or an incoming formal parameter, whether it resides in
+// memory or in a register, in a manner consistent with the
+// z/Architecture Application Binary Interface, or ABI. This is often
+// referred to as the native or C calling convention.
+class Argument VALUE_OBJ_CLASS_SPEC {
+ private:
+  int _number;
+  bool _is_in;
+
+ public:
+  enum {
+    // Only 5 registers may contain integer parameters.
+    n_register_parameters = 5,
+    // Can have up to 4 floating registers.
+    n_float_register_parameters = 4
+  };
+
+  // creation
+  Argument(int number, bool is_in) : _number(number), _is_in(is_in) {}
+  // Without an explicit direction the flag defaults to false, so that
+  // _is_in never holds an indeterminate value.
+  Argument(int number) : _number(number), _is_in(false) {}
+
+  int number() const { return _number; }
+
+  // The argument following this one; it keeps this argument's direction.
+  Argument successor() const { return Argument(number() + 1, _is_in); }
+
+  // Locating register-based arguments:
+  bool is_register() const { return _number < n_register_parameters; }
+
+  // Locating Floating Point register-based arguments:
+  bool is_float_register() const { return _number < n_float_register_parameters; }
+
+  FloatRegister as_float_register() const {
+    assert(is_float_register(), "must be a register argument");
+    return as_FloatRegister((number() *2) + 1);
+  }
+
+  FloatRegister as_double_register() const {
+    assert(is_float_register(), "must be a register argument");
+    return as_FloatRegister((number() *2));
+  }
+
+  // Integer argument registers are numbered consecutively from Z_ARG1.
+  Register as_register() const {
+    assert(is_register(), "must be a register argument");
+    return as_Register(number() + Z_ARG1->encoding());
+  }
+
+  // debugging
+  const char* name() const;
+
+  friend class Assembler;
+};
+
+
+// The z/Architecture Assembler: Pure assembler doing NO optimizations
+// on the instruction level; i.e., what you write is what you get. The
+// Assembler is generating code into a CodeBuffer.
+class Assembler : public AbstractAssembler {
+ protected:
+
+  friend class AbstractAssembler;
+  friend class AddressLiteral;
+
+  // Code patchers need various routines like inv_wdisp().
+ friend class NativeInstruction; +#ifndef COMPILER2 + friend class NativeGeneralJump; +#endif + friend class Relocation; + + public: + +// Addressing + +// address calculation +#define LA_ZOPC (unsigned int)(0x41 << 24) +#define LAY_ZOPC (unsigned long)(0xe3L << 40 | 0x71L) +#define LARL_ZOPC (unsigned long)(0xc0L << 40 | 0x00L << 32) + + +// Data Transfer + +// register to register transfer +#define LR_ZOPC (unsigned int)(24 << 8) +#define LBR_ZOPC (unsigned int)(0xb926 << 16) +#define LHR_ZOPC (unsigned int)(0xb927 << 16) +#define LGBR_ZOPC (unsigned int)(0xb906 << 16) +#define LGHR_ZOPC (unsigned int)(0xb907 << 16) +#define LGFR_ZOPC (unsigned int)(0xb914 << 16) +#define LGR_ZOPC (unsigned int)(0xb904 << 16) + +#define LLHR_ZOPC (unsigned int)(0xb995 << 16) +#define LLGCR_ZOPC (unsigned int)(0xb984 << 16) +#define LLGHR_ZOPC (unsigned int)(0xb985 << 16) +#define LLGTR_ZOPC (unsigned int)(185 << 24 | 23 << 16) +#define LLGFR_ZOPC (unsigned int)(185 << 24 | 22 << 16) + +#define LTR_ZOPC (unsigned int)(18 << 8) +#define LTGFR_ZOPC (unsigned int)(185 << 24 | 18 << 16) +#define LTGR_ZOPC (unsigned int)(185 << 24 | 2 << 16) + +#define LER_ZOPC (unsigned int)(56 << 8) +#define LEDBR_ZOPC (unsigned int)(179 << 24 | 68 << 16) +#define LEXBR_ZOPC (unsigned int)(179 << 24 | 70 << 16) +#define LDEBR_ZOPC (unsigned int)(179 << 24 | 4 << 16) +#define LDR_ZOPC (unsigned int)(40 << 8) +#define LDXBR_ZOPC (unsigned int)(179 << 24 | 69 << 16) +#define LXEBR_ZOPC (unsigned int)(179 << 24 | 6 << 16) +#define LXDBR_ZOPC (unsigned int)(179 << 24 | 5 << 16) +#define LXR_ZOPC (unsigned int)(179 << 24 | 101 << 16) +#define LTEBR_ZOPC (unsigned int)(179 << 24 | 2 << 16) +#define LTDBR_ZOPC (unsigned int)(179 << 24 | 18 << 16) +#define LTXBR_ZOPC (unsigned int)(179 << 24 | 66 << 16) + +#define LRVR_ZOPC (unsigned int)(0xb91f << 16) +#define LRVGR_ZOPC (unsigned int)(0xb90f << 16) + +#define LDGR_ZOPC (unsigned int)(0xb3c1 << 16) // z10 +#define LGDR_ZOPC (unsigned int)(0xb3cd << 16) // 
z10
+
+#define LOCR_ZOPC (unsigned int)(0xb9f2 << 16) // z196
+#define LOCGR_ZOPC (unsigned int)(0xb9e2 << 16) // z196
+
+// Reading aid for the opcode tables below (derived from the constants themselves):
+// - (unsigned int) constants hold the opcode in the upper bits of a 32-bit word
+//   (or in bits 15..8 for the "<< 8" forms).
+// - (unsigned long) constants hold the primary opcode byte at "<< 40" and the
+//   secondary opcode either at "<< 32" or in the low byte.
+// - Decimal primary opcodes: 165 = 0xa5, 167 = 0xa7, 179 = 0xb3, 185 = 0xb9,
+//   227 = 0xe3, 235 = 0xeb, 237 = 0xed.
+
+// immediate to register transfer
+#define IIHH_ZOPC (unsigned int)(165 << 24)
+#define IIHL_ZOPC (unsigned int)(165 << 24 | 1 << 16)
+#define IILH_ZOPC (unsigned int)(165 << 24 | 2 << 16)
+#define IILL_ZOPC (unsigned int)(165 << 24 | 3 << 16)
+#define IIHF_ZOPC (unsigned long)(0xc0L << 40 | 8L << 32)
+#define IILF_ZOPC (unsigned long)(0xc0L << 40 | 9L << 32)
+#define LLIHH_ZOPC (unsigned int)(165 << 24 | 12 << 16)
+#define LLIHL_ZOPC (unsigned int)(165 << 24 | 13 << 16)
+#define LLILH_ZOPC (unsigned int)(165 << 24 | 14 << 16)
+#define LLILL_ZOPC (unsigned int)(165 << 24 | 15 << 16)
+#define LLIHF_ZOPC (unsigned long)(0xc0L << 40 | 14L << 32)
+#define LLILF_ZOPC (unsigned long)(0xc0L << 40 | 15L << 32)
+#define LHI_ZOPC (unsigned int)(167 << 24 | 8 << 16)
+#define LGHI_ZOPC (unsigned int)(167 << 24 | 9 << 16)
+#define LGFI_ZOPC (unsigned long)(0xc0L << 40 | 1L << 32)
+
+#define LZER_ZOPC (unsigned int)(0xb374 << 16)
+#define LZDR_ZOPC (unsigned int)(0xb375 << 16)
+
+// LOAD: memory to register transfer
+#define LB_ZOPC (unsigned long)(227L << 40 | 118L)
+#define LH_ZOPC (unsigned int)(72 << 24)
+#define LHY_ZOPC (unsigned long)(227L << 40 | 120L)
+#define L_ZOPC (unsigned int)(88 << 24)
+#define LY_ZOPC (unsigned long)(227L << 40 | 88L)
+#define LT_ZOPC (unsigned long)(0xe3L << 40 | 0x12L)
+#define LGB_ZOPC (unsigned long)(227L << 40 | 119L)
+#define LGH_ZOPC (unsigned long)(227L << 40 | 21L)
+#define LGF_ZOPC (unsigned long)(227L << 40 | 20L)
+#define LG_ZOPC (unsigned long)(227L << 40 | 4L)
+#define LTG_ZOPC (unsigned long)(0xe3L << 40 | 0x02L)
+#define LTGF_ZOPC (unsigned long)(0xe3L << 40 | 0x32L)
+
+#define LLC_ZOPC (unsigned long)(0xe3L << 40 | 0x94L)
+#define LLH_ZOPC (unsigned long)(0xe3L << 40 | 0x95L)
+#define LLGT_ZOPC (unsigned long)(227L << 40 | 23L)
+#define LLGC_ZOPC (unsigned long)(227L << 40 | 144L)
+#define LLGH_ZOPC (unsigned long)(227L << 40 | 145L)
+#define LLGF_ZOPC (unsigned long)(227L << 40 | 22L)
+
+#define IC_ZOPC (unsigned int)(0x43 << 24)
+#define ICY_ZOPC (unsigned long)(0xe3L << 40 | 0x73L)
+#define ICM_ZOPC (unsigned int)(0xbf << 24)
+#define ICMY_ZOPC (unsigned long)(0xebL << 40 | 0x81L)
+#define ICMH_ZOPC (unsigned long)(0xebL << 40 | 0x80L)
+
+#define LRVH_ZOPC (unsigned long)(0xe3L << 40 | 0x1fL)
+#define LRV_ZOPC (unsigned long)(0xe3L << 40 | 0x1eL)
+#define LRVG_ZOPC (unsigned long)(0xe3L << 40 | 0x0fL)
+
+
+// LOAD relative: memory to register transfer
+#define LHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x05L << 32) // z10
+#define LRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0dL << 32) // z10
+#define LGHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x04L << 32) // z10
+#define LGFRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0cL << 32) // z10
+#define LGRL_ZOPC (unsigned long)(0xc4L << 40 | 0x08L << 32) // z10
+
+#define LLHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x02L << 32) // z10
+#define LLGHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x06L << 32) // z10
+#define LLGFRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0eL << 32) // z10
+
+#define LOC_ZOPC (unsigned long)(0xebL << 40 | 0xf2L) // z196
+#define LOCG_ZOPC (unsigned long)(0xebL << 40 | 0xe2L) // z196
+
+#define LMG_ZOPC (unsigned long)(235L << 40 | 4L)
+
+#define LE_ZOPC (unsigned int)(0x78 << 24)
+#define LEY_ZOPC (unsigned long)(237L << 40 | 100L)
+#define LDEB_ZOPC (unsigned long)(237L << 40 | 4)
+#define LD_ZOPC (unsigned int)(0x68 << 24)
+#define LDY_ZOPC (unsigned long)(237L << 40 | 101L)
+#define LXEB_ZOPC (unsigned long)(237L << 40 | 6)
+#define LXDB_ZOPC (unsigned long)(237L << 40 | 5)
+
+// STORE: register to memory transfer
+#define STC_ZOPC (unsigned int)(0x42 << 24)
+#define STCY_ZOPC (unsigned long)(227L << 40 | 114L)
+#define STH_ZOPC (unsigned int)(64 << 24)
+#define STHY_ZOPC (unsigned long)(227L << 40 | 112L)
+#define ST_ZOPC (unsigned int)(80 << 24)
+#define STY_ZOPC (unsigned long)(227L << 40 | 80L)
+#define STG_ZOPC (unsigned long)(227L << 40 | 36L)
+
+// NOTE(review): STCM_ZOPC uses the 32-bit layout (<< 24) but is typed unsigned long,
+// unlike its counterpart ICM_ZOPC above. The value is unaffected (0xbe000000).
+#define STCM_ZOPC (unsigned long)(0xbeL << 24)
+#define STCMY_ZOPC (unsigned long)(0xebL << 40 | 0x2dL)
+#define STCMH_ZOPC (unsigned long)(0xebL << 40 | 0x2cL)
+
+// STORE relative: register to memory transfer
+#define STHRL_ZOPC (unsigned long)(0xc4L << 40 | 0x07L << 32) // z10
+#define STRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0fL << 32) // z10
+#define STGRL_ZOPC (unsigned long)(0xc4L << 40 | 0x0bL << 32) // z10
+
+#define STOC_ZOPC (unsigned long)(0xebL << 40 | 0xf3L) // z196
+#define STOCG_ZOPC (unsigned long)(0xebL << 40 | 0xe3L) // z196
+
+#define STMG_ZOPC (unsigned long)(235L << 40 | 36L)
+
+#define STE_ZOPC (unsigned int)(0x70 << 24)
+#define STEY_ZOPC (unsigned long)(237L << 40 | 102L)
+#define STD_ZOPC (unsigned int)(0x60 << 24)
+#define STDY_ZOPC (unsigned long)(237L << 40 | 103L)
+
+// MOVE: immediate to memory transfer
+#define MVHHI_ZOPC (unsigned long)(0xe5L << 40 | 0x44L << 32) // z10
+#define MVHI_ZOPC (unsigned long)(0xe5L << 40 | 0x4cL << 32) // z10
+#define MVGHI_ZOPC (unsigned long)(0xe5L << 40 | 0x48L << 32) // z10
+
+
+// ALU operations
+
+// Load Positive
+#define LPR_ZOPC (unsigned int)(16 << 8)
+#define LPGFR_ZOPC (unsigned int)(185 << 24 | 16 << 16)
+#define LPGR_ZOPC (unsigned int)(185 << 24)
+#define LPEBR_ZOPC (unsigned int)(179 << 24)
+#define LPDBR_ZOPC (unsigned int)(179 << 24 | 16 << 16)
+#define LPXBR_ZOPC (unsigned int)(179 << 24 | 64 << 16)
+
+// Load Negative
+#define LNR_ZOPC (unsigned int)(17 << 8)
+#define LNGFR_ZOPC (unsigned int)(185 << 24 | 17 << 16)
+#define LNGR_ZOPC (unsigned int)(185 << 24 | 1 << 16)
+#define LNEBR_ZOPC (unsigned int)(179 << 24 | 1 << 16)
+#define LNDBR_ZOPC (unsigned int)(179 << 24 | 17 << 16)
+#define LNXBR_ZOPC (unsigned int)(179 << 24 | 65 << 16)
+
+// Load Complement
+#define LCR_ZOPC (unsigned int)(19 << 8)
+#define LCGFR_ZOPC (unsigned int)(185 << 24 | 19 << 16)
+#define LCGR_ZOPC (unsigned int)(185 << 24 | 3 << 16)
+#define LCEBR_ZOPC (unsigned int)(179 << 24 | 3 << 16)
+#define LCDBR_ZOPC (unsigned int)(179 << 24 | 19 << 16)
+#define LCXBR_ZOPC (unsigned int)(179 << 24 | 67 << 16)
+
+// Add
+// RR, signed
+#define AR_ZOPC (unsigned int)(26 << 8)
+#define AGFR_ZOPC (unsigned int)(0xb9 << 24 | 0x18 << 16)
+#define AGR_ZOPC (unsigned int)(0xb9 << 24 | 0x08 << 16)
+// RRF, signed
+#define ARK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f8 << 16)
+#define AGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e8 << 16)
+// RI, signed
+#define AHI_ZOPC (unsigned int)(167 << 24 | 10 << 16)
+#define AFI_ZOPC (unsigned long)(0xc2L << 40 | 9L << 32)
+#define AGHI_ZOPC (unsigned int)(167 << 24 | 11 << 16)
+#define AGFI_ZOPC (unsigned long)(0xc2L << 40 | 8L << 32)
+// RIE, signed
+#define AHIK_ZOPC (unsigned long)(0xecL << 40 | 0x00d8L)
+#define AGHIK_ZOPC (unsigned long)(0xecL << 40 | 0x00d9L)
+#define AIH_ZOPC (unsigned long)(0xccL << 40 | 0x08L << 32)
+// RM, signed
+#define AHY_ZOPC (unsigned long)(227L << 40 | 122L)
+#define A_ZOPC (unsigned int)(90 << 24)
+#define AY_ZOPC (unsigned long)(227L << 40 | 90L)
+#define AGF_ZOPC (unsigned long)(227L << 40 | 24L)
+#define AG_ZOPC (unsigned long)(227L << 40 | 8L)
+// In-memory arithmetic (add signed, add logical with signed immediate).
+// MI, signed
+#define ASI_ZOPC (unsigned long)(0xebL << 40 | 0x6aL)
+#define AGSI_ZOPC (unsigned long)(0xebL << 40 | 0x7aL)
+
+// Add logical. Decimal primary opcodes used below: 185 = 0xb9, 227 = 0xe3.
+// RR, Logical
+#define ALR_ZOPC (unsigned int)(30 << 8)
+#define ALGFR_ZOPC (unsigned int)(185 << 24 | 26 << 16)
+#define ALGR_ZOPC (unsigned int)(185 << 24 | 10 << 16)
+#define ALCGR_ZOPC (unsigned int)(185 << 24 | 136 << 16)
+// RRF, Logical
+#define ALRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00fa << 16)
+#define ALGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00ea << 16)
+// RI, Logical
+#define ALFI_ZOPC (unsigned long)(0xc2L << 40 | 0x0bL << 32)
+#define ALGFI_ZOPC (unsigned long)(0xc2L << 40 | 0x0aL << 32)
+// RIE, Logical
+#define ALHSIK_ZOPC (unsigned long)(0xecL << 40 | 0x00daL)
+#define ALGHSIK_ZOPC (unsigned long)(0xecL << 40 | 0x00dbL)
+// RM, Logical
+#define AL_ZOPC (unsigned int)(0x5e << 24)
+#define ALY_ZOPC (unsigned long)(227L << 40 | 94L)
+#define ALGF_ZOPC (unsigned long)(227L << 40 | 26L)
+#define ALG_ZOPC (unsigned long)(227L << 40 | 10L)
+// In-memory arithmetic (add signed, add logical with signed immediate).
+// MI, Logical
+#define ALSI_ZOPC (unsigned long)(0xebL << 40 | 0x6eL)
+#define ALGSI_ZOPC (unsigned long)(0xebL << 40 | 0x7eL)
+
+// Decimal primary opcodes used below: 179 = 0xb3, 185 = 0xb9, 227 = 0xe3, 237 = 0xed.
+// RR, BFP
+#define AEBR_ZOPC (unsigned int)(179 << 24 | 10 << 16)
+#define ADBR_ZOPC (unsigned int)(179 << 24 | 26 << 16)
+#define AXBR_ZOPC (unsigned int)(179 << 24 | 74 << 16)
+// RM, BFP
+#define AEB_ZOPC (unsigned long)(237L << 40 | 10)
+#define ADB_ZOPC (unsigned long)(237L << 40 | 26)
+
+// Subtract
+// RR, signed
+#define SR_ZOPC (unsigned int)(27 << 8)
+#define SGFR_ZOPC (unsigned int)(185 << 24 | 25 << 16)
+#define SGR_ZOPC (unsigned int)(185 << 24 | 9 << 16)
+// RRF, signed
+#define SRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f9 << 16)
+#define SGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e9 << 16)
+// RM, signed
+#define SH_ZOPC (unsigned int)(0x4b << 24)
+#define SHY_ZOPC (unsigned long)(227L << 40 | 123L)
+#define S_ZOPC (unsigned int)(0x5B << 24)
+#define SY_ZOPC (unsigned long)(227L << 40 | 91L)
+#define SGF_ZOPC (unsigned long)(227L << 40 | 25)
+#define SG_ZOPC (unsigned long)(227L << 40 | 9)
+// RR, Logical
+#define SLR_ZOPC (unsigned int)(31 << 8)
+#define SLGFR_ZOPC (unsigned int)(185 << 24 | 27 << 16)
+#define SLGR_ZOPC (unsigned int)(185 << 24 | 11 << 16)
+// RIL, Logical
+#define SLFI_ZOPC (unsigned long)(0xc2L << 40 | 0x05L << 32)
+#define SLGFI_ZOPC (unsigned long)(0xc2L << 40 | 0x04L << 32)
+// RRF, Logical
+#define SLRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00fb << 16)
+#define SLGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00eb << 16)
+// RM, Logical
+#define SLY_ZOPC (unsigned long)(227L << 40 | 95L)
+#define SLGF_ZOPC (unsigned long)(227L << 40 | 27L)
+#define SLG_ZOPC (unsigned long)(227L << 40 | 11L)
+
+// RR, BFP
+#define SEBR_ZOPC (unsigned int)(179 << 24 | 11 << 16)
+#define SDBR_ZOPC (unsigned int)(179 << 24 | 27 << 16)
+#define SXBR_ZOPC (unsigned int)(179 << 24 | 75 << 16)
+// RM, BFP
+#define SEB_ZOPC (unsigned long)(237L << 40 | 11)
+#define SDB_ZOPC (unsigned long)(237L << 40 | 27)
+
+// Multiply
+// RR,
signed
+#define MR_ZOPC (unsigned int)(28 << 8)
+#define MSR_ZOPC (unsigned int)(178 << 24 | 82 << 16)
+#define MSGFR_ZOPC (unsigned int)(185 << 24 | 28 << 16)
+#define MSGR_ZOPC (unsigned int)(185 << 24 | 12 << 16)
+// RI, signed
+#define MHI_ZOPC (unsigned int)(167 << 24 | 12 << 16)
+#define MGHI_ZOPC (unsigned int)(167 << 24 | 13 << 16)
+#define MSFI_ZOPC (unsigned long)(0xc2L << 40 | 0x01L << 32) // z10
+#define MSGFI_ZOPC (unsigned long)(0xc2L << 40 | 0x00L << 32) // z10
+// RM, signed
+#define M_ZOPC (unsigned int)(92 << 24)
+#define MS_ZOPC (unsigned int)(0x71 << 24)
+#define MHY_ZOPC (unsigned long)(0xe3L<< 40 | 0x7cL)
+#define MSY_ZOPC (unsigned long)(227L << 40 | 81L)
+#define MSGF_ZOPC (unsigned long)(227L << 40 | 28L)
+#define MSG_ZOPC (unsigned long)(227L << 40 | 12L)
+// RR, unsigned
+#define MLR_ZOPC (unsigned int)(185 << 24 | 150 << 16)
+#define MLGR_ZOPC (unsigned int)(185 << 24 | 134 << 16)
+// RM, unsigned
+#define ML_ZOPC (unsigned long)(227L << 40 | 150L)
+#define MLG_ZOPC (unsigned long)(227L << 40 | 134L)
+
+// Decimal primary opcodes used in this part of the table:
+// 165 = 0xa5, 167 = 0xa7, 178 = 0xb2, 179 = 0xb3, 185 = 0xb9,
+// 227 = 0xe3, 235 = 0xeb, 237 = 0xed.
+// RR, BFP
+#define MEEBR_ZOPC (unsigned int)(179 << 24 | 23 << 16)
+#define MDEBR_ZOPC (unsigned int)(179 << 24 | 12 << 16)
+#define MDBR_ZOPC (unsigned int)(179 << 24 | 28 << 16)
+#define MXDBR_ZOPC (unsigned int)(179 << 24 | 7 << 16)
+#define MXBR_ZOPC (unsigned int)(179 << 24 | 76 << 16)
+// RM, BFP
+#define MEEB_ZOPC (unsigned long)(237L << 40 | 23)
+#define MDEB_ZOPC (unsigned long)(237L << 40 | 12)
+#define MDB_ZOPC (unsigned long)(237L << 40 | 28)
+#define MXDB_ZOPC (unsigned long)(237L << 40 | 7)
+
+// Divide
+// RR, signed
+#define DSGFR_ZOPC (unsigned int)(0xb91d << 16)
+#define DSGR_ZOPC (unsigned int)(0xb90d << 16)
+// RM, signed
+#define D_ZOPC (unsigned int)(93 << 24)
+#define DSGF_ZOPC (unsigned long)(227L << 40 | 29L)
+#define DSG_ZOPC (unsigned long)(227L << 40 | 13L)
+// RR, unsigned
+#define DLR_ZOPC (unsigned int)(185 << 24 | 151 << 16)
+#define DLGR_ZOPC (unsigned int)(185 << 24 | 135 << 16)
+// RM, unsigned
+#define DL_ZOPC (unsigned long)(227L << 40 | 151L)
+#define DLG_ZOPC (unsigned long)(227L << 40 | 135L)
+
+// RR, BFP
+#define DEBR_ZOPC (unsigned int)(179 << 24 | 13 << 16)
+#define DDBR_ZOPC (unsigned int)(179 << 24 | 29 << 16)
+#define DXBR_ZOPC (unsigned int)(179 << 24 | 77 << 16)
+// RM, BFP
+#define DEB_ZOPC (unsigned long)(237L << 40 | 13)
+#define DDB_ZOPC (unsigned long)(237L << 40 | 29)
+
+// Square Root
+// RR, BFP
+#define SQEBR_ZOPC (unsigned int)(0xb314 << 16)
+#define SQDBR_ZOPC (unsigned int)(0xb315 << 16)
+#define SQXBR_ZOPC (unsigned int)(0xb316 << 16)
+// RM, BFP
+#define SQEB_ZOPC (unsigned long)(237L << 40 | 20)
+#define SQDB_ZOPC (unsigned long)(237L << 40 | 21)
+
+// Compare and Test
+// RR, signed
+#define CR_ZOPC (unsigned int)(25 << 8)
+#define CGFR_ZOPC (unsigned int)(185 << 24 | 48 << 16)
+#define CGR_ZOPC (unsigned int)(185 << 24 | 32 << 16)
+// RI, signed
+#define CHI_ZOPC (unsigned int)(167 << 24 | 14 << 16)
+#define CFI_ZOPC (unsigned long)(0xc2L << 40 | 0xdL << 32)
+#define CGHI_ZOPC (unsigned int)(167 << 24 | 15 << 16)
+#define CGFI_ZOPC (unsigned long)(0xc2L << 40 | 0xcL << 32)
+// RM, signed
+#define CH_ZOPC (unsigned int)(0x49 << 24)
+#define CHY_ZOPC (unsigned long)(227L << 40 | 121L)
+#define C_ZOPC (unsigned int)(0x59 << 24)
+#define CY_ZOPC (unsigned long)(227L << 40 | 89L)
+#define CGF_ZOPC (unsigned long)(227L << 40 | 48L)
+#define CG_ZOPC (unsigned long)(227L << 40 | 32L)
+// RR, unsigned
+#define CLR_ZOPC (unsigned int)(21 << 8)
+#define CLGFR_ZOPC (unsigned int)(185 << 24 | 49 << 16)
+#define CLGR_ZOPC (unsigned int)(185 << 24 | 33 << 16)
+// RIL, unsigned
+#define CLFI_ZOPC (unsigned long)(0xc2L << 40 | 0xfL << 32)
+#define CLGFI_ZOPC (unsigned long)(0xc2L << 40 | 0xeL << 32)
+// RM, unsigned
+#define CL_ZOPC (unsigned int)(0x55 << 24)
+#define CLY_ZOPC (unsigned long)(227L << 40 | 85L)
+#define CLGF_ZOPC (unsigned long)(227L << 40 | 49L)
+#define CLG_ZOPC (unsigned long)(227L << 40 | 33L)
+// RI, unsigned
+#define TMHH_ZOPC (unsigned int)(167 << 24 | 2 << 16)
+#define TMHL_ZOPC (unsigned int)(167 << 24 | 3 << 16)
+#define TMLH_ZOPC (unsigned int)(167 << 24)
+#define TMLL_ZOPC (unsigned int)(167 << 24 | 1 << 16)
+
+// RR, BFP
+#define CEBR_ZOPC (unsigned int)(179 << 24 | 9 << 16)
+#define CDBR_ZOPC (unsigned int)(179 << 24 | 25 << 16)
+#define CXBR_ZOPC (unsigned int)(179 << 24 | 73 << 16)
+// RM, BFP
+#define CEB_ZOPC (unsigned long)(237L << 40 | 9)
+#define CDB_ZOPC (unsigned long)(237L << 40 | 25)
+
+// Shift
+// arithmetic
+#define SLA_ZOPC (unsigned int)(139 << 24)
+#define SLAG_ZOPC (unsigned long)(235L << 40 | 11L)
+#define SRA_ZOPC (unsigned int)(138 << 24)
+#define SRAG_ZOPC (unsigned long)(235L << 40 | 10L)
+// logical
+#define SLL_ZOPC (unsigned int)(137 << 24)
+#define SLLG_ZOPC (unsigned long)(235L << 40 | 13L)
+#define SRL_ZOPC (unsigned int)(136 << 24)
+#define SRLG_ZOPC (unsigned long)(235L << 40 | 12L)
+
+// Rotate, then AND/XOR/OR/insert
+// rotate
+#define RLL_ZOPC (unsigned long)(0xebL << 40 | 0x1dL) // z10
+#define RLLG_ZOPC (unsigned long)(0xebL << 40 | 0x1cL) // z10
+// rotate and {AND|XOR|OR|INS}
+#define RNSBG_ZOPC (unsigned long)(0xecL << 40 | 0x54L) // z196
+#define RXSBG_ZOPC (unsigned long)(0xecL << 40 | 0x57L) // z196
+#define ROSBG_ZOPC (unsigned long)(0xecL << 40 | 0x56L) // z196
+#define RISBG_ZOPC (unsigned long)(0xecL << 40 | 0x55L) // z196
+
+// AND
+// RR, signed
+#define NR_ZOPC (unsigned int)(20 << 8)
+#define NGR_ZOPC (unsigned int)(185 << 24 | 128 << 16)
+// RRF, signed
+#define NRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f4 << 16)
+#define NGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e4 << 16)
+// RI, signed
+#define NIHH_ZOPC (unsigned int)(165 << 24 | 4 << 16)
+#define NIHL_ZOPC (unsigned int)(165 << 24 | 5 << 16)
+#define NILH_ZOPC (unsigned int)(165 << 24 | 6 << 16)
+#define NILL_ZOPC (unsigned int)(165 << 24 | 7 << 16)
+#define NIHF_ZOPC (unsigned long)(0xc0L << 40 | 10L << 32)
+#define NILF_ZOPC (unsigned long)(0xc0L << 40 | 11L << 32)
+// RM,
signed
+#define N_ZOPC (unsigned int)(0x54 << 24)
+#define NY_ZOPC (unsigned long)(227L << 40 | 84L)
+#define NG_ZOPC (unsigned long)(227L << 40 | 128L)
+
+// Decimal primary opcodes used in this part of the table:
+// 165 = 0xa5, 167 = 0xa7, 178 = 0xb2, 179 = 0xb3, 185 = 0xb9, 192 = 0xc0,
+// 227 = 0xe3, 235 = 0xeb, 236 = 0xec.
+
+// OR
+// RR, signed
+#define OR_ZOPC (unsigned int)(22 << 8)
+#define OGR_ZOPC (unsigned int)(185 << 24 | 129 << 16)
+// RRF, signed
+#define ORK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f6 << 16)
+#define OGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e6 << 16)
+// RI, signed
+#define OIHH_ZOPC (unsigned int)(165 << 24 | 8 << 16)
+#define OIHL_ZOPC (unsigned int)(165 << 24 | 9 << 16)
+#define OILH_ZOPC (unsigned int)(165 << 24 | 10 << 16)
+#define OILL_ZOPC (unsigned int)(165 << 24 | 11 << 16)
+#define OIHF_ZOPC (unsigned long)(0xc0L << 40 | 12L << 32)
+#define OILF_ZOPC (unsigned long)(0xc0L << 40 | 13L << 32)
+// RM, signed
+#define O_ZOPC (unsigned int)(0x56 << 24)
+#define OY_ZOPC (unsigned long)(227L << 40 | 86L)
+#define OG_ZOPC (unsigned long)(227L << 40 | 129L)
+
+// XOR
+// RR, signed
+#define XR_ZOPC (unsigned int)(23 << 8)
+#define XGR_ZOPC (unsigned int)(185 << 24 | 130 << 16)
+// RRF, signed
+#define XRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00f7 << 16)
+#define XGRK_ZOPC (unsigned int)(0xb9 << 24 | 0x00e7 << 16)
+// RI, signed
+#define XIHF_ZOPC (unsigned long)(0xc0L << 40 | 6L << 32)
+#define XILF_ZOPC (unsigned long)(0xc0L << 40 | 7L << 32)
+// RM, signed
+#define X_ZOPC (unsigned int)(0x57 << 24)
+#define XY_ZOPC (unsigned long)(227L << 40 | 87L)
+#define XG_ZOPC (unsigned long)(227L << 40 | 130L)
+
+
+// Data Conversion
+
+// INT to BFP
+#define CEFBR_ZOPC (unsigned int)(179 << 24 | 148 << 16)
+#define CDFBR_ZOPC (unsigned int)(179 << 24 | 149 << 16)
+#define CXFBR_ZOPC (unsigned int)(179 << 24 | 150 << 16)
+#define CEGBR_ZOPC (unsigned int)(179 << 24 | 164 << 16)
+#define CDGBR_ZOPC (unsigned int)(179 << 24 | 165 << 16)
+#define CXGBR_ZOPC (unsigned int)(179 << 24 | 166 << 16)
+// BFP to INT
+#define CFEBR_ZOPC (unsigned int)(179 << 24 | 152 << 16)
+#define CFDBR_ZOPC (unsigned int)(179 << 24 | 153 << 16)
+#define CFXBR_ZOPC (unsigned int)(179 << 24 | 154 << 16)
+#define CGEBR_ZOPC (unsigned int)(179 << 24 | 168 << 16)
+#define CGDBR_ZOPC (unsigned int)(179 << 24 | 169 << 16)
+#define CGXBR_ZOPC (unsigned int)(179 << 24 | 170 << 16)
+// INT to DEC
+#define CVD_ZOPC (unsigned int)(0x4e << 24)
+#define CVDY_ZOPC (unsigned long)(0xe3L << 40 | 0x26L)
+#define CVDG_ZOPC (unsigned long)(0xe3L << 40 | 0x2eL)
+
+
+// BFP Control
+
+#define SRNM_ZOPC (unsigned int)(178 << 24 | 153 << 16)
+#define EFPC_ZOPC (unsigned int)(179 << 24 | 140 << 16)
+#define SFPC_ZOPC (unsigned int)(179 << 24 | 132 << 16)
+#define STFPC_ZOPC (unsigned int)(178 << 24 | 156 << 16)
+#define LFPC_ZOPC (unsigned int)(178 << 24 | 157 << 16)
+
+
+// Branch Instructions
+
+// Register
+#define BCR_ZOPC (unsigned int)(7 << 8)
+#define BALR_ZOPC (unsigned int)(5 << 8)
+#define BASR_ZOPC (unsigned int)(13 << 8)
+// Must be cast to unsigned int like the other "<< 16" opcodes: 0xb946 << 16 is
+// evaluated as a (negative) int, so a direct cast to unsigned long would
+// sign-extend it to 0xffffffffb9460000.
+#define BCTGR_ZOPC (unsigned int)(0xb946 << 16)
+// Absolute
+#define BC_ZOPC (unsigned int)(71 << 24)
+#define BAL_ZOPC (unsigned int)(69 << 24)
+#define BAS_ZOPC (unsigned int)(77 << 24)
+#define BXH_ZOPC (unsigned int)(134 << 24)
+#define BXHG_ZOPC (unsigned long)(235L << 40 | 68)
+// Relative
+#define BRC_ZOPC (unsigned int)(167 << 24 | 4 << 16)
+#define BRCL_ZOPC (unsigned long)(192L << 40 | 4L << 32)
+#define BRAS_ZOPC (unsigned int)(167 << 24 | 5 << 16)
+#define BRASL_ZOPC (unsigned long)(192L << 40 | 5L << 32)
+#define BRCT_ZOPC (unsigned int)(167 << 24 | 6 << 16)
+#define BRCTG_ZOPC (unsigned int)(167 << 24 | 7 << 16)
+#define BRXH_ZOPC (unsigned int)(132 << 24)
+#define BRXHG_ZOPC (unsigned long)(236L << 40 | 68)
+#define BRXLE_ZOPC (unsigned int)(133 << 24)
+#define BRXLG_ZOPC (unsigned long)(236L << 40 | 69)
+
+
+// Compare and Branch Instructions
+
+// signed comp reg/reg, branch Absolute
+#define CRB_ZOPC (unsigned long)(0xecL << 40 | 0xf6L) // z10
+#define CGRB_ZOPC (unsigned long)(0xecL << 40 | 0xe4L) // z10
+// signed comp reg/reg, branch Relative
+#define CRJ_ZOPC (unsigned long)(0xecL << 40 | 0x76L) // z10
+#define CGRJ_ZOPC (unsigned long)(0xecL << 40 | 0x64L) // z10
+// signed comp reg/imm, branch absolute
+#define CIB_ZOPC (unsigned long)(0xecL << 40 | 0xfeL) // z10
+#define CGIB_ZOPC (unsigned long)(0xecL << 40 | 0xfcL) // z10
+// signed comp reg/imm, branch relative
+#define CIJ_ZOPC (unsigned long)(0xecL << 40 | 0x7eL) // z10
+#define CGIJ_ZOPC (unsigned long)(0xecL << 40 | 0x7cL) // z10
+
+// unsigned comp reg/reg, branch Absolute
+#define CLRB_ZOPC (unsigned long)(0xecL << 40 | 0xf7L) // z10
+#define CLGRB_ZOPC (unsigned long)(0xecL << 40 | 0xe5L) // z10
+// unsigned comp reg/reg, branch Relative
+#define CLRJ_ZOPC (unsigned long)(0xecL << 40 | 0x77L) // z10
+#define CLGRJ_ZOPC (unsigned long)(0xecL << 40 | 0x65L) // z10
+// unsigned comp reg/imm, branch absolute
+#define CLIB_ZOPC (unsigned long)(0xecL << 40 | 0xffL) // z10
+#define CLGIB_ZOPC (unsigned long)(0xecL << 40 | 0xfdL) // z10
+// unsigned comp reg/imm, branch relative
+#define CLIJ_ZOPC (unsigned long)(0xecL << 40 | 0x7fL) // z10
+#define CLGIJ_ZOPC (unsigned long)(0xecL << 40 | 0x7dL) // z10
+
+// comp reg/reg, trap
+#define CRT_ZOPC (unsigned int)(0xb972 << 16) // z10
+#define CGRT_ZOPC (unsigned int)(0xb960 << 16) // z10
+#define CLRT_ZOPC (unsigned int)(0xb973 << 16) // z10
+#define CLGRT_ZOPC (unsigned int)(0xb961 << 16) // z10
+// comp reg/imm, trap
+#define CIT_ZOPC (unsigned long)(0xecL << 40 | 0x72L) // z10
+#define CGIT_ZOPC (unsigned long)(0xecL << 40 | 0x70L) // z10
+#define CLFIT_ZOPC (unsigned long)(0xecL << 40 | 0x73L) // z10
+#define CLGIT_ZOPC (unsigned long)(0xecL << 40 | 0x71L) // z10
+
+
+// Direct Memory Operations
+
+// Compare
+#define CLI_ZOPC (unsigned int)(0x95 << 24)
+#define CLIY_ZOPC (unsigned long)(0xebL << 40 | 0x55L)
+#define CLC_ZOPC (unsigned long)(0xd5L << 40)
+#define CLCL_ZOPC (unsigned int)(0x0f << 8)
+#define CLCLE_ZOPC (unsigned int)(0xa9 << 24)
+#define CLCLU_ZOPC (unsigned long)(0xebL << 40 | 0x8fL)
+
+// Move
+#define MVI_ZOPC (unsigned int)(0x92 << 24)
+#define MVIY_ZOPC (unsigned long)(0xebL << 40 | 0x52L)
+#define MVC_ZOPC (unsigned long)(0xd2L << 40)
+#define MVCL_ZOPC (unsigned int)(0x0e << 8)
+#define MVCLE_ZOPC (unsigned int)(0xa8 << 24)
+
+// Test
+#define TM_ZOPC (unsigned int)(0x91 << 24)
+#define TMY_ZOPC (unsigned long)(0xebL << 40 | 0x51L)
+
+// AND
+#define NI_ZOPC (unsigned int)(0x94 << 24)
+#define NIY_ZOPC (unsigned long)(0xebL << 40 | 0x54L)
+#define NC_ZOPC (unsigned long)(0xd4L << 40)
+
+// OR
+#define OI_ZOPC (unsigned int)(0x96 << 24)
+#define OIY_ZOPC (unsigned long)(0xebL << 40 | 0x56L)
+#define OC_ZOPC (unsigned long)(0xd6L << 40)
+
+// XOR
+#define XI_ZOPC (unsigned int)(0x97 << 24)
+#define XIY_ZOPC (unsigned long)(0xebL << 40 | 0x57L)
+#define XC_ZOPC (unsigned long)(0xd7L << 40)
+
+// Search String
+#define SRST_ZOPC (unsigned int)(178 << 24 | 94 << 16)
+#define SRSTU_ZOPC (unsigned int)(185 << 24 | 190 << 16)
+
+// Translate characters
+#define TROO_ZOPC (unsigned int)(0xb9 << 24 | 0x93 << 16)
+#define TROT_ZOPC (unsigned int)(0xb9 << 24 | 0x92 << 16)
+#define TRTO_ZOPC (unsigned int)(0xb9 << 24 | 0x91 << 16)
+#define TRTT_ZOPC (unsigned int)(0xb9 << 24 | 0x90 << 16)
+
+
+// Miscellaneous Operations
+
+// Execute
+#define EX_ZOPC (unsigned int)(68L << 24)
+#define EXRL_ZOPC (unsigned long)(0xc6L << 40 | 0x00L << 32) // z10
+
+// Compare and Swap
+#define CS_ZOPC (unsigned int)(0xba << 24)
+#define CSY_ZOPC (unsigned long)(0xebL << 40 | 0x14L)
+#define CSG_ZOPC (unsigned long)(0xebL << 40 | 0x30L)
+
+// Interlocked-Update
+#define LAA_ZOPC (unsigned long)(0xebL << 40 | 0xf8L) // z196
+#define LAAG_ZOPC (unsigned long)(0xebL << 40 | 0xe8L) // z196
+#define LAAL_ZOPC (unsigned long)(0xebL << 40 | 0xfaL) // z196
+#define LAALG_ZOPC (unsigned long)(0xebL << 40 | 0xeaL) // z196
+#define LAN_ZOPC (unsigned long)(0xebL << 40 | 0xf4L) // z196
+#define LANG_ZOPC (unsigned long)(0xebL << 40 | 0xe4L) // z196
+#define LAX_ZOPC (unsigned long)(0xebL << 40 | 0xf7L) // z196
+#define LAXG_ZOPC (unsigned long)(0xebL << 40 | 0xe7L) // z196
+#define LAO_ZOPC (unsigned long)(0xebL << 40 | 0xf6L) // z196
+#define LAOG_ZOPC (unsigned long)(0xebL << 40 | 0xe6L) // z196
+
+// System Functions
+#define STCK_ZOPC (unsigned int)(0xb2 << 24 | 0x05 << 16)
+#define STCKF_ZOPC (unsigned int)(0xb2 << 24 | 0x7c << 16)
+#define STFLE_ZOPC (unsigned int)(0xb2 << 24 | 0xb0 << 16)
+#define ECTG_ZOPC (unsigned long)(0xc8L <<40 | 0x01L << 32) // z10
+#define ECAG_ZOPC (unsigned long)(0xebL <<40 | 0x4cL) // z10
+
+// Execution Prediction
+#define PFD_ZOPC (unsigned long)(0xe3L <<40 | 0x36L) // z10
+#define PFDRL_ZOPC (unsigned long)(0xc6L <<40 | 0x02L << 32) // z10
+#define BPP_ZOPC (unsigned long)(0xc7L <<40) // branch prediction preload -- EC12
+#define BPRP_ZOPC (unsigned long)(0xc5L <<40) // branch prediction relative preload -- EC12
+
+// Transaction Control
+#define TBEGIN_ZOPC (unsigned long)(0xe560L << 32) // tx begin -- EC12
+#define TBEGINC_ZOPC (unsigned long)(0xe561L << 32) // tx begin (constrained) -- EC12
+#define TEND_ZOPC (unsigned int)(0xb2f8 << 16) // tx end -- EC12
+#define TABORT_ZOPC (unsigned int)(0xb2fc << 16) // tx abort -- EC12
+#define ETND_ZOPC (unsigned int)(0xb2ec << 16) // tx nesting depth -- EC12
+#define PPA_ZOPC (unsigned int)(0xb2e8 << 16) // tx processor assist -- EC12
+
+// Crypto and Checksum
+#define CKSM_ZOPC (unsigned int)(0xb2 << 24 | 0x41 << 16) // checksum.
This is NOT CRC32
+#define KM_ZOPC (unsigned int)(0xb9 << 24 | 0x2e << 16) // cipher
+#define KMC_ZOPC (unsigned int)(0xb9 << 24 | 0x2f << 16) // cipher
+#define KIMD_ZOPC (unsigned int)(0xb9 << 24 | 0x3e << 16) // SHA (msg digest)
+#define KLMD_ZOPC (unsigned int)(0xb9 << 24 | 0x3f << 16) // SHA (msg digest)
+#define KMAC_ZOPC (unsigned int)(0xb9 << 24 | 0x1e << 16) // Message Authentication Code
+
+// Various
+#define TCEB_ZOPC (unsigned long)(237L << 40 | 16)
+#define TCDB_ZOPC (unsigned long)(237L << 40 | 17)
+#define TAM_ZOPC (unsigned long)(267) // 267 = 0x010b
+
+#define FLOGR_ZOPC (unsigned int)(0xb9 << 24 | 0x83 << 16)
+#define POPCNT_ZOPC (unsigned int)(0xb9e1 << 16)
+#define AHHHR_ZOPC (unsigned int)(0xb9c8 << 16)
+#define AHHLR_ZOPC (unsigned int)(0xb9d8 << 16)
+
+
+// OpCode field masks
+// Each mask covers the bit positions that the corresponding *_ZOPC constants
+// populate, e.g. RIL_MASK covers the "<< 40" byte plus the low nibble of the
+// "<< 32" byte, and RIE_MASK covers the "<< 40" byte plus the low byte.
+
+#define RI_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define RRE_MASK (unsigned int)(0xff << 24 | 0xff << 16)
+#define RSI_MASK (unsigned int)(0xff << 24)
+#define RIE_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define RIL_MASK (unsigned long)(0xffL << 40 | 0x0fL << 32)
+
+#define BASR_MASK (unsigned int)(0xff << 8)
+#define BCR_MASK (unsigned int)(0xff << 8)
+#define BRC_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define LGHI_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define LLI_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define II_MASK (unsigned int)(0xff << 24 | 0x0f << 16)
+#define LLIF_MASK (unsigned long)(0xffL << 40 | 0x0fL << 32)
+#define IIF_MASK (unsigned long)(0xffL << 40 | 0x0fL << 32)
+#define BRASL_MASK (unsigned long)(0xffL << 40 | 0x0fL << 32)
+#define TM_MASK (unsigned int)(0xff << 24)
+#define TMY_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LB_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LH_MASK (unsigned int)(0xff << 24)
+#define L_MASK (unsigned int)(0xff << 24)
+#define LY_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LG_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LLGH_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LLGF_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define SLAG_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define LARL_MASK (unsigned long)(0xff0fL << 32)
+#define LGRL_MASK (unsigned long)(0xff0fL << 32)
+#define LE_MASK (unsigned int)(0xff << 24)
+#define LD_MASK (unsigned int)(0xff << 24)
+#define ST_MASK (unsigned int)(0xff << 24)
+#define STC_MASK (unsigned int)(0xff << 24)
+#define STG_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define STH_MASK (unsigned int)(0xff << 24)
+#define STE_MASK (unsigned int)(0xff << 24)
+#define STD_MASK (unsigned int)(0xff << 24)
+#define CMPBRANCH_MASK (unsigned long)(0xffL << 40 | 0xffL)
+#define REL_LONG_MASK (unsigned long)(0xff0fL << 32)
+
+ public:
+  // Condition code masks. Details:
+  // - Mask bits are numbered from the most significant bit of the 4-bit mask:
+  //   bit#0 = 8, bit#1 = 4, bit#2 = 2, bit#3 = 1 (compare bcondOverflow = 1 below).
+  // - Mask bit#3 must be zero for all compare and branch/trap instructions to ensure
+  //   future compatibility.
+  // - For all arithmetic instructions which set the condition code, mask bit#3
+  //   indicates overflow ("unordered" in float operations).
+  // - "unordered" float comparison results have to be treated as low.
+  // - When overflow/unordered is detected, none of the branch conditions is true,
+  //   except for bcondOverflow/bcondNotOrdered and bcondAlways.
+  // - For INT comparisons, the inverse condition can be calculated as (14-cond).
+  // - For FLOAT comparisons, the inverse condition can be calculated as (15-cond).
+  enum branch_condition {
+    bcondNever = 0,
+    bcondAlways = 15,
+
+    // Specific names. Make use of lightweight sync.
+    // Full and lightweight sync operation.
+    bcondFullSync = 15,
+    bcondLightSync = 14,
+    bcondNop = 0,
+
+    // arithmetic compare instructions
+    // arithmetic load and test, insert instructions
+    // Mask bit#3 must be zero for future compatibility.
+    bcondEqual = 8,
+    bcondNotEqual = 6,
+    bcondLow = 4,
+    bcondNotLow = 10,
+    bcondHigh = 2,
+    bcondNotHigh = 12,
+    // arithmetic calculation instructions
+    // Mask bit#3 indicates overflow if detected by instr.
+    // Mask bit#3 = 0 (overflow is not handled by compiler).
+    bcondOverflow = 1,
+    bcondNotOverflow = 14,
+    bcondZero = bcondEqual,
+    bcondNotZero = bcondNotEqual,
+    bcondNegative = bcondLow,
+    bcondNotNegative = bcondNotLow,
+    bcondPositive = bcondHigh,
+    bcondNotPositive = bcondNotHigh,
+    bcondNotOrdered = 1, // float comparisons
+    bcondOrdered = 14, // float comparisons
+    bcondLowOrNotOrdered = bcondLow|bcondNotOrdered, // float comparisons
+    bcondHighOrNotOrdered = bcondHigh|bcondNotOrdered, // float comparisons
+    // unsigned arithmetic calculation instructions
+    // Mask bit#0 is not used by these instructions.
+    // There is no indication of overflow for these instr.
+    bcondLogZero = 2,
+    bcondLogNotZero = 5,
+    bcondLogNotZero_Borrow = 4,
+    bcondLogNotZero_NoBorrow = 1,
+    // string search instructions
+    bcondFound = 4,
+    bcondNotFound = 2,
+    bcondInterrupted = 1,
+    // bit test instructions
+    bcondAllZero = 8,
+    bcondMixed = 6,
+    bcondAllOne = 1,
+    bcondNotAllZero = 7 // for tmll
+  };
+
+  // Symbolic names for the values 0..3; several names share one value.
+  // NOTE(review): presumably these are condition code (CC) values as opposed to the
+  // 4-bit branch masks above -- confirm against the users of this enum.
+  enum Condition {
+    // z/Architecture
+    negative = 0,
+    less = 0,
+    positive = 1,
+    greater = 1,
+    zero = 2,
+    equal = 2,
+    summary_overflow = 3,
+  };
+
+  // Rounding mode for float-2-int conversions.
+  enum RoundingMode {
+    current_mode = 0, // Mode taken from FPC register.
+    biased_to_nearest = 1,
+    to_nearest = 4,
+    to_zero = 5,
+    to_plus_infinity = 6,
+    to_minus_infinity = 7
+  };
+
+  // Inverse condition code, i.e. determine "15 - cc" for a given condition code cc.
+  // NOTE(review): the branch_condition comment above gives (14-cond) for INT and
+  // (15-cond) for FLOAT comparisons; the definitions are not visible here, so confirm
+  // which formula each of the two functions implements.
+  static branch_condition inverse_condition(branch_condition cc);
+  static branch_condition inverse_float_condition(branch_condition cc);
+
+
+  //-----------------------------------------------
+  // instruction property getter methods
+  //-----------------------------------------------
+
+  // Calculate length of instruction.
+  static int instr_len(unsigned char *instr);
+
+  // Longest instructions are 6 bytes on z/Architecture.
+  static int instr_maxlen() { return 6; }
+
+  // Average instruction is 4 bytes on z/Architecture (just a guess).
+  static int instr_avglen() { return 4; }
+
+  // Shortest instructions are 2 bytes on z/Architecture.
+  static int instr_minlen() { return 2; }
+
+  // Move instruction at pc right-justified into passed long int.
+  // Return instr len in bytes as function result.
+  static unsigned int get_instruction(unsigned char *pc, unsigned long *instr);
+
+  // Move instruction in passed (long int) into storage at pc.
+  // Copies the last 'len' bytes of the object representation of 'instr' to pc.
+  // These are the low-order (right-justified) bytes only on a big-endian host.
+  // 'len' is not range-checked here; callers must pass len <= sizeof(unsigned long).
+  // This code is _NOT_ MT-safe!!
+  static void set_instruction(unsigned char *pc, unsigned long instr, unsigned int len) {
+    memcpy(pc, ((unsigned char *)&instr)+sizeof(unsigned long)-len, len);
+  }
+
+
+  //------------------------------------------
+  // instruction field test methods
+  //------------------------------------------
+
+  // Thin forwarder to RelAddr::is_in_range_of_RelAddr16(target, origin).
+  // Only used once in s390.ad to implement Matcher::is_short_branch_offset().
+  static bool is_within_range_of_RelAddr16(address target, address origin) {
+    return RelAddr::is_in_range_of_RelAddr16(target, origin);
+  }
+
+
+  //----------------------------------
+  // some diagnostic output
+  //----------------------------------
+
+  // Both declarations carry the PRODUCT_RETURN suffix.
+  // NOTE(review): presumably this turns them into empty inline bodies in product
+  // builds -- confirm against the macro definition.
+  static void print_dbg_msg(outputStream* out, unsigned long inst, const char* msg, int ilen) PRODUCT_RETURN;
+  static void dump_code_range(outputStream* out, address pc, const unsigned int range, const char* msg = " ") PRODUCT_RETURN;
+
+ protected:
+
+  //-------------------------------------------------------
+  // instruction field helper methods (internal)
+  //-------------------------------------------------------
+
+  // Return a mask of 1s between hi_bit and lo_bit (inclusive).
+  static long fmask(unsigned int hi_bit, unsigned int lo_bit) {
+    assert(hi_bit >= lo_bit && hi_bit < 48, "bad bits");
+    return ((1L<<(hi_bit-lo_bit+1)) - 1) << lo_bit;
+  }
+
+  // Extract the unsigned field x[hi_bit..lo_bit] (bit 0 is the least significant
+  // bit) and return it right-justified.
+  static long inv_u_field(long x, int hi_bit, int lo_bit) {
+    return (x & fmask(hi_bit, lo_bit)) >> lo_bit;
+  }
+
+  // Extract the signed field x[hi_bit..lo_bit] and sign-extend it to a long.
+  static long inv_s_field(long x, int hi_bit, int lo_bit) {
+    x = inv_u_field(x, hi_bit, lo_bit);
+    // x is now in [0, 2*sign_bit). If the field's highest bit is set, the signed
+    // value is x - 2*sign_bit. Subtracting avoids left-shifting a negative value,
+    // which is undefined behavior before C++20.
+    const long sign_bit = 1L << (hi_bit - lo_bit);
+    return (x >= sign_bit) ? x - (sign_bit << 1) : x;
+  }
+
+  // Extract primary opcode from instruction.
+  // The int overload reads bits 31..24 (32-bit layout), the long overload reads
+  // bits 47..40 (48-bit layout).
+  static int z_inv_op(int x) { return inv_u_field(x, 31, 24); }
+  static int z_inv_op(long x) { return inv_u_field(x, 47, 40); }
+
+  // In inv_reg/inv_mask, the field's highest bit is (len-s)-1, i.e. 's' is the
+  // field's start offset counted from the most significant bit of a len-bit word.
+  static int inv_reg( long x, int s, int len) { return inv_u_field(x, (len-s)-1, (len-s)-4); } // Regs are encoded in 4 bits.
+  static int inv_mask(long x, int s, int len) { return inv_u_field(x, (len-s)-1, (len-s)-8); } // Mask is 8 bits long.
+  static int inv_simm16_48(long x) { return (inv_s_field(x, 31, 16)); } // 6-byte instructions only
+  static int inv_simm16(long x) { return (inv_s_field(x, 15, 0)); } // 4-byte instructions only
+  // 6-byte instructions only. Low 12 bits come from x[27..16] (unsigned), the
+  // high 8 bits from x[15..8] (signed).
+  static int inv_simm20(long x) {
+    const long dl = inv_u_field(x, 27, 16);
+    const long dh = inv_s_field(x, 15, 8);
+    // dh may be negative: scale by multiplication instead of a left shift.
+    return dl | (dh * (1L << 12));
+  }
+  static int inv_simm32(long x) { return (inv_s_field(x, 31, 0)); } // 6-byte instructions only
+  static int inv_uimm12(long x) { return (inv_u_field(x, 11, 0)); } // 4-byte instructions only
+
+  // Encode u_field from long value.
+ static long u_field(long x, int hi_bit, int lo_bit) { + long r = x << lo_bit; + assert((r & ~fmask(hi_bit, lo_bit)) == 0, "value out of range"); + assert(inv_u_field(r, hi_bit, lo_bit) == x, "just checking"); + return r; + } + + public: + + //-------------------------------------------------- + // instruction field construction methods + //-------------------------------------------------- + + // Compute relative address (32 bit) for branch. + // Only used once in nativeInst_s390.cpp. + static intptr_t z_pcrel_off(address dest, address pc) { + return RelAddr::pcrel_off32(dest, pc); + } + + // Extract 20-bit signed displacement. + // Only used in disassembler_s390.cpp for temp enhancements. + static int inv_simm20_xx(address iLoc) { + unsigned long instr = 0; + unsigned long iLen = get_instruction(iLoc, &instr); + return inv_simm20(instr); + } + + // unsigned immediate, in low bits, nbits long + static long uimm(long x, int nbits) { + assert(Immediate::is_uimm(x, nbits), "unsigned constant out of range"); + return x & fmask(nbits - 1, 0); + } + + // Cast '1' to long to avoid sign extension if nbits = 32. + // signed immediate, in low bits, nbits long + static long simm(long x, int nbits) { + assert(Immediate::is_simm(x, nbits), "value out of range"); + return x & fmask(nbits - 1, 0); + } + + static long imm(int64_t x, int nbits) { + // Assert that x can be represented with nbits bits ignoring the sign bits, + // i.e. the more higher bits should all be 0 or 1. + assert((x >> nbits) == 0 || (x >> nbits) == -1, "value out of range"); + return x & fmask(nbits-1, 0); + } + + // A 20-bit displacement is only in instructions of the + // RSY, RXY, or SIY format. In these instructions, the D + // field consists of a DL (low) field in bit positions 20-31 + // and of a DH (high) field in bit positions 32-39. The + // value of the displacement is formed by appending the + // contents of the DH field to the left of the contents of + // the DL field. 
+ static long simm20(int64_t ui20) { + assert(Immediate::is_simm(ui20, 20), "value out of range"); + return ( ((ui20 & 0xfffL) << (48-32)) | // DL + (((ui20 >> 12) & 0xffL) << (48-40))); // DH + } + + static long reg(Register r, int s, int len) { return u_field(r->encoding(), (len-s)-1, (len-s)-4); } + static long reg(int r, int s, int len) { return u_field(r, (len-s)-1, (len-s)-4); } + static long regt(Register r, int s, int len) { return reg(r, s, len); } + static long regz(Register r, int s, int len) { assert(r != Z_R0, "cannot use register R0 in memory access"); return reg(r, s, len); } + + static long uimm4( int64_t ui4, int s, int len) { return uimm(ui4, 4) << (len-s-4); } + static long uimm6( int64_t ui6, int s, int len) { return uimm(ui6, 6) << (len-s-6); } + static long uimm8( int64_t ui8, int s, int len) { return uimm(ui8, 8) << (len-s-8); } + static long uimm12(int64_t ui12, int s, int len) { return uimm(ui12, 12) << (len-s-12); } + static long uimm16(int64_t ui16, int s, int len) { return uimm(ui16, 16) << (len-s-16); } + static long uimm32(int64_t ui32, int s, int len) { return uimm((unsigned)ui32, 32) << (len-s-32); } // prevent sign extension + + static long simm8( int64_t si8, int s, int len) { return simm(si8, 8) << (len-s-8); } + static long simm12(int64_t si12, int s, int len) { return simm(si12, 12) << (len-s-12); } + static long simm16(int64_t si16, int s, int len) { return simm(si16, 16) << (len-s-16); } + static long simm24(int64_t si24, int s, int len) { return simm(si24, 24) << (len-s-24); } + static long simm32(int64_t si32, int s, int len) { return simm(si32, 32) << (len-s-32); } + + static long imm8( int64_t i8, int s, int len) { return imm(i8, 8) << (len-s-8); } + static long imm12(int64_t i12, int s, int len) { return imm(i12, 12) << (len-s-12); } + static long imm16(int64_t i16, int s, int len) { return imm(i16, 16) << (len-s-16); } + static long imm24(int64_t i24, int s, int len) { return imm(i24, 24) << (len-s-24); } + static long 
imm32(int64_t i32, int s, int len) { return imm(i32, 32) << (len-s-32); } + + static long fregt(FloatRegister r, int s, int len) { return freg(r,s,len); } + static long freg( FloatRegister r, int s, int len) { return u_field(r->encoding(), (len-s)-1, (len-s)-4); } + + // Rounding mode for float-2-int conversions. + static long rounding_mode(RoundingMode m, int s, int len) { + assert(m != 2 && m != 3, "invalid mode"); + return uimm(m, 4) << (len-s-4); + } + + //-------------------------------------------- + // instruction field getter methods + //-------------------------------------------- + + static int get_imm32(address a, int instruction_number) { + int imm; + int *p =((int *)(a + 2 + 6 * instruction_number)); + imm = *p; + return imm; + } + + static short get_imm16(address a, int instruction_number) { + short imm; + short *p =((short *)a) + 2 * instruction_number + 1; + imm = *p; + return imm; + } + + + //-------------------------------------------- + // instruction field setter methods + //-------------------------------------------- + + static void set_imm32(address a, int64_t s) { + assert(Immediate::is_simm32(s) || Immediate::is_uimm32(s), "to big"); + int* p = (int *) (a + 2); + *p = s; + } + + static void set_imm16(int* instr, int64_t s) { + assert(Immediate::is_simm16(s) || Immediate::is_uimm16(s), "to big"); + short* p = ((short *)instr) + 1; + *p = s; + } + + public: + + static unsigned int align(unsigned int x, unsigned int a) { return ((x + (a - 1)) & ~(a - 1)); } + static bool is_aligned(unsigned int x, unsigned int a) { return (0 == x % a); } + + inline void emit_16(int x); + inline void emit_32(int x); + inline void emit_48(long x); + + // Compare and control flow instructions + // ===================================== + + // See also commodity routines compare64_and_branch(), compare32_and_branch(). 
+ + // compare instructions + // compare register + inline void z_cr( Register r1, Register r2); // compare (r1, r2) ; int32 + inline void z_cgr( Register r1, Register r2); // compare (r1, r2) ; int64 + inline void z_cgfr(Register r1, Register r2); // compare (r1, r2) ; int64 <--> int32 + // compare immediate + inline void z_chi( Register r1, int64_t i2); // compare (r1, i2_imm16) ; int32 + inline void z_cfi( Register r1, int64_t i2); // compare (r1, i2_imm32) ; int32 + inline void z_cghi(Register r1, int64_t i2); // compare (r1, i2_imm16) ; int64 + inline void z_cgfi(Register r1, int64_t i2); // compare (r1, i2_imm32) ; int64 + // compare memory + inline void z_ch( Register r1, const Address &a); // compare (r1, *(a)) ; int32 <--> int16 + inline void z_ch( Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_uimm12+x2+b2)) ; int32 <--> int16 + inline void z_c( Register r1, const Address &a); // compare (r1, *(a)) ; int32 + inline void z_c( Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_uimm12+x2+b2)) ; int32 + inline void z_cy( Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_imm20+x2+b2)) ; int32 + inline void z_cy( Register r1, int64_t d2, Register b2); // compare (r1, *(d2_imm20+b2)) ; int32 + inline void z_cy( Register r1, const Address& a); // compare (r1, *(a)) ; int32 + //inline void z_cgf(Register r1,const Address &a); // compare (r1, *(a)) ; int64 <--> int32 + //inline void z_cgf(Register r1,int64_t d2, Register x2, Register b2);// compare (r1, *(d2_uimm12+x2+b2)) ; int64 <--> int32 + inline void z_cg( Register r1, const Address &a); // compare (r1, *(a)) ; int64 + inline void z_cg( Register r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_imm20+x2+b2)) ; int64 + + // compare logical instructions + // compare register + inline void z_clr( Register r1, Register r2); // compare (r1, r2) ; uint32 + inline void z_clgr( Register r1, Register r2); // compare (r1, r2) ;
uint64 + // compare immediate + inline void z_clfi( Register r1, int64_t i2); // compare (r1, i2_uimm32) ; uint32 + inline void z_clgfi(Register r1, int64_t i2); // compare (r1, i2_uimm32) ; uint64 + inline void z_cl( Register r1, const Address &a); // compare (r1, *(a)) ; uint32 + inline void z_cl( Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_uimm12+x2+b2)) ; uint32 + inline void z_cly( Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_imm20+x2+b2)) ; uint32 + inline void z_cly( Register r1, int64_t d2, Register b2); // compare (r1, *(d2_imm20+b2)) ; uint32 + inline void z_cly( Register r1, const Address& a); // compare (r1, *(a)) ; uint32 + inline void z_clg( Register r1, const Address &a); // compare (r1, *(a)) ; uint64 + inline void z_clg( Register r1, int64_t d2, Register x2, Register b2);// compare (r1, *(d2_imm20+x2+b2)) ; uint64 + + // test under mask + inline void z_tmll(Register r1, int64_t i2); // test under mask, see docu + inline void z_tmlh(Register r1, int64_t i2); // test under mask, see docu + inline void z_tmhl(Register r1, int64_t i2); // test under mask, see docu + inline void z_tmhh(Register r1, int64_t i2); // test under mask, see docu + + // branch instructions + inline void z_bc( branch_condition m1, int64_t d2, Register x2, Register b2);// branch m1 ? pc = (d2_uimm12+x2+b2) + inline void z_bcr( branch_condition m1, Register r2); // branch (m1 && r2!=R0) ? pc = r2 + inline void z_brc( branch_condition i1, int64_t i2); // branch i1 ? pc = pc + i2_imm16 + inline void z_brc( branch_condition i1, address a); // branch i1 ? pc = a + inline void z_brc( branch_condition i1, Label& L); // branch i1 ? pc = Label + //inline void z_brcl(branch_condition i1, int64_t i2); // branch i1 ? pc = pc + i2_imm32 + inline void z_brcl(branch_condition i1, address a); // branch i1 ? pc = a + inline void z_brcl(branch_condition i1, Label& L); // branch i1 ?
pc = Label + inline void z_bctgr(Register r1, Register r2); // branch on count r1 -= 1; (r1!=0) ? pc = r2 ; r1 is int64 + + // branch unconditional / always + inline void z_br(Register r2); // branch to r2, nop if r2 == Z_R0 + + + // See also commodity routines compare64_and_branch(), compare32_and_branch(). + // signed comparison and branch + inline void z_crb( Register r1, Register r2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 r2) ? goto b4+d4 ; int32 -- z10 + inline void z_cgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 r2) ? goto b4+d4 ; int64 -- z10 + inline void z_crj( Register r1, Register r2, branch_condition m3, Label& L); // (r1 m3 r2) ? goto L ; int32 -- z10 + inline void z_crj( Register r1, Register r2, branch_condition m3, address a4); // (r1 m3 r2) ? goto (pc+a4<<1) ; int32 -- z10 + inline void z_cgrj(Register r1, Register r2, branch_condition m3, Label& L); // (r1 m3 r2) ? goto L ; int64 -- z10 + inline void z_cgrj(Register r1, Register r2, branch_condition m3, address a4); // (r1 m3 r2) ? goto (pc+a4<<1) ; int64 -- z10 + inline void z_cib( Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_imm8) ? goto b4+d4 ; int32 -- z10 + inline void z_cgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_imm8) ? goto b4+d4 ; int64 -- z10 + inline void z_cij( Register r1, int64_t i2, branch_condition m3, Label& L); // (r1 m3 i2_imm8) ? goto L ; int32 -- z10 + inline void z_cij( Register r1, int64_t i2, branch_condition m3, address a4); // (r1 m3 i2_imm8) ? goto (pc+a4<<1) ; int32 -- z10 + inline void z_cgij(Register r1, int64_t i2, branch_condition m3, Label& L); // (r1 m3 i2_imm8) ? goto L ; int64 -- z10 + inline void z_cgij(Register r1, int64_t i2, branch_condition m3, address a4); // (r1 m3 i2_imm8) ? 
goto (pc+a4<<1) ; int64 -- z10 + // unsigned comparison and branch + inline void z_clrb( Register r1, Register r2, branch_condition m3, int64_t d4, Register b4);// (r1 m3 r2) ? goto b4+d4 ; uint32 -- z10 + inline void z_clgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4);// (r1 m3 r2) ? goto b4+d4 ; uint64 -- z10 + inline void z_clrj( Register r1, Register r2, branch_condition m3, Label& L); // (r1 m3 r2) ? goto L ; uint32 -- z10 + inline void z_clrj( Register r1, Register r2, branch_condition m3, address a4); // (r1 m3 r2) ? goto (pc+a4<<1) ; uint32 -- z10 + inline void z_clgrj(Register r1, Register r2, branch_condition m3, Label& L); // (r1 m3 r2) ? goto L ; uint64 -- z10 + inline void z_clgrj(Register r1, Register r2, branch_condition m3, address a4); // (r1 m3 r2) ? goto (pc+a4<<1) ; uint64 -- z10 + inline void z_clib( Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_uimm8) ? goto b4+d4 ; uint32 -- z10 + inline void z_clgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4); // (r1 m3 i2_uimm8) ? goto b4+d4 ; uint64 -- z10 + inline void z_clij( Register r1, int64_t i2, branch_condition m3, Label& L); // (r1 m3 i2_uimm8) ? goto L ; uint32 -- z10 + inline void z_clij( Register r1, int64_t i2, branch_condition m3, address a4); // (r1 m3 i2_uimm8) ? goto (pc+a4<<1) ; uint32 -- z10 + inline void z_clgij(Register r1, int64_t i2, branch_condition m3, Label& L); // (r1 m3 i2_uimm8) ? goto L ; uint64 -- z10 + inline void z_clgij(Register r1, int64_t i2, branch_condition m3, address a4); // (r1 m3 i2_uimm8) ? goto (pc+a4<<1) ; uint64 -- z10 + + // Compare and trap instructions. + // signed comparison + inline void z_crt(Register r1, Register r2, int64_t m3); // (r1 m3 r2) ? trap ; int32 -- z10 + inline void z_cgrt(Register r1, Register r2, int64_t m3); // (r1 m3 r2) ? trap ; int64 -- z10 + inline void z_cit(Register r1, int64_t i2, int64_t m3); // (r1 m3 i2_imm16) ? 
trap ; int32 -- z10 + inline void z_cgit(Register r1, int64_t i2, int64_t m3); // (r1 m3 i2_imm16) ? trap ; int64 -- z10 + // unsigned comparison + inline void z_clrt(Register r1, Register r2, int64_t m3); // (r1 m3 r2) ? trap ; uint32 -- z10 + inline void z_clgrt(Register r1, Register r2, int64_t m3); // (r1 m3 r2) ? trap ; uint64 -- z10 + inline void z_clfit(Register r1, int64_t i2, int64_t m3); // (r1 m3 i2_uimm16) ? trap ; uint32 -- z10 + inline void z_clgit(Register r1, int64_t i2, int64_t m3); // (r1 m3 i2_uimm16) ? trap ; uint64 -- z10 + + inline void z_illtrap(); + inline void z_illtrap(int id); + inline void z_illtrap_eyecatcher(unsigned short xpattern, unsigned short pattern); + + + // load address, add for addresses + // =============================== + + // The versions without suffix z assert that the base reg is != Z_R0. + // Z_R0 is interpreted as constant '0'. The variants with Address operand + // check this automatically, so no two versions are needed. + inline void z_layz(Register r1, int64_t d2, Register x2, Register b2); // Special version. Allows Z_R0 as base reg. + inline void z_lay(Register r1, const Address &a); // r1 = a + inline void z_lay(Register r1, int64_t d2, Register x2, Register b2); // r1 = d2_imm20+x2+b2 + inline void z_laz(Register r1, int64_t d2, Register x2, Register b2); // Special version. Allows Z_R0 as base reg. + inline void z_la(Register r1, const Address &a); // r1 = a ; unsigned immediate! + inline void z_la(Register r1, int64_t d2, Register x2, Register b2); // r1 = d2_uimm12+x2+b2 ; unsigned immediate! 
+ inline void z_larl(Register r1, int64_t i2); // r1 = pc + i2_imm32<<1; + inline void z_larl(Register r1, address a2); // r1 = pc + i2_imm32<<1; + + // Load instructions for integers + // ============================== + + // Address as base + index + offset + inline void z_lb( Register r1, const Address &a); // load r1 = *(a) ; int32 <- int8 + inline void z_lb( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int32 <- int8 + inline void z_lh( Register r1, const Address &a); // load r1 = *(a) ; int32 <- int16 + inline void z_lh( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2); int32 <- int16 + inline void z_lhy(Register r1, const Address &a); // load r1 = *(a) ; int32 <- int16 + inline void z_lhy(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int32 <- int16 + inline void z_l( Register r1, const Address& a); // load r1 = *(a) ; int32 + inline void z_l( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2); int32 + inline void z_ly( Register r1, const Address& a); // load r1 = *(a) ; int32 + inline void z_ly( Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int32 + + inline void z_lgb(Register r1, const Address &a); // load r1 = *(a) ; int64 <- int8 + inline void z_lgb(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int8 + inline void z_lgh(Register r1, const Address &a); // load r1 = *(a) ; int64 <- int16 + inline void z_lgh(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int16 + inline void z_lgf(Register r1, const Address &a); // load r1 = *(a) ; int64 <- int32 + inline void z_lgf(Register r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int32 + inline void z_lg( Register r1, const Address& a); // load r1 = *(a) ; int64 <- int64 + inline void z_lg( Register r1,
int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; int64 <- int64 + + // load and test + inline void z_lt( Register r1, const Address &a); // load and test r1 = *(a) ; int32 + inline void z_lt( Register r1, int64_t d2, Register x2, Register b2);// load and test r1 = *(d2_imm20+x2+b2) ; int32 + inline void z_ltg( Register r1, const Address &a); // load and test r1 = *(a) ; int64 + inline void z_ltg( Register r1, int64_t d2, Register x2, Register b2);// load and test r1 = *(d2_imm20+x2+b2) ; int64 + inline void z_ltgf(Register r1, const Address &a); // load and test r1 = *(a) ; int64 <- int32 + inline void z_ltgf(Register r1, int64_t d2, Register x2, Register b2);// load and test r1 = *(d2_imm20+x2+b2) ; int64 <- int32 + + // load unsigned integer - zero extended + inline void z_llc( Register r1, const Address& a); // load r1 = *(a) ; uint32 <- uint8 + inline void z_llc( Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint32 <- uint8 + inline void z_llh( Register r1, const Address& a); // load r1 = *(a) ; uint32 <- uint16 + inline void z_llh( Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint32 <- uint16 + inline void z_llgc(Register r1, const Address& a); // load r1 = *(a) ; uint64 <- uint8 + inline void z_llgc(Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint64 <- uint8 + inline void z_llgc( Register r1, int64_t d2, Register b2); // load r1 = *(d2_imm20+b2) ; uint64 <- uint8 + inline void z_llgh(Register r1, const Address& a); // load r1 = *(a) ; uint64 <- uint16 + inline void z_llgh(Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint64 <- uint16 + inline void z_llgf(Register r1, const Address& a); // load r1 = *(a) ; uint64 <- uint32 + inline void z_llgf(Register r1, int64_t d2, Register x2, Register b2);// load r1 = *(d2_imm20+x2+b2) ; uint64 <- uint32 + + // pc relative addressing + inline void 
z_lhrl( Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int32 <- int16 -- z10 + inline void z_lrl( Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int32 -- z10 + inline void z_lghrl(Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int64 <- int16 -- z10 + inline void z_lgfrl(Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int64 <- int32 -- z10 + inline void z_lgrl( Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; int64 -- z10 + + inline void z_llhrl( Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; uint32 <- uint16 -- z10 + inline void z_llghrl(Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; uint64 <- uint16 -- z10 + inline void z_llgfrl(Register r1, int64_t i2); // load r1 = *(pc + i2_imm32<<1) ; uint64 <- uint32 -- z10 + + // Store instructions for integers + // =============================== + + // Address as base + index + offset + inline void z_stc( Register r1, const Address &d); // store *(a) = r1 ; int8 + inline void z_stc( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; int8 + inline void z_stcy(Register r1, const Address &d); // store *(a) = r1 ; int8 + inline void z_stcy(Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; int8 + inline void z_sth( Register r1, const Address &d); // store *(a) = r1 ; int16 + inline void z_sth( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; int16 + inline void z_sthy(Register r1, const Address &d); // store *(a) = r1 ; int16 + inline void z_sthy(Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; int16 + inline void z_st( Register r1, const Address &d); // store *(a) = r1 ; int32 + inline void z_st( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; int32 + inline void z_sty( Register r1, const Address &d); // store *(a) = r1 ; int32 + inline void z_sty( 
Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; int32 + inline void z_stg( Register r1, const Address &d); // store *(d) = r1 ; int64 + inline void z_stg( Register r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; int64 + + inline void z_stcm( Register r1, int64_t m3, int64_t d2, Register b2); // store character under mask + inline void z_stcmy(Register r1, int64_t m3, int64_t d2, Register b2); // store character under mask + inline void z_stcmh(Register r1, int64_t m3, int64_t d2, Register b2); // store character under mask + + // pc relative addressing + inline void z_sthrl(Register r1, int64_t i2); // store *(pc + i2_imm32<<1) = r1 ; int16 -- z10 + inline void z_strl( Register r1, int64_t i2); // store *(pc + i2_imm32<<1) = r1 ; int32 -- z10 + inline void z_stgrl(Register r1, int64_t i2); // store *(pc + i2_imm32<<1) = r1 ; int64 -- z10 + + + // Load and store immediates + // ========================= + + // load immediate + inline void z_lhi( Register r1, int64_t i2); // r1 = i2_imm16 ; int32 <- int16 + inline void z_lghi(Register r1, int64_t i2); // r1 = i2_imm16 ; int64 <- int16 + inline void z_lgfi(Register r1, int64_t i2); // r1 = i2_imm32 ; int64 <- int32 + + inline void z_llihf(Register r1, int64_t i2); // r1 = i2_imm32 ; uint64 <- (uint32<<32) + inline void z_llilf(Register r1, int64_t i2); // r1 = i2_imm32 ; uint64 <- uint32 + inline void z_llihh(Register r1, int64_t i2); // r1 = i2_imm16 ; uint64 <- (uint16<<48) + inline void z_llihl(Register r1, int64_t i2); // r1 = i2_imm16 ; uint64 <- (uint16<<32) + inline void z_llilh(Register r1, int64_t i2); // r1 = i2_imm16 ; uint64 <- (uint16<<16) + inline void z_llill(Register r1, int64_t i2); // r1 = i2_imm16 ; uint64 <- uint16 + + // insert immediate + inline void z_ic( Register r1, int64_t d2, Register x2, Register b2); // insert character + inline void z_icy( Register r1, int64_t d2, Register x2, Register b2); // insert character + inline
void z_icm( Register r1, int64_t m3, int64_t d2, Register b2); // insert character under mask + inline void z_icmy(Register r1, int64_t m3, int64_t d2, Register b2); // insert character under mask + inline void z_icmh(Register r1, int64_t m3, int64_t d2, Register b2); // insert character under mask + + inline void z_iihh(Register r1, int64_t i2); // insert immediate r1[ 0-15] = i2_imm16 + inline void z_iihl(Register r1, int64_t i2); // insert immediate r1[16-31] = i2_imm16 + inline void z_iilh(Register r1, int64_t i2); // insert immediate r1[32-47] = i2_imm16 + inline void z_iill(Register r1, int64_t i2); // insert immediate r1[48-63] = i2_imm16 + inline void z_iihf(Register r1, int64_t i2); // insert immediate r1[ 0-31] = i2_imm32 + inline void z_iilf(Register r1, int64_t i2); // insert immediate r1[32-63] = i2_imm32 + + // store immediate + inline void z_mvhhi(const Address &d, int64_t i2); // store *(d) = i2_imm16 ; int16 + inline void z_mvhhi(int64_t d1, Register b1, int64_t i2); // store *(d1_uimm12+b1) = i2_imm16 ; int16 + inline void z_mvhi( const Address &d, int64_t i2); // store *(d) = i2_imm16 ; int32 + inline void z_mvhi( int64_t d1, Register b1, int64_t i2); // store *(d1_uimm12+b1) = i2_imm16 ; int32 + inline void z_mvghi(const Address &d, int64_t i2); // store *(d) = i2_imm16 ; int64 + inline void z_mvghi(int64_t d1, Register b1, int64_t i2); // store *(d1_uimm12+b1) = i2_imm16 ; int64 + + // Move and Convert instructions + // ============================= + + // move, sign extend + inline void z_lbr(Register r1, Register r2); // move r1 = r2 ; int32 <- int8 + inline void z_lhr( Register r1, Register r2); // move r1 = r2 ; int32 <- int16 + inline void z_lr(Register r1, Register r2); // move r1 = r2 ; int32, no sign extension + inline void z_lgbr(Register r1, Register r2); // move r1 = r2 ; int64 <- int8 + inline void z_lghr(Register r1, Register r2); // move r1 = r2 ; int64 <- int16 + inline void z_lgfr(Register r1, Register r2); // move r1 = r2 ; int64
<- int32 + inline void z_lgr(Register r1, Register r2); // move r1 = r2 ; int64 + // move, zero extend + inline void z_llhr( Register r1, Register r2); // move r1 = r2 ; uint32 <- uint16 + inline void z_llgcr(Register r1, Register r2); // move r1 = r2 ; uint64 <- uint8 + inline void z_llghr(Register r1, Register r2); // move r1 = r2 ; uint64 <- uint16 + inline void z_llgfr(Register r1, Register r2); // move r1 = r2 ; uint64 <- uint32 + + // move and test register + inline void z_ltr(Register r1, Register r2); // load/move and test r1 = r2; int32 + inline void z_ltgr(Register r1, Register r2); // load/move and test r1 = r2; int64 + inline void z_ltgfr(Register r1, Register r2); // load/move and test r1 = r2; int64 <-- int32 + + // move and byte-reverse + inline void z_lrvr( Register r1, Register r2); // move and reverse byte order r1 = r2; int32 + inline void z_lrvgr(Register r1, Register r2); // move and reverse byte order r1 = r2; int64 + + + // Arithmetic instructions (Integer only) + // ====================================== + // For float arithmetic instructions scroll further down + // Add logical differs in the condition codes set! 
+ + // add registers + inline void z_ar( Register r1, Register r2); // add r1 = r1 + r2 ; int32 + inline void z_agr( Register r1, Register r2); // add r1 = r1 + r2 ; int64 + inline void z_agfr( Register r1, Register r2); // add r1 = r1 + r2 ; int64 <- int32 + inline void z_ark( Register r1, Register r2, Register r3); // add r1 = r2 + r3 ; int32 + inline void z_agrk( Register r1, Register r2, Register r3); // add r1 = r2 + r3 ; int64 + + inline void z_alr( Register r1, Register r2); // add logical r1 = r1 + r2 ; int32 + inline void z_algr( Register r1, Register r2); // add logical r1 = r1 + r2 ; int64 + inline void z_algfr(Register r1, Register r2); // add logical r1 = r1 + r2 ; int64 <- int32 + inline void z_alrk( Register r1, Register r2, Register r3); // add logical r1 = r2 + r3 ; int32 + inline void z_algrk(Register r1, Register r2, Register r3); // add logical r1 = r2 + r3 ; int64 + inline void z_alcgr(Register r1, Register r2); // add logical with carry r1 = r1 + r2 + c ; int64 + + // add immediate + inline void z_ahi( Register r1, int64_t i2); // add r1 = r1 + i2_imm16 ; int32 + inline void z_afi( Register r1, int64_t i2); // add r1 = r1 + i2_imm32 ; int32 + inline void z_alfi( Register r1, int64_t i2); // add logical r1 = r1 + i2_imm32 ; int32 + inline void z_aghi( Register r1, int64_t i2); // add r1 = r1 + i2_imm16 ; int64 + inline void z_agfi( Register r1, int64_t i2); // add r1 = r1 + i2_imm32 ; int64 + inline void z_algfi(Register r1, int64_t i2); // add logical r1 = r1 + i2_imm32 ; int64 + inline void z_ahik( Register r1, Register r3, int64_t i2); // add r1 = r3 + i2_imm16 ; int32 + inline void z_aghik(Register r1, Register r3, int64_t i2); // add r1 = r3 + i2_imm16 ; int64 + inline void z_aih( Register r1, int64_t i2); // add r1 = r1 + i2_imm32 ; int32 (HiWord) + + // add memory + inline void z_a( Register r1, int64_t d2, Register x2, Register b2); // add r1 = r1 + *(d2_uimm12+x2+b2) ; int32 + inline void z_ay( Register r1, int64_t d2, Register x2,
Register b2);// add r1 = r1 + *(d2_imm20+x2+b2) ; int32 + inline void z_ag( Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2) ; int64 + inline void z_agf( Register r1, int64_t d2, Register x2, Register b2);// add r1 = r1 + *(d2_imm20+x2+b2) ; int64 <- int32 + inline void z_al( Register r1, int64_t d2, Register x2, Register b2);// add logical r1 = r1 + *(d2_uimm12+x2+b2) ; int32 + inline void z_aly( Register r1, int64_t d2, Register x2, Register b2);// add logical r1 = r1 + *(d2_imm20+x2+b2) ; int32 + inline void z_alg( Register r1, int64_t d2, Register x2, Register b2);// add logical r1 = r1 + *(d2_imm20+x2+b2) ; int64 + inline void z_algf(Register r1, int64_t d2, Register x2, Register b2);// add logical r1 = r1 + *(d2_imm20+x2+b2) ; int64 <- int32 + inline void z_a( Register r1, const Address& a); // add r1 = r1 + *(a) ; int32 + inline void z_ay( Register r1, const Address& a); // add r1 = r1 + *(a) ; int32 + inline void z_al( Register r1, const Address& a); // add logical r1 = r1 + *(a) ; int32 + inline void z_aly( Register r1, const Address& a); // add logical r1 = r1 + *(a) ; int32 + inline void z_ag( Register r1, const Address& a); // add r1 = r1 + *(a) ; int64 + inline void z_agf( Register r1, const Address& a); // add r1 = r1 + *(a) ; int64 <- int32 + inline void z_alg( Register r1, const Address& a); // add logical r1 = r1 + *(a) ; int64 + inline void z_algf(Register r1, const Address& a); // add logical r1 = r1 + *(a) ; int64 <- int32 + + + inline void z_alhsik( Register r1, Register r3, int64_t i2); // add logical r1 = r3 + i2_imm16 ; int32 + inline void z_alghsik(Register r1, Register r3, int64_t i2); // add logical r1 = r3 + i2_imm16 ; int64 + + inline void z_asi( int64_t d1, Register b1, int64_t i2); // add *(d1_imm20+b1) += i2_imm8 ; int32 -- z10 + inline void z_agsi( int64_t d1, Register b1, int64_t i2); // add *(d1_imm20+b1) += i2_imm8 ; int64 -- z10 + inline void z_alsi( int64_t d1, Register b1, int64_t i2); // add logical *(d1_imm20+b1) += i2_imm8 ; uint32 -- z10 + inline
void z_algsi(int64_t d1, Register b1, int64_t i2); // add logical *(d1_imm20+b1) += i2_imm8 ; uint64 -- z10 + inline void z_asi( const Address& d, int64_t i2); // add *(d) += i2_imm8 ; int32 -- z10 + inline void z_agsi( const Address& d, int64_t i2); // add *(d) += i2_imm8 ; int64 -- z10 + inline void z_alsi( const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint32 -- z10 + inline void z_algsi(const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint64 -- z10 + + // negate + inline void z_lcr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int32 + inline void z_lcgr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int64 + inline void z_lcgfr(Register r1, Register r2); // neg r1 = -r2 ; int64 <- int32 + inline void z_lnr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int32 + inline void z_lngr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int64 + inline void z_lngfr(Register r1, Register r2); // neg r1 = -|r2| ; int64 <- int32 + + // subtract instructions + // sub registers + inline void z_sr( Register r1, Register r2); // sub r1 = r1 - r2 ; int32 + inline void z_sgr( Register r1, Register r2); // sub r1 = r1 - r2 ; int64 + inline void z_sgfr( Register r1, Register r2); // sub r1 = r1 - r2 ; int64 <- int32 + inline void z_srk( Register r1, Register r2, Register r3); // sub r1 = r2 - r3 ; int32 + inline void z_sgrk( Register r1, Register r2, Register r3); // sub r1 = r2 - r3 ; int64 + + inline void z_slr( Register r1, Register r2); // sub logical r1 = r1 - r2 ; int32 + inline void z_slgr( Register r1, Register r2); // sub logical r1 = r1 - r2 ; int64 + inline void z_slgfr(Register r1, Register r2); // sub logical r1 = r1 - r2 ; int64 <- int32 + inline void z_slrk( Register r1, Register r2, Register r3); // sub logical r1 = r2 - r3 ; int32 + inline void z_slgrk(Register r1, Register r2, Register r3); // sub logical r1 = r2 - r3 ; int64 + inline void z_slfi( Register r1, int64_t i2); // sub logical r1 = r1 - 
i2_uimm32 ; int32 + inline void z_slgfi(Register r1, int64_t i2); // sub logical r1 = r1 - i2_uimm32 ; int64 + + // sub memory + inline void z_s( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_uimm12+x2+b2) ; int32 + inline void z_sy( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+x2+b2) ; int32 + inline void z_sg( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+x2+b2) ; int64 + inline void z_sgf( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+x2+b2) ; int64 - int32 + inline void z_slg( Register r1, int64_t d2, Register x2, Register b2); // sub logical r1 = r1 - *(d2_imm20+x2+b2) ; uint64 + inline void z_slgf(Register r1, int64_t d2, Register x2, Register b2); // sub logical r1 = r1 - *(d2_imm20+x2+b2) ; uint64 - uint32 + inline void z_s( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int32 + inline void z_sy( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int32 + inline void z_sg( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int64 + inline void z_sgf( Register r1, const Address& a); // sub r1 = r1 - *(a) ; int64 - int32 + inline void z_slg( Register r1, const Address& a); // sub logical r1 = r1 - *(a) ; uint64 + inline void z_slgf(Register r1, const Address& a); // sub logical r1 = r1 - *(a) ; uint64 - uint32 + + inline void z_sh( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_uimm12+x2+b2) ; int32 - int16 + inline void z_shy( Register r1, int64_t d2, Register x2, Register b2); // sub r1 = r1 - *(d2_imm20+x2+b2) ; int32 - int16 + inline void z_sh( Register r1, const Address &a); // sub r1 = r1 - *(a) ; int32 - int16 + inline void z_shy( Register r1, const Address &a); // sub r1 = r1 - *(a) ; int32 - int16 + + // Multiplication instructions + // mul registers + inline void z_msr( Register r1, Register r2); // mul r1 = r1 * r2 ; int32 + inline void z_msgr( Register r1, Register 
r2); // mul r1 = r1 * r2 ; int64 + inline void z_msgfr(Register r1, Register r2); // mul r1 = r1 * r2 ; int64 <- int32 + inline void z_mlr( Register r1, Register r2); // mul r1 = r1 * r2 ; int32 unsigned + inline void z_mlgr( Register r1, Register r2); // mul r1 = r1 * r2 ; int64 unsigned + // mul register - memory + inline void z_mhy( Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2+x2+b2) + inline void z_msy( Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2+x2+b2) + inline void z_msg( Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2+x2+b2) + inline void z_msgf(Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2+x2+b2) + inline void z_ml( Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2+x2+b2) + inline void z_mlg( Register r1, int64_t d2, Register x2, Register b2); // mul r1 = r1 * *(d2+x2+b2) + inline void z_mhy( Register r1, const Address& a); // mul r1 = r1 * *(a) + inline void z_msy( Register r1, const Address& a); // mul r1 = r1 * *(a) + inline void z_msg( Register r1, const Address& a); // mul r1 = r1 * *(a) + inline void z_msgf(Register r1, const Address& a); // mul r1 = r1 * *(a) + inline void z_ml( Register r1, const Address& a); // mul r1 = r1 * *(a) + inline void z_mlg( Register r1, const Address& a); // mul r1 = r1 * *(a) + + inline void z_msfi( Register r1, int64_t i2); // mult r1 = r1 * i2_imm32; int32 -- z10 + inline void z_msgfi(Register r1, int64_t i2); // mult r1 = r1 * i2_imm32; int64 -- z10 + inline void z_mhi( Register r1, int64_t i2); // mult r1 = r1 * i2_imm16; int32 + inline void z_mghi( Register r1, int64_t i2); // mult r1 = r1 * i2_imm16; int64 + + // Division instructions + inline void z_dsgr( Register r1, Register r2); // div r1 = r1 / r2 ; int64/int64 needs reg pair! + inline void z_dsgfr(Register r1, Register r2); // div r1 = r1 / r2 ; int64/int32 needs reg pair! 
+ + + // Logic instructions + // =================== + + // and + inline void z_n( Register r1, int64_t d2, Register x2, Register b2); // and r1 = r1 & *(d2_uimm12+x2+b2) ; int32 + inline void z_ny( Register r1, int64_t d2, Register x2, Register b2); // and r1 = r1 & *(d2_imm20+x2+b2) ; int32 + inline void z_ng( Register r1, int64_t d2, Register x2, Register b2); // and r1 = r1 & *(d2_imm20+x2+b2) ; int64 + inline void z_n( Register r1, const Address& a); // and r1 = r1 & *(a) ; int32 + inline void z_ny( Register r1, const Address& a); // and r1 = r1 & *(a) ; int32 + inline void z_ng( Register r1, const Address& a); // and r1 = r1 & *(a) ; int64 + + inline void z_nr( Register r1, Register r2); // and r1 = r1 & r2 ; int32 + inline void z_ngr( Register r1, Register r2); // and r1 = r1 & r2 ; int64 + inline void z_nrk( Register r1, Register r2, Register r3); // and r1 = r2 & r3 ; int32 + inline void z_ngrk(Register r1, Register r2, Register r3); // and r1 = r2 & r3 ; int64 + + inline void z_nihh(Register r1, int64_t i2); // and r1 = r1 & i2_imm16 ; and only for bits 0-15 + inline void z_nihl(Register r1, int64_t i2); // and r1 = r1 & i2_imm16 ; and only for bits 16-31 + inline void z_nilh(Register r1, int64_t i2); // and r1 = r1 & i2_imm16 ; and only for bits 32-47 + inline void z_nill(Register r1, int64_t i2); // and r1 = r1 & i2_imm16 ; and only for bits 48-63 + inline void z_nihf(Register r1, int64_t i2); // and r1 = r1 & i2_imm32 ; and only for bits 0-31 + inline void z_nilf(Register r1, int64_t i2); // and r1 = r1 & i2_imm32 ; and only for bits 32-63 see also MacroAssembler::nilf. 
+ + // or + inline void z_o( Register r1, int64_t d2, Register x2, Register b2); // or r1 = r1 | *(d2_uimm12+x2+b2) ; int32 + inline void z_oy( Register r1, int64_t d2, Register x2, Register b2); // or r1 = r1 | *(d2_imm20+x2+b2) ; int32 + inline void z_og( Register r1, int64_t d2, Register x2, Register b2); // or r1 = r1 | *(d2_imm20+x2+b2) ; int64 + inline void z_o( Register r1, const Address& a); // or r1 = r1 | *(a) ; int32 + inline void z_oy( Register r1, const Address& a); // or r1 = r1 | *(a) ; int32 + inline void z_og( Register r1, const Address& a); // or r1 = r1 | *(a) ; int64 + + inline void z_or( Register r1, Register r2); // or r1 = r1 | r2; int32 + inline void z_ogr( Register r1, Register r2); // or r1 = r1 | r2; int64 + inline void z_ork( Register r1, Register r2, Register r3); // or r1 = r2 | r3 ; int32 + inline void z_ogrk(Register r1, Register r2, Register r3); // or r1 = r2 | r3 ; int64 + + inline void z_oihh(Register r1, int64_t i2); // or r1 = r1 | i2_imm16 ; or only for bits 0-15 + inline void z_oihl(Register r1, int64_t i2); // or r1 = r1 | i2_imm16 ; or only for bits 16-31 + inline void z_oilh(Register r1, int64_t i2); // or r1 = r1 | i2_imm16 ; or only for bits 32-47 + inline void z_oill(Register r1, int64_t i2); // or r1 = r1 | i2_imm16 ; or only for bits 48-63 + inline void z_oihf(Register r1, int64_t i2); // or r1 = r1 | i2_imm32 ; or only for bits 0-31 + inline void z_oilf(Register r1, int64_t i2); // or r1 = r1 | i2_imm32 ; or only for bits 32-63 + + // xor + inline void z_x( Register r1, int64_t d2, Register x2, Register b2); // xor r1 = r1 ^ *(d2_uimm12+x2+b2) ; int32 + inline void z_xy( Register r1, int64_t d2, Register x2, Register b2); // xor r1 = r1 ^ *(d2_imm20+x2+b2) ; int32 + inline void z_xg( Register r1, int64_t d2, Register x2, Register b2); // xor r1 = r1 ^ *(d2_imm20+x2+b2) ; int64 + inline void z_x( Register r1, const Address& a); // xor r1 = r1 ^ *(a) ; int32 + inline void z_xy( Register r1, const Address& a); // xor r1 = r1 ^ *(a) ; int32 + inline void z_xg( Register r1, const Address& a); // xor r1 = r1 ^ *(a) ; int64 + + inline void z_xr( Register r1, Register r2); // xor r1 = r1 ^ r2 ; int32 + inline void z_xgr( Register r1, Register r2); // xor r1 = r1 ^ r2 ; int64 + inline void z_xrk( Register r1, Register r2, Register r3); // xor r1 = r2 ^ r3 ; int32 + inline void z_xgrk(Register r1, Register r2, Register r3); // xor r1 = r2 ^ r3 ; int64 
+ + inline void z_xihf(Register r1, int64_t i2); // xor r1 = r1 ^ i2_imm32 ; xor only for bits 0-31 + inline void z_xilf(Register r1, int64_t i2); // xor r1 = r1 ^ i2_imm32 ; xor only for bits 32-63 + + // shift + inline void z_sla( Register r1, int64_t d2, Register b2=Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, only 31 bits shifted, sign preserved! + inline void z_slag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, only 63 bits shifted, sign preserved! + inline void z_sra( Register r1, int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, sign extended + inline void z_srag(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, sign extended + inline void z_sll( Register r1, int64_t d2, Register b2=Z_R0); // shift left r1 = r1 << ((d2+b2)&0x3f) ; int32, zeros added + inline void z_sllg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift left r1 = r3 << ((d2+b2)&0x3f) ; int64, zeros added + inline void z_srl( Register r1, int64_t d2, Register b2=Z_R0); // shift right r1 = r1 >> ((d2+b2)&0x3f) ; int32, zero extended + inline void z_srlg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // shift right r1 = r3 >> ((d2+b2)&0x3f) ; int64, zero extended + + // rotate + inline void z_rll( Register r1, Register r3, int64_t d2, Register b2=Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int32 -- z10 + inline void z_rllg(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // rot r1 = r3 << (d2+b2 & 0x3f) ; int64 -- z10 + + // rotate then AND/XOR/OR/insert + inline void z_rnsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then AND selected bits -- z196 + inline void z_rxsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then XOR selected bits -- z196 + inline void z_rosbg( Register r1, 
Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only = false); // rotate then OR selected bits -- z196 + inline void z_risbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool zero_rest = false); // rotate then INS selected bits -- z196 + + + // memory-immediate instructions (8-bit immediate) + // =============================================== + + inline void z_cli( int64_t d1, Register b1, int64_t i2); // compare *(d1_imm12+b1) with i2_imm8 ; int8 + inline void z_mvi( int64_t d1, Register b1, int64_t i2); // store *(d1_imm12+b1) = i2_imm8 ; int8 + inline void z_tm( int64_t d1, Register b1, int64_t i2); // test *(d1_imm12+b1) against mask i2_imm8 ; int8 + inline void z_ni( int64_t d1, Register b1, int64_t i2); // store *(d1_imm12+b1) &= i2_imm8 ; int8 + inline void z_oi( int64_t d1, Register b1, int64_t i2); // store *(d1_imm12+b1) |= i2_imm8 ; int8 + inline void z_xi( int64_t d1, Register b1, int64_t i2); // store *(d1_imm12+b1) ^= i2_imm8 ; int8 + inline void z_cliy(int64_t d1, Register b1, int64_t i2); // compare *(d1_imm20+b1) with i2_imm8 ; int8 + inline void z_mviy(int64_t d1, Register b1, int64_t i2); // store *(d1_imm20+b1) = i2_imm8 ; int8 + inline void z_tmy( int64_t d1, Register b1, int64_t i2); // test *(d1_imm20+b1) against mask i2_imm8 ; int8 + inline void z_niy( int64_t d1, Register b1, int64_t i2); // store *(d1_imm20+b1) &= i2_imm8 ; int8 + inline void z_oiy( int64_t d1, Register b1, int64_t i2); // store *(d1_imm20+b1) |= i2_imm8 ; int8 + inline void z_xiy( int64_t d1, Register b1, int64_t i2); // store *(d1_imm20+b1) ^= i2_imm8 ; int8 + inline void z_cli( const Address& a, int64_t imm8); // compare *(a) with imm8 ; int8 + inline void z_mvi( const Address& a, int64_t imm8); // store *(a) = imm8 ; int8 + inline void z_tm( const Address& a, int64_t imm8); // test *(a) against mask imm8 ; int8 + inline void z_ni( const Address& a, int64_t imm8); // store *(a) &= imm8 ; int8 + inline void z_oi( const Address& 
a, int64_t imm8); // store *(a) |= imm8 ; int8 + inline void z_xi( const Address& a, int64_t imm8); // store *(a) ^= imm8 ; int8 + inline void z_cliy(const Address& a, int64_t imm8); // compare *(a) with imm8 ; int8 + inline void z_mviy(const Address& a, int64_t imm8); // store *(a) = imm8 ; int8 + inline void z_tmy( const Address& a, int64_t imm8); // test *(a) against mask imm8 ; int8 + inline void z_niy( const Address& a, int64_t imm8); // store *(a) &= imm8 ; int8 + inline void z_oiy( const Address& a, int64_t imm8); // store *(a) |= imm8 ; int8 + inline void z_xiy( const Address& a, int64_t imm8); // store *(a) ^= imm8 ; int8 + + + //------------------------------ + // Interlocked-Update + //------------------------------ + inline void z_laa( Register r1, Register r3, int64_t d2, Register b2); // load and add int32, signed -- z196 + inline void z_laag( Register r1, Register r3, int64_t d2, Register b2); // load and add int64, signed -- z196 + inline void z_laal( Register r1, Register r3, int64_t d2, Register b2); // load and add int32, unsigned -- z196 + inline void z_laalg(Register r1, Register r3, int64_t d2, Register b2); // load and add int64, unsigned -- z196 + inline void z_lan( Register r1, Register r3, int64_t d2, Register b2); // load and and int32 -- z196 + inline void z_lang( Register r1, Register r3, int64_t d2, Register b2); // load and and int64 -- z196 + inline void z_lax( Register r1, Register r3, int64_t d2, Register b2); // load and xor int32 -- z196 + inline void z_laxg( Register r1, Register r3, int64_t d2, Register b2); // load and xor int64 -- z196 + inline void z_lao( Register r1, Register r3, int64_t d2, Register b2); // load and or int32 -- z196 + inline void z_laog( Register r1, Register r3, int64_t d2, Register b2); // load and or int64 -- z196 + + inline void z_laa( Register r1, Register r3, const Address& a); // load and add int32, signed -- z196 + inline void z_laag( Register r1, Register r3, const Address& a); // load and add 
int64, signed -- z196 + inline void z_laal( Register r1, Register r3, const Address& a); // load and add int32, unsigned -- z196 + inline void z_laalg(Register r1, Register r3, const Address& a); // load and add int64, unsigned -- z196 + inline void z_lan( Register r1, Register r3, const Address& a); // load and and int32 -- z196 + inline void z_lang( Register r1, Register r3, const Address& a); // load and and int64 -- z196 + inline void z_lax( Register r1, Register r3, const Address& a); // load and xor int32 -- z196 + inline void z_laxg( Register r1, Register r3, const Address& a); // load and xor int64 -- z196 + inline void z_lao( Register r1, Register r3, const Address& a); // load and or int32 -- z196 + inline void z_laog( Register r1, Register r3, const Address& a); // load and or int64 -- z196 + + //-------------------------------- + // Execution Prediction + //-------------------------------- + inline void z_pfd( int64_t m1, int64_t d2, Register x2, Register b2); // prefetch + inline void z_pfd( int64_t m1, Address a); + inline void z_pfdrl(int64_t m1, int64_t i2); // prefetch + inline void z_bpp( int64_t m1, int64_t i2, int64_t d3, Register b3); // branch prediction -- EC12 + inline void z_bprp( int64_t m1, int64_t i2, int64_t i3); // branch prediction -- EC12 + + //------------------------------- + // Transaction Control + //------------------------------- + inline void z_tbegin(int64_t d1, Register b1, int64_t i2); // begin transaction -- EC12 + inline void z_tbeginc(int64_t d1, Register b1, int64_t i2); // begin transaction (constrained) -- EC12 + inline void z_tend(); // end transaction -- EC12 + inline void z_tabort(int64_t d2, Register b2); // abort transaction -- EC12 + inline void z_etnd(Register r1); // extract tx nesting depth -- EC12 + inline void z_ppa(Register r1, Register r2, int64_t m3); // perform processor assist -- EC12 + + //--------------------------------- + // Conditional Execution + //--------------------------------- + inline void 
z_locr( Register r1, Register r2, branch_condition cc); // if (cc) load r1 = r2 ; int32 -- z196 + inline void z_locgr(Register r1, Register r2, branch_condition cc); // if (cc) load r1 = r2 ; int64 -- z196 + inline void z_loc( Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) load r1 = *(d2_simm20+b2) ; int32 -- z196 + inline void z_locg( Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) load r1 = *(d2_simm20+b2) ; int64 -- z196 + inline void z_loc( Register r1, const Address& a, branch_condition cc); // if (cc) load r1 = *(a) ; int32 -- z196 + inline void z_locg( Register r1, const Address& a, branch_condition cc); // if (cc) load r1 = *(a) ; int64 -- z196 + inline void z_stoc( Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) store *(d2_simm20+b2) = r1 ; int32 -- z196 + inline void z_stocg(Register r1, int64_t d2, Register b2, branch_condition cc); // if (cc) store *(d2_simm20+b2) = r1 ; int64 -- z196 + + + // Complex CISC instructions + // ========================== + + inline void z_cksm(Register r1, Register r2); // checksum. 
This is NOT CRC32 + inline void z_km( Register r1, Register r2); // cipher message + inline void z_kmc( Register r1, Register r2); // cipher message with chaining + inline void z_kimd(Register r1, Register r2); // msg digest (SHA) + inline void z_klmd(Register r1, Register r2); // msg digest (SHA) + inline void z_kmac(Register r1, Register r2); // msg authentication code + + inline void z_ex(Register r1, int64_t d2, Register x2, Register b2);// execute + inline void z_exrl(Register r1, int64_t i2); // execute relative long -- z10 + inline void z_exrl(Register r1, address a2); // execute relative long -- z10 + + inline void z_ectg(int64_t d1, Register b1, int64_t d2, Register b2, Register r3); // extract cpu time + inline void z_ecag(Register r1, Register r3, int64_t d2, Register b2); // extract CPU attribute + + inline void z_srst(Register r1, Register r2); // search string + inline void z_srstu(Register r1, Register r2); // search string unicode + + inline void z_mvc(const Address& d, const Address& s, int64_t l); // move l bytes + inline void z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2); // move l+1 bytes + inline void z_mvcle(Register r1, Register r3, int64_t d2, Register b2=Z_R0); // move region of memory + + inline void z_stfle(int64_t d2, Register b2); // store facility list extended + + inline void z_nc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);// and *(d1+b1) = *(d1+l+b1) & *(d2+b2) ; d1, d2: uimm12, ands l+1 bytes + inline void z_oc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);// or *(d1+b1) = *(d1+l+b1) | *(d2+b2) ; d1, d2: uimm12, ors l+1 bytes + inline void z_xc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2);// xor *(d1+b1) = *(d1+l+b1) ^ *(d2+b2) ; d1, d2: uimm12, xors l+1 bytes + inline void z_nc(Address dst, int64_t len, Address src2); // and *dst = *dst & *src2, ands len bytes in memory + inline void z_oc(Address dst, int64_t len, Address src2); // or *dst = *dst | *src2, ors len 
bytes in memory + inline void z_xc(Address dst, int64_t len, Address src2); // xor *dst = *dst ^ *src2, xors len bytes in memory + + // compare instructions + inline void z_clc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2); // compare (*(d1_uimm12+b1), *(d2_uimm12+b2)) ; compare l bytes + inline void z_clcle(Register r1, Register r3, int64_t d2, Register b2); // compare logical long extended, see docu + inline void z_clclu(Register r1, Register r3, int64_t d2, Register b2); // compare logical long unicode, see docu + + // Translate characters + inline void z_troo(Register r1, Register r2, int64_t m3); + inline void z_trot(Register r1, Register r2, int64_t m3); + inline void z_trto(Register r1, Register r2, int64_t m3); + inline void z_trtt(Register r1, Register r2, int64_t m3); + + + // Floating-point instructions + // ========================== + + // compare instructions + inline void z_cebr(FloatRegister r1, FloatRegister r2); // compare (r1, r2) ; float + inline void z_ceb(FloatRegister r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_imm12+x2+b2)) ; float + inline void z_ceb(FloatRegister r1, const Address &a); // compare (r1, *(d2_imm12+x2+b2)) ; float + inline void z_cdbr(FloatRegister r1, FloatRegister r2); // compare (r1, r2) ; double + inline void z_cdb(FloatRegister r1, int64_t d2, Register x2, Register b2); // compare (r1, *(d2_imm12+x2+b2)) ; double + inline void z_cdb(FloatRegister r1, const Address &a); // compare (r1, *(d2_imm12+x2+b2)) ; double + + // load instructions + inline void z_le( FloatRegister r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2) ; float + inline void z_ley(FloatRegister r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) ; float + inline void z_ld( FloatRegister r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_uimm12+x2+b2) ; double + inline void z_ldy(FloatRegister r1, int64_t d2, Register x2, Register b2); // load r1 = *(d2_imm20+x2+b2) 
; double + inline void z_le( FloatRegister r1, const Address &a); // load r1 = *(a) ; float + inline void z_ley(FloatRegister r1, const Address &a); // load r1 = *(a) ; float + inline void z_ld( FloatRegister r1, const Address &a); // load r1 = *(a) ; double + inline void z_ldy(FloatRegister r1, const Address &a); // load r1 = *(a) ; double + + // store instructions + inline void z_ste( FloatRegister r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; float + inline void z_stey(FloatRegister r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; float + inline void z_std( FloatRegister r1, int64_t d2, Register x2, Register b2); // store *(d2_uimm12+x2+b2) = r1 ; double + inline void z_stdy(FloatRegister r1, int64_t d2, Register x2, Register b2); // store *(d2_imm20+x2+b2) = r1 ; double + inline void z_ste( FloatRegister r1, const Address &a); // store *(a) = r1 ; float + inline void z_stey(FloatRegister r1, const Address &a); // store *(a) = r1 ; float + inline void z_std( FloatRegister r1, const Address &a); // store *(a) = r1 ; double + inline void z_stdy(FloatRegister r1, const Address &a); // store *(a) = r1 ; double + + // load and store immediates + inline void z_lzer(FloatRegister r1); // r1 = 0 ; single + inline void z_lzdr(FloatRegister r1); // r1 = 0 ; double + + // Move and Convert instructions + inline void z_ler(FloatRegister r1, FloatRegister r2); // move r1 = r2 ; float + inline void z_ldr(FloatRegister r1, FloatRegister r2); // move r1 = r2 ; double + inline void z_ledbr(FloatRegister r1, FloatRegister r2); // conv / round r1 = r2 ; float <- double + inline void z_ldebr(FloatRegister r1, FloatRegister r2); // conv r1 = r2 ; double <- float + + // move between integer and float registers + inline void z_cefbr( FloatRegister r1, Register r2); // r1 = r2; float <-- int32 + inline void z_cdfbr( FloatRegister r1, Register r2); // r1 = r2; double <-- int32 + inline void z_cegbr( FloatRegister r1, Register 
r2); // r1 = r2; float <-- int64 + inline void z_cdgbr( FloatRegister r1, Register r2); // r1 = r2; double <-- int64 + + // rounding mode for float-2-int conversions + inline void z_cfebr(Register r1, FloatRegister r2, RoundingMode m); // conv r1 = r2 ; int32 <-- float + inline void z_cfdbr(Register r1, FloatRegister r2, RoundingMode m); // conv r1 = r2 ; int32 <-- double + inline void z_cgebr(Register r1, FloatRegister r2, RoundingMode m); // conv r1 = r2 ; int64 <-- float + inline void z_cgdbr(Register r1, FloatRegister r2, RoundingMode m); // conv r1 = r2 ; int64 <-- double + + inline void z_ldgr(FloatRegister r1, Register r2); // fr1 = r2 ; bitwise copy, no conversion -- z10 + inline void z_lgdr(Register r1, FloatRegister r2); // r1 = fr2 ; bitwise copy, no conversion -- z10 + + + // ADD + inline void z_aebr(FloatRegister f1, FloatRegister f2); // f1 = f1 + f2 ; float + inline void z_adbr(FloatRegister f1, FloatRegister f2); // f1 = f1 + f2 ; double + inline void z_aeb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 + *(d2+x2+b2) ; float + inline void z_adb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 + *(d2+x2+b2) ; double + inline void z_aeb( FloatRegister f1, const Address& a); // f1 = f1 + *(a) ; float + inline void z_adb( FloatRegister f1, const Address& a); // f1 = f1 + *(a) ; double + + // SUB + inline void z_sebr(FloatRegister f1, FloatRegister f2); // f1 = f1 - f2 ; float + inline void z_sdbr(FloatRegister f1, FloatRegister f2); // f1 = f1 - f2 ; double + inline void z_seb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 - *(d2+x2+b2) ; float + inline void z_sdb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 - *(d2+x2+b2) ; double + inline void z_seb( FloatRegister f1, const Address& a); // f1 = f1 - *(a) ; float + inline void z_sdb( FloatRegister f1, const Address& a); // f1 = f1 - *(a) ; double + // negate + inline void z_lcebr(FloatRegister r1, FloatRegister r2); // neg 
r1 = -r2 ; float + inline void z_lcdbr(FloatRegister r1, FloatRegister r2); // neg r1 = -r2 ; double + + // Absolute value, monadic if fr2 == fnoreg. + inline void z_lpdbr( FloatRegister fr1, FloatRegister fr2 = fnoreg); // fr1 = |fr2| + + + // MUL + inline void z_meebr(FloatRegister f1, FloatRegister f2); // f1 = f1 * f2 ; float + inline void z_mdbr( FloatRegister f1, FloatRegister f2); // f1 = f1 * f2 ; double + inline void z_meeb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 * *(d2+x2+b2) ; float + inline void z_mdb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 * *(d2+x2+b2) ; double + inline void z_meeb( FloatRegister f1, const Address& a); // f1 = f1 * *(a) ; float + inline void z_mdb( FloatRegister f1, const Address& a); // f1 = f1 * *(a) ; double + + // DIV + inline void z_debr( FloatRegister f1, FloatRegister f2); // f1 = f1 / f2 ; float + inline void z_ddbr( FloatRegister f1, FloatRegister f2); // f1 = f1 / f2 ; double + inline void z_deb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 / *(d2+x2+b2) ; float + inline void z_ddb( FloatRegister f1, int64_t d2, Register x2, Register b2); // f1 = f1 / *(d2+x2+b2) ; double + inline void z_deb( FloatRegister f1, const Address& a); // f1 = f1 / *(a) ; float + inline void z_ddb( FloatRegister f1, const Address& a); // f1 = f1 / *(a) ; double + + // square root + inline void z_sqdbr(FloatRegister fr1, FloatRegister fr2); // fr1 = sqrt(fr2) ; double + inline void z_sqdb( FloatRegister fr1, int64_t d2, Register x2, Register b2); // fr1 = sqrt(*(d2+x2+b2)) ; double + inline void z_sqdb( FloatRegister fr1, int64_t d2, Register b2); // fr1 = sqrt(*(d2+b2)) ; double + + // Nop instruction + // =============== + + // branch never (nop) + inline void z_nop(); + + // =============================================================================================== + + // Simplified emitters: + // ==================== + + + // Some memory instructions without index register (just convenience). 
+ inline void z_layz(Register r1, int64_t d2, Register b2 = Z_R0); + inline void z_lay(Register r1, int64_t d2, Register b2); + inline void z_laz(Register r1, int64_t d2, Register b2); + inline void z_la(Register r1, int64_t d2, Register b2); + inline void z_l(Register r1, int64_t d2, Register b2); + inline void z_ly(Register r1, int64_t d2, Register b2); + inline void z_lg(Register r1, int64_t d2, Register b2); + inline void z_st(Register r1, int64_t d2, Register b2); + inline void z_sty(Register r1, int64_t d2, Register b2); + inline void z_stg(Register r1, int64_t d2, Register b2); + inline void z_lgf(Register r1, int64_t d2, Register b2); + inline void z_lgh(Register r1, int64_t d2, Register b2); + inline void z_llgh(Register r1, int64_t d2, Register b2); + inline void z_llgf(Register r1, int64_t d2, Register b2); + inline void z_lgb(Register r1, int64_t d2, Register b2); + inline void z_cl( Register r1, int64_t d2, Register b2); + inline void z_c(Register r1, int64_t d2, Register b2); + inline void z_cg(Register r1, int64_t d2, Register b2); + inline void z_sh(Register r1, int64_t d2, Register b2); + inline void z_shy(Register r1, int64_t d2, Register b2); + inline void z_ste(FloatRegister r1, int64_t d2, Register b2); + inline void z_std(FloatRegister r1, int64_t d2, Register b2); + inline void z_stdy(FloatRegister r1, int64_t d2, Register b2); + inline void z_stey(FloatRegister r1, int64_t d2, Register b2); + inline void z_ld(FloatRegister r1, int64_t d2, Register b2); + inline void z_ldy(FloatRegister r1, int64_t d2, Register b2); + inline void z_le(FloatRegister r1, int64_t d2, Register b2); + inline void z_ley(FloatRegister r1, int64_t d2, Register b2); + + inline void z_agf(Register r1, int64_t d2, Register b2); + + inline void z_exrl(Register r1, Label& L); + inline void z_larl(Register r1, Label& L); + inline void z_bru( Label& L); + inline void z_brul(Label& L); + inline void z_brul(address a); + inline void z_brh( Label& L); + inline void z_brl( 
Label& L); + inline void z_bre( Label& L); + inline void z_brnh(Label& L); + inline void z_brnl(Label& L); + inline void z_brne(Label& L); + inline void z_brz( Label& L); + inline void z_brnz(Label& L); + inline void z_brnaz(Label& L); + inline void z_braz(Label& L); + inline void z_brnp(Label& L); + + inline void z_btrue( Label& L); + inline void z_bfalse(Label& L); + + inline void z_brno( Label& L); + + + inline void z_basr(Register r1, Register r2); + inline void z_brasl(Register r1, address a); + inline void z_brct(Register r1, address a); + inline void z_brct(Register r1, Label& L); + + inline void z_brxh(Register r1, Register r3, address a); + inline void z_brxh(Register r1, Register r3, Label& L); + + inline void z_brxle(Register r1, Register r3, address a); + inline void z_brxle(Register r1, Register r3, Label& L); + + inline void z_brxhg(Register r1, Register r3, address a); + inline void z_brxhg(Register r1, Register r3, Label& L); + + inline void z_brxlg(Register r1, Register r3, address a); + inline void z_brxlg(Register r1, Register r3, Label& L); + + // Population count intrinsics. 
+ inline void z_flogr(Register r1, Register r2); // find leftmost one + inline void z_popcnt(Register r1, Register r2); // population count + inline void z_ahhhr(Register r1, Register r2, Register r3); // ADD halfword high high + inline void z_ahhlr(Register r1, Register r2, Register r3); // ADD halfword high low + + inline void z_tam(); + inline void z_stck(int64_t d2, Register b2); + inline void z_stckf(int64_t d2, Register b2); + inline void z_stmg(Register r1, Register r3, int64_t d2, Register b2); + inline void z_lmg(Register r1, Register r3, int64_t d2, Register b2); + + inline void z_cs( Register r1, Register r3, int64_t d2, Register b2); + inline void z_csy(Register r1, Register r3, int64_t d2, Register b2); + inline void z_csg(Register r1, Register r3, int64_t d2, Register b2); + inline void z_cs( Register r1, Register r3, const Address& a); + inline void z_csy(Register r1, Register r3, const Address& a); + inline void z_csg(Register r1, Register r3, const Address& a); + + inline void z_cvd(Register r1, int64_t d2, Register x2, Register b2); + inline void z_cvdg(Register r1, int64_t d2, Register x2, Register b2); + inline void z_cvd(Register r1, int64_t d2, Register b2); + inline void z_cvdg(Register r1, int64_t d2, Register b2); + + // Instruction queries: + // instruction properties and recognize emitted instructions + // =========================================================== + + static int nop_size() { return 2; } + + static int z_brul_size() { return 6; } + + // Opcode predicates: x holds instruction bits; the result is true when x masked with the given *_MASK equals the *_ZOPC constant. + static bool is_z_basr(short x) { + return (BASR_ZOPC == (x & BASR_MASK)); + } + static bool is_z_algr(long x) { + return (ALGR_ZOPC == (x & RRE_MASK)); + } + static bool is_z_lb(long x) { + return (LB_ZOPC == (x & LB_MASK)); + } + static bool is_z_lh(int x) { + return (LH_ZOPC == (x & LH_MASK)); + } + static bool is_z_l(int x) { + return (L_ZOPC == (x & L_MASK)); + } + static bool is_z_lgr(long x) { + return (LGR_ZOPC == (x & RRE_MASK)); + } + static bool is_z_ly(long x) { + return (LY_ZOPC 
== (x & LY_MASK)); + } + static bool is_z_lg(long x) { + return (LG_ZOPC == (x & LG_MASK)); + } + static bool is_z_llgh(long x) { + return (LLGH_ZOPC == (x & LLGH_MASK)); + } + static bool is_z_llgf(long x) { + return (LLGF_ZOPC == (x & LLGF_MASK)); + } + static bool is_z_le(int x) { + return (LE_ZOPC == (x & LE_MASK)); + } + static bool is_z_ld(int x) { + return (LD_ZOPC == (x & LD_MASK)); + } + static bool is_z_st(int x) { + return (ST_ZOPC == (x & ST_MASK)); + } + static bool is_z_stc(int x) { + return (STC_ZOPC == (x & STC_MASK)); + } + static bool is_z_stg(long x) { + return (STG_ZOPC == (x & STG_MASK)); + } + static bool is_z_sth(int x) { + return (STH_ZOPC == (x & STH_MASK)); + } + static bool is_z_ste(int x) { + return (STE_ZOPC == (x & STE_MASK)); + } + static bool is_z_std(int x) { + return (STD_ZOPC == (x & STD_MASK)); + } + static bool is_z_slag(long x) { + return (SLAG_ZOPC == (x & SLAG_MASK)); + } + static bool is_z_tmy(long x) { + return (TMY_ZOPC == (x & TMY_MASK)); + } + static bool is_z_tm(long x) { + return ((unsigned int)TM_ZOPC == (x & (unsigned int)TM_MASK)); + } + static bool is_z_bcr(long x) { + return (BCR_ZOPC == (x & BCR_MASK)); + } + static bool is_z_nop(long x) { + return is_z_bcr(x) && ((x & 0x00ff) == 0); + } + static bool is_z_nop(address x) { + return is_z_nop(* (short *) x); + } + static bool is_z_br(long x) { + return is_z_bcr(x) && ((x & 0x00f0) == 0x00f0); + } + static bool is_z_brc(long x, int cond) { + return ((unsigned int)BRC_ZOPC == (x & BRC_MASK)) && ((cond<<20) == (x & 0x00f00000U)); + } + // Make use of lightweight sync. 
+ static bool is_z_sync_full(long x) { + return is_z_bcr(x) && (((x & 0x00f0)>>4)==bcondFullSync) && ((x & 0x000f)==0x0000); // a BCR with bcondFullSync in bits 0x00f0 and a zero low nibble + } + static bool is_z_sync_light(long x) { + return is_z_bcr(x) && (((x & 0x00f0)>>4)==bcondLightSync) && ((x & 0x000f)==0x0000); // a BCR with bcondLightSync in bits 0x00f0 and a zero low nibble + } + static bool is_z_sync(long x) { + return is_z_sync_full(x) || is_z_sync_light(x); + } + + static bool is_z_brasl(long x) { + return (BRASL_ZOPC == (x & BRASL_MASK)); + } + static bool is_z_brasl(address a) { + long x = (*((long *)a))>>16; // load a long from a and drop its low 16 bits before matching + return is_z_brasl(x); + } + static bool is_z_larl(long x) { + return (LARL_ZOPC == (x & LARL_MASK)); + } + static bool is_z_lgrl(long x) { + return (LGRL_ZOPC == (x & LGRL_MASK)); + } + static bool is_z_lgrl(address a) { + long x = (*((long *)a))>>16; // load a long from a and drop its low 16 bits before matching + return is_z_lgrl(x); + } + + static bool is_z_lghi(unsigned long x) { + return (unsigned int)LGHI_ZOPC == (x & (unsigned int)LGHI_MASK); + } + + static bool is_z_llill(unsigned long x) { + return (unsigned int)LLILL_ZOPC == (x & (unsigned int)LLI_MASK); + } + static bool is_z_llilh(unsigned long x) { + return (unsigned int)LLILH_ZOPC == (x & (unsigned int)LLI_MASK); + } + static bool is_z_llihl(unsigned long x) { + return (unsigned int)LLIHL_ZOPC == (x & (unsigned int)LLI_MASK); + } + static bool is_z_llihh(unsigned long x) { + return (unsigned int)LLIHH_ZOPC == (x & (unsigned int)LLI_MASK); + } + static bool is_z_llilf(unsigned long x) { + return LLILF_ZOPC == (x & LLIF_MASK); + } + static bool is_z_llihf(unsigned long x) { + return LLIHF_ZOPC == (x & LLIF_MASK); + } + + static bool is_z_iill(unsigned long x) { + return (unsigned int)IILL_ZOPC == (x & (unsigned int)II_MASK); + } + static bool is_z_iilh(unsigned long x) { + return (unsigned int)IILH_ZOPC == (x & (unsigned int)II_MASK); + } + static bool is_z_iihl(unsigned long x) { + return (unsigned int)IIHL_ZOPC == (x & (unsigned int)II_MASK); + } + static bool is_z_iihh(unsigned long x) { + return (unsigned int)IIHH_ZOPC == (x & (unsigned int)II_MASK); + } + static bool 
is_z_iilf(unsigned long x) { + return IILF_ZOPC == (x & IIF_MASK); + } + static bool is_z_iihf(unsigned long x) { + return IIHF_ZOPC == (x & IIF_MASK); + } + + static inline bool is_equal(unsigned long inst, unsigned long idef); + static inline bool is_equal(unsigned long inst, unsigned long idef, unsigned long imask); + static inline bool is_equal(address iloc, unsigned long idef); + static inline bool is_equal(address iloc, unsigned long idef, unsigned long imask); + + static inline bool is_sigtrap_range_check(address pc); + static inline bool is_sigtrap_zero_check(address pc); + + //----------------- + // memory barriers + //----------------- + // machine barrier instructions: + // + // - z_sync Two-way memory barrier, aka fence. + // Only load-after-store-order is not guaranteed in the + // z/Architecture memory model, i.e. only 'fence' is needed. + // + // semantic barrier instructions: + // (as defined in orderAccess.hpp) + // + // - z_release orders Store|Store, empty implementation + // Load|Store + // - z_acquire orders Load|Store, empty implementation + // Load|Load + // - z_fence orders Store|Store, implemented as z_sync. + // Load|Store, + // Load|Load, + // Store|Load + // + // For this implementation to be correct, we need H/W fixes on (very) old H/W: + // For z990, it is Driver-55: MCL232 in the J13484 (i390/ML) Stream. + // For z9, it is Driver-67: MCL065 in the G40963 (i390/ML) Stream. + // These drivers are a prerequisite. Otherwise, memory synchronization will not work. 
+ + inline void z_sync(); + inline void z_release(); + inline void z_acquire(); + inline void z_fence(); + + // Creation: the CodeBuffer is passed on to the AbstractAssembler base class. + Assembler(CodeBuffer* code) : AbstractAssembler(code) { } + +}; + +#endif // CPU_S390_VM_ASSEMBLER_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/assembler_s390.inline.hpp b/hotspot/src/cpu/s390/vm/assembler_s390.inline.hpp new file mode 100644 index 00000000000..42038c8f95e --- /dev/null +++ b/hotspot/src/cpu/s390/vm/assembler_s390.inline.hpp @@ -0,0 +1,1015 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_ASSEMBLER_S390_INLINE_HPP +#define CPU_S390_VM_ASSEMBLER_S390_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +// Convention: Use Z_R0 and Z_R1 instead of Z_scratch_* in all +// assembler_s390.* files. + +// Local implementation of byte emitters to help inlining. 
+inline void Assembler::emit_16(int x) { + CodeSection* cs = code_section(); + address code_pos = pc(); + *(unsigned short*)code_pos = (unsigned short)x; // store x truncated to an unsigned short at the current pc + cs->set_end( code_pos + sizeof(unsigned short)); // move the section end past what was just written +} + +inline void Assembler::emit_32(int x) { + CodeSection* cs = code_section(); + address code_pos = pc(); + *(jint*)code_pos = (jint)x; // store x as a jint at the current pc + cs->set_end( code_pos + sizeof( jint)); // move the section end past what was just written +} + +inline void Assembler::emit_48(long x) { + CodeSection* cs = code_section(); + address code_pos = pc(); + *(unsigned short*)code_pos = (unsigned short)(x>>32); // first x>>32, truncated to an unsigned short + *(jint*)(code_pos+sizeof(unsigned short)) = (jint)x; // then x truncated to a jint, directly behind it + cs->set_end( code_pos + sizeof( jint) + sizeof( unsigned short)); // move the section end past both stores +} + +// Support lightweight sync (from z196). Experimental as of now. For explanation see *.hpp file. +inline void Assembler::z_sync() { + if (VM_Version::has_FastSync()) { + z_bcr(bcondLightSync, Z_R0); + } else { + z_bcr(bcondFullSync, Z_R0); + } +} +inline void Assembler::z_release() { } +inline void Assembler::z_acquire() { } +inline void Assembler::z_fence() { z_sync(); } + +inline void Assembler::z_illtrap() { + emit_16(0); +} +inline void Assembler::z_illtrap(int id) { + emit_16(id & 0x00ff); // only the low 8 bits of id are emitted +} +inline void Assembler::z_illtrap_eyecatcher(unsigned short xpattern, unsigned short pattern) { + z_llill(Z_R0, xpattern); + z_iilh(Z_R0, pattern); + z_illtrap((unsigned int)xpattern); +} + +inline void Assembler::z_lhrl(Register r1, int64_t i2) { emit_48( LHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_lrl(Register r1, int64_t i2) { emit_48( LRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_lghrl(Register r1, int64_t i2) { emit_48( LGHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_lgfrl(Register r1, int64_t i2) { emit_48( LGFRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_lgrl(Register r1, int64_t i2) { emit_48( LGRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void 
Assembler::z_llhrl(Register r1, int64_t i2) { emit_48( LLHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_llghrl(Register r1, int64_t i2){ emit_48( LLGHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_llgfrl(Register r1, int64_t i2){ emit_48( LLGFRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } + +inline void Assembler::z_sthrl(Register r1, int64_t i2) { emit_48( STHRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_strl(Register r1, int64_t i2) { emit_48( STRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_stgrl(Register r1, int64_t i2) { emit_48( STGRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } + +inline void Assembler::z_cksm(Register r1, Register r2) { emit_32( CKSM_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); } +inline void Assembler::z_km( Register r1, Register r2) { emit_32( KM_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); } +inline void Assembler::z_kmc( Register r1, Register r2) { emit_32( KMC_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); } +inline void Assembler::z_kimd(Register r1, Register r2) { emit_32( KIMD_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); } +inline void Assembler::z_klmd(Register r1, Register r2) { emit_32( KLMD_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); } +inline void Assembler::z_kmac(Register r1, Register r2) { emit_32( KMAC_ZOPC | regt(r1, 24, 32) | regt(r2, 28, 32)); } + +inline void Assembler::z_exrl(Register r1, int64_t i2) { emit_48( EXRL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } // z10 +inline void Assembler::z_exrl(Register r1, address a2) { emit_48( EXRL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a2, pc()), 16, 48)); } // z10 + +inline void Assembler::z_ectg(int64_t d1, Register b1, int64_t d2, Register b2, Register r3) { emit_48( ECTG_ZOPC | reg(r3, 8, 48) | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm12(d2, 36, 48) | reg(b2, 32, 48)); } +inline void Assembler::z_ecag(Register r1, Register r3, 
int64_t d2, Register b2) { emit_48( ECAG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); } + + +//------------------------------ +// Interlocked-Update +//------------------------------ +inline void Assembler::z_laa( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAA_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_laag( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_laal( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAAL_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_laalg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAALG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_lan( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAN_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_lang( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LANG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_lax( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAX_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_laxg( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAXG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_lao( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAO_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_laog( Register r1, Register r3, int64_t d2, Register b2) { emit_48( LAOG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } + +inline void Assembler::z_laa( 
Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laa( r1, r3, a.disp12(), a.base()); } +inline void Assembler::z_laag( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laag( r1, r3, a.disp12(), a.base()); } +inline void Assembler::z_laal( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laal( r1, r3, a.disp12(), a.base()); } +inline void Assembler::z_laalg(Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laalg(r1, r3, a.disp12(), a.base()); } +inline void Assembler::z_lan( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lan( r1, r3, a.disp12(), a.base()); } +inline void Assembler::z_lang( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lang( r1, r3, a.disp12(), a.base()); } +inline void Assembler::z_lax( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lax( r1, r3, a.disp12(), a.base()); } +inline void Assembler::z_laxg( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laxg( r1, r3, a.disp12(), a.base()); } +inline void Assembler::z_lao( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_lao( r1, r3, a.disp12(), a.base()); } +inline void Assembler::z_laog( Register r1, Register r3, const Address& a) { assert(!a.has_index(), " no index reg allowed"); z_laog( r1, r3, a.disp12(), a.base()); } + +//-------------------------------- +// Execution Prediction +//-------------------------------- +inline void Assembler::z_pfd( int64_t m1, int64_t d2, Register x2, Register b2) { emit_48( PFD_ZOPC | uimm4(m1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_pfd( int64_t m1, Address a) { z_pfd(m1, a.disp(), 
a.indexOrR0(), a.base()); } +inline void Assembler::z_pfdrl(int64_t m1, int64_t i2) { emit_48( PFDRL_ZOPC | uimm4(m1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_bpp( int64_t m1, int64_t i2, int64_t d3, Register b3) { emit_48( BPP_ZOPC | uimm4(m1, 8, 48) | uimm12(d3, 20, 48) | reg(b3, 16, 48) | simm16(i2, 32, 48)); } +inline void Assembler::z_bprp( int64_t m1, int64_t i2, int64_t i3) { emit_48( BPRP_ZOPC | uimm4(m1, 8, 48) | simm12(i2, 12, 48) | simm24(i3, 24, 48)); } + +//------------------------------- +// Transaction Control +//------------------------------- +inline void Assembler::z_tbegin( int64_t d1, Register b1, int64_t i2) { emit_48( TBEGIN_ZOPC | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm16(i2, 32, 48)); } +inline void Assembler::z_tbeginc(int64_t d1, Register b1, int64_t i2) { emit_48( TBEGINC_ZOPC | uimm12(d1, 20, 48) | reg(b1, 16, 48) | uimm16(i2, 32, 48)); } +inline void Assembler::z_tend() { emit_32( TEND_ZOPC); } +inline void Assembler::z_tabort( int64_t d2, Register b2) { emit_32( TABORT_ZOPC | uimm12(d2, 20, 32) | reg(b2, 16, 32)); } +inline void Assembler::z_etnd(Register r1) { emit_32( ETND_ZOPC | regt(r1, 24, 32)); } +inline void Assembler::z_ppa(Register r1, Register r2, int64_t m3) { emit_32( PPA_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); } + +//--------------------------------- +// Conditional Execution +//--------------------------------- +inline void Assembler::z_locr( Register r1, Register r2, branch_condition cc) { emit_32( LOCR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); } // z196 +inline void Assembler::z_locgr( Register r1, Register r2, branch_condition cc) { emit_32( LOCGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | uimm4(cc, 16, 32)); } // z196 +inline void Assembler::z_loc( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( LOC_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196 +inline void Assembler::z_locg( Register r1, 
int64_t d2, Register b2, branch_condition cc) { emit_48( LOCG_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196 +inline void Assembler::z_loc( Register r1, const Address &a, branch_condition cc) { z_loc(r1, a.disp(), a.base(), cc); } +inline void Assembler::z_locg( Register r1, const Address &a, branch_condition cc) { z_locg(r1, a.disp(), a.base(), cc); } +inline void Assembler::z_stoc( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOC_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196 +inline void Assembler::z_stocg( Register r1, int64_t d2, Register b2, branch_condition cc) { emit_48( STOCG_ZOPC | regt(r1, 8, 48) | simm20(d2) | regz(b2, 16, 48) | uimm4(cc, 12, 48)); } // z196 + +inline void Assembler::z_srst( Register r1, Register r2) { emit_32( SRST_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_srstu(Register r1, Register r2) { emit_32( SRSTU_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } + +//--------------------------------- +// Address calculation +//--------------------------------- +inline void Assembler::z_layz(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | reg(b2, 16, 48)); } +inline void Assembler::z_lay( Register r1, const Address &a) { z_layz(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_lay( Register r1, int64_t d2, Register x2, Register b2) { emit_48( LAY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_laz( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | reg(b2, 16, 32)); } +inline void Assembler::z_la( Register r1, const Address &a) { z_laz(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_la( Register r1, int64_t d2, Register x2, Register b2) { emit_32( LA_ZOPC | regt(r1, 
8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32));} +inline void Assembler::z_larl(Register r1, int64_t i2) { emit_48( LARL_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_larl(Register r1, address a) { emit_48( LARL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); } + +inline void Assembler::z_lr(Register r1, Register r2) { emit_16( LR_ZOPC | regt(r1,8,16) | reg(r2,12,16)); } +inline void Assembler::z_lgr(Register r1, Register r2) { emit_32( LGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_lh(Register r1, int64_t d2, Register x2, Register b2) { emit_32( LH_ZOPC | 0 << 16 | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_lh(Register r1, const Address &a) { z_lh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_l(Register r1, int64_t d2, Register x2, Register b2) { emit_32( L_ZOPC | 0 << 16 | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_l(Register r1, const Address &a) { z_l(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_lg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_lg(Register r1, const Address &a) { z_lg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } + +inline void Assembler::z_lbr( Register r1, Register r2) { emit_32( LBR_ZOPC | regt(r1, 24, 32) | reg( r2, 28, 32)); } +inline void Assembler::z_lhr( Register r1, Register r2) { emit_32( LHR_ZOPC | regt(r1, 24, 32) | reg( r2, 28, 32)); } +inline void Assembler::z_lgbr( Register r1, Register r2) { emit_32( LGBR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_lghr( Register r1, Register r2) { emit_32( LGHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_lgfr( Register r1, Register r2) { emit_32( 
LGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_llhr( Register r1, Register r2) { emit_32( LLHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_llgcr(Register r1, Register r2) { emit_32( LLGCR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_llghr(Register r1, Register r2) { emit_32( LLGHR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_llgfr(Register r1, Register r2) { emit_32( LLGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } + +inline void Assembler::z_sth(Register r1, const Address &a) { z_sth(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_sth(Register r1, int64_t d2, Register x2, Register b2) { emit_32( STH_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_st( Register r1, const Address& d) { z_st(r1, d.disp(), d.indexOrR0(), d.base()); } +inline void Assembler::z_st( Register r1, int64_t d2, Register x2, Register b2) { emit_32( ST_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_stg(Register r1, const Address& d) { z_stg(r1, d.disp(), d.indexOrR0(), d.base()); } +inline void Assembler::z_stg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( STG_ZOPC | reg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } + +inline void Assembler::z_stcm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( STCM_ZOPC | regt(r1, 8, 32) | uimm4(m3, 12, 32) | uimm12(d2, 20, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_stcmy(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_stcmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( STCMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } + +// memory-immediate instructions (8-bit immediate) 
+inline void Assembler::z_cli( int64_t d1, Register b1, int64_t i2) { emit_32( CLI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | uimm8(i2, 8, 32)); } +inline void Assembler::z_mvi( int64_t d1, Register b1, int64_t i2) { emit_32( MVI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); } +inline void Assembler::z_tm( int64_t d1, Register b1, int64_t i2) { emit_32( TM_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); } +inline void Assembler::z_ni( int64_t d1, Register b1, int64_t i2) { emit_32( NI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); } +inline void Assembler::z_oi( int64_t d1, Register b1, int64_t i2) { emit_32( OI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); } +inline void Assembler::z_xi( int64_t d1, Register b1, int64_t i2) { emit_32( XI_ZOPC | uimm12(d1, 20, 32) | regz(b1, 16, 32) | imm8(i2, 8, 32)); } +inline void Assembler::z_cliy(int64_t d1, Register b1, int64_t i2) { emit_48( CLIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | uimm8(i2, 8, 48)); } +inline void Assembler::z_mviy(int64_t d1, Register b1, int64_t i2) { emit_48( MVIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); } +inline void Assembler::z_tmy( int64_t d1, Register b1, int64_t i2) { emit_48( TMY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); } +inline void Assembler::z_niy( int64_t d1, Register b1, int64_t i2) { emit_48( NIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); } +inline void Assembler::z_oiy( int64_t d1, Register b1, int64_t i2) { emit_48( OIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); } +inline void Assembler::z_xiy( int64_t d1, Register b1, int64_t i2) { emit_48( XIY_ZOPC | simm20(d1) | regz(b1, 16, 48) | imm8(i2, 8, 48)); } + +inline void Assembler::z_cli( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLI"); z_cli( a.disp12(), a.base(), imm); } +inline void Assembler::z_mvi( const Address& a, int64_t imm) { assert(!a.has_index(), " no 
index reg allowed in CLI"); z_mvi( a.disp12(), a.base(), imm); } +inline void Assembler::z_tm( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLI"); z_tm( a.disp12(), a.base(), imm); } +inline void Assembler::z_ni( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLI"); z_ni( a.disp12(), a.base(), imm); } +inline void Assembler::z_oi( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLI"); z_oi( a.disp12(), a.base(), imm); } +inline void Assembler::z_xi( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLI"); z_xi( a.disp12(), a.base(), imm); } +inline void Assembler::z_cliy(const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in CLIY"); z_cliy(a.disp20(), a.base(), imm); } +inline void Assembler::z_mviy(const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in MVIY"); z_mviy(a.disp20(), a.base(), imm); } +inline void Assembler::z_tmy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in TMY"); z_tmy( a.disp20(), a.base(), imm); } +inline void Assembler::z_niy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in NIY"); z_niy( a.disp20(), a.base(), imm); } +inline void Assembler::z_oiy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in OIY"); z_oiy( a.disp20(), a.base(), imm); } +inline void Assembler::z_xiy( const Address& a, int64_t imm) { assert(!a.has_index(), " no index reg allowed in XIY"); z_xiy( a.disp20(), a.base(), imm); } + + +inline void Assembler::z_mvc(const Address& d, const Address& s, int64_t l) { + assert(!d.has_index() && !s.has_index(), "Address operand can not be encoded."); + z_mvc(d.disp(), l-1, d.base(), s.disp(), s.base()); +} +inline void Assembler::z_mvc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( MVC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 
48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); } +inline void Assembler::z_mvcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( MVCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); } + +inline void Assembler::z_mvhhi( int64_t d1, Register b1, int64_t i2) { emit_48( MVHHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); } +inline void Assembler::z_mvhi ( int64_t d1, Register b1, int64_t i2) { emit_48( MVHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); } +inline void Assembler::z_mvghi( int64_t d1, Register b1, int64_t i2) { emit_48( MVGHI_ZOPC | uimm12( d1, 20, 48) | regz(b1, 16, 48) | simm16(i2, 32, 48)); } +inline void Assembler::z_mvhhi( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVHHI"); z_mvghi( d.disp(), d.baseOrR0(), i2); } +inline void Assembler::z_mvhi ( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVHI"); z_mvghi( d.disp(), d.baseOrR0(), i2); } +inline void Assembler::z_mvghi( const Address &d, int64_t i2) { assert(!d.has_index(), " no index reg allowed in MVGHI"); z_mvghi( d.disp(), d.baseOrR0(), i2); } + +inline void Assembler::z_ex(Register r1, int64_t d2, Register x2, Register b2) { emit_32( EX_ZOPC | regz(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } + +inline void Assembler::z_ic (Register r1, int64_t d2, Register x2, Register b2) { emit_32( IC_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_icy (Register r1, int64_t d2, Register x2, Register b2) { emit_48( ICY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_icm (Register r1, int64_t m3, int64_t d2, Register b2) { emit_32( ICM_ZOPC | regt(r1, 8, 32) | uimm4(m3, 12, 32) | uimm12(d2, 20, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_icmy(Register r1, int64_t m3, int64_t 
d2, Register b2) { emit_48( ICMY_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_icmh(Register r1, int64_t m3, int64_t d2, Register b2) { emit_48( ICMH_ZOPC | regt(r1, 8, 48) | uimm4(m3, 12, 48) | simm20(d2) | regz(b2, 16, 48)); } +inline void Assembler::z_iihh(Register r1, int64_t i2) { emit_32( IIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); } +inline void Assembler::z_iihl(Register r1, int64_t i2) { emit_32( IIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); } +inline void Assembler::z_iilh(Register r1, int64_t i2) { emit_32( IILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); } +inline void Assembler::z_iill(Register r1, int64_t i2) { emit_32( IILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); } +inline void Assembler::z_iihf(Register r1, int64_t i2) { emit_48( IIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); } +inline void Assembler::z_iilf(Register r1, int64_t i2) { emit_48( IILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); } +inline void Assembler::z_lgf(Register r1, const Address& a) { z_lgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_lgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_lhy(Register r1, const Address &a) { z_lhy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_lhy(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_lgh(Register r1, const Address &a) { z_lgh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_lgh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_lt(Register r1, const Address &a) { z_lt(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void 
Assembler::z_lt (Register r1, int64_t d2, Register x2, Register b2) { emit_48( LT_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_ltg(Register r1, const Address &a) { z_ltg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_ltg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LTG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_ltgf(Register r1, const Address &a) { z_ltgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_ltgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LTGF_ZOPC| regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_lb(Register r1, const Address &a) { z_lb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_lb (Register r1, int64_t d2, Register x2, Register b2) { emit_48( LB_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_lgb(Register r1, const Address &a) { z_lgb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_lgb(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LGB_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_ly(Register r1, const Address &a) { z_ly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_ly(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_llc(Register r1, const Address& a) { z_llc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_llc(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLC_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_llh(Register r1, const Address &a) { z_llh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } 
+inline void Assembler::z_llh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_llgf(Register r1, const Address &a) { z_llgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_llgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_llgh(Register r1, const Address &a) { z_llgh(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_llgh(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGH_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_llgc(Register r1, const Address &a) { z_llgc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_llgc(Register r1, int64_t d2, Register x2, Register b2) { emit_48( LLGC_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_llgc(Register r1, int64_t d2, Register b2) { z_llgc( r1, d2, Z_R0, b2); } +inline void Assembler::z_lhi(Register r1, int64_t i2) { emit_32( LHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); } +inline void Assembler::z_lghi(Register r1, int64_t i2) { emit_32( LGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); } +inline void Assembler::z_lgfi(Register r1, int64_t i2) { emit_48( LGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); } +inline void Assembler::z_llihf(Register r1, int64_t i2) { emit_48( LLIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); } +inline void Assembler::z_llilf(Register r1, int64_t i2) { emit_48( LLILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); } +inline void Assembler::z_llihh(Register r1, int64_t i2) { emit_32( LLIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); } +inline void Assembler::z_llihl(Register r1, int64_t i2) { emit_32( LLIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); } +inline 
void Assembler::z_llilh(Register r1, int64_t i2) { emit_32( LLILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); } +inline void Assembler::z_llill(Register r1, int64_t i2) { emit_32( LLILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); } + +// allow "monadic" use: when r2 is noreg, r1 is encoded as the second register operand as well +inline void Assembler::z_lcr( Register r1, Register r2) { emit_16( LCR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); } +inline void Assembler::z_lcgr( Register r1, Register r2) { emit_32( LCGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); } +inline void Assembler::z_lcgfr(Register r1, Register r2) { emit_32( LCGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); } +inline void Assembler::z_lnr( Register r1, Register r2) { emit_16( LNR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); } +inline void Assembler::z_lngr( Register r1, Register r2) { emit_32( LNGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); } +inline void Assembler::z_lngfr(Register r1, Register r2) { emit_32( LNGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? 
r1:r2, 28, 32)); } + +inline void Assembler::z_lrvr( Register r1, Register r2) { emit_32( LRVR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_lrvgr(Register r1, Register r2) { emit_32( LRVGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } + +inline void Assembler::z_ltr( Register r1, Register r2) { emit_16( LTR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); } +inline void Assembler::z_ltgr( Register r1, Register r2) { emit_32( LTGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_ltgfr(Register r1, Register r2) { emit_32( LTGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_stc( Register r1, const Address &a) { z_stc(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_stc( Register r1, int64_t d2, Register x2, Register b2) { emit_32( STC_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_stcy( Register r1, const Address &a) { z_stcy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_stcy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STCY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_sthy( Register r1, const Address &a) { z_sthy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_sthy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_sty( Register r1, const Address &a) { z_sty(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_sty( Register r1, int64_t d2, Register x2, Register b2) { emit_48( STY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_stfle(int64_t d2, Register b2) { emit_32(STFLE_ZOPC | uimm12(d2,20,32) | regz(b2,16,32)); } + + +//----------------------------------- +// SHIFT/ROTATE OPERATIONS 
+//-----------------------------------
+inline void Assembler::z_sla( Register r1, int64_t d2, Register b2) { emit_32( SLA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); } // Each emitter ORs the opcode constant with its operand fields and hands the result to emit_16/emit_32/emit_48.
+inline void Assembler::z_slag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLAG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_sra( Register r1, int64_t d2, Register b2) { emit_32( SRA_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_srag(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRAG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_sll( Register r1, int64_t d2, Register b2) { emit_32( SLL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_sllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SLLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+inline void Assembler::z_srl( Register r1, int64_t d2, Register b2) { emit_32( SRL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_srlg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( SRLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(b2, 16, 48) | reg(r3, 12, 48)); }
+
+// Rotate left.
+inline void Assembler::z_rll( Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLL_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+inline void Assembler::z_rllg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( RLLG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm20(d2) | reg(b2, 16, 48)); }
+
+// Rotate then AND/XOR/OR/insert selected bits. spos3, epos4 and nrot5 must each fit in 6 unsigned bits (asserted).
+inline void Assembler::z_rnsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // Rotate then AND selected bits. -- z196
+  const int64_t len = 48;
+  assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction.
+  emit_48( RNSBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(test_only ? 1 : 0, len-16-1, len-16-1)); // test_only is a single-bit field at position len-16-1.
+}
+inline void Assembler::z_rxsbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // Rotate then XOR selected bits. -- z196
+  const int64_t len = 48;
+  assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction.
+  emit_48( RXSBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(test_only ? 1 : 0, len-16-1, len-16-1));
+}
+inline void Assembler::z_rosbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool test_only) { // Rotate then OR selected bits. -- z196
+  const int64_t len = 48;
+  assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction.
+  emit_48( ROSBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(test_only ? 1 : 0, len-16-1, len-16-1));
+}
+inline void Assembler::z_risbg( Register r1, Register r2, int64_t spos3, int64_t epos4, int64_t nrot5, bool zero_rest) { // Rotate then INS selected bits. -- z196
+  const int64_t len = 48;
+  assert(Immediate::is_uimm(spos3, 6), "range start out of range"); // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(epos4, 6), "range end out of range"); // Could just trim to 6bits wide w/o assertion.
+  assert(Immediate::is_uimm(nrot5, 6), "rotate amount out of range"); // Could just leave it as is. leftmost 2 bits are ignored by instruction.
+  emit_48( RISBG_ZOPC | regt(r1, 8, len) | regt(r2, 12, len) | uimm6(spos3, 16+2, len) | uimm6(epos4, 24+2, len) | uimm6(nrot5, 32+2, len) | u_field(zero_rest ? 1 : 0, len-24-1, len-24-1)); // zero_rest is a single-bit field at position len-24-1 (differs from test_only above).
+}
+
+
+//------------------------------
+// LOGICAL OPERATIONS
+//------------------------------
+inline void Assembler::z_n( Register r1, int64_t d2, Register x2, Register b2) { emit_32( N_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_ny( Register r1, int64_t d2, Register x2, Register b2) { emit_48( NY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ng( Register r1, int64_t d2, Register x2, Register b2) { emit_48( NG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_n( Register r1, const Address& a) { z_n( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Address overloads forward disp/index/base to the explicit-operand overload.
+inline void Assembler::z_ny( Register r1, const Address& a) { z_ny(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ng( Register r1, const Address& a) { z_ng(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_nr( Register r1, Register r2) { emit_16( NR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_ngr( Register r1, Register r2) { emit_32( NGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_nrk( Register r1, Register r2, Register r3) { emit_32( NRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_ngrk(Register r1, Register r2, Register r3) { emit_32( NGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_nihh(Register r1, int64_t i2) { emit_32( NIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nihl(Register r1, int64_t i2) { emit_32( NIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nilh(Register r1, int64_t i2) { emit_32( NILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nill(Register r1, int64_t i2) { emit_32( NILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_nihf(Register r1, int64_t i2) { emit_48( NIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_nilf(Register r1, int64_t i2) { emit_48( NILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+
+inline void Assembler::z_o( Register r1, int64_t d2, Register x2, Register b2) { emit_32( O_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_oy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( OY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_og( Register r1, int64_t d2, Register x2, Register b2) { emit_48( OG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_o( Register r1, const Address& a) { z_o( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_oy( Register r1, const Address& a) { z_oy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_og( Register r1, const Address& a) { z_og(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_or( Register r1, Register r2) { emit_16( OR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_ogr( Register r1, Register r2) { emit_32( OGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ork( Register r1, Register r2, Register r3) { emit_32( ORK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_ogrk(Register r1, Register r2, Register r3) { emit_32( OGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_oihh(Register r1, int64_t i2) { emit_32( OIHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oihl(Register r1, int64_t i2) { emit_32( OIHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oilh(Register r1, int64_t i2) { emit_32( OILH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oill(Register r1, int64_t i2) { emit_32( OILL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_oihf(Register r1, int64_t i2) { emit_48( OIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_oilf(Register r1, int64_t i2) { emit_48( OILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+
+inline void Assembler::z_x( Register r1, int64_t d2, Register x2, Register b2) { emit_32( X_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_xy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( XY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_xg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( XG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_x( Register r1, const Address& a) { z_x( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_xy( Register r1, const Address& a) { z_xy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_xg( Register r1, const Address& a) { z_xg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_xr( Register r1, Register r2) { emit_16( XR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_xgr( Register r1, Register r2) { emit_32( XGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_xrk( Register r1, Register r2, Register r3) { emit_32( XRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_xgrk(Register r1, Register r2, Register r3) { emit_32( XGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_xihf(Register r1, int64_t i2) { emit_48( XIHF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+inline void Assembler::z_xilf(Register r1, int64_t i2) { emit_48( XILF_ZOPC | regt(r1, 8, 48) | imm32(i2, 16, 48)); }
+
+inline void Assembler::z_nc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( NC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_oc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( OC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_xc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( XC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_nc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_nc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); } // The Address forms take a byte count and pass len-1 as the encoded length.
+inline void Assembler::z_oc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_oc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
+inline void Assembler::z_xc(Address dst, int64_t len, Address src2) { assert(!dst.has_index() && !src2.has_index(), "Cannot encode index"); z_xc(dst.disp12(), len-1, dst.base(), src2.disp12(), src2.base()); }
+
+
+//---------------
+// ADD
+//---------------
+inline void Assembler::z_a( Register r1, int64_t d2, Register x2, Register b2) { emit_32( A_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_ay( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_al( Register r1, int64_t d2, Register x2, Register b2) { emit_32( AL_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_aly( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ag( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_agf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( AGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_alg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_algf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( ALGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_a( Register r1, const Address& a) { z_a( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ay( Register r1, const Address& a) { z_ay( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_al( Register r1, const Address& a) { z_al( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_aly( Register r1, const Address& a) { z_aly( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ag( Register r1, const Address& a) { z_ag( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_agf( Register r1, const Address& a) { z_agf( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_alg( Register r1, const Address& a) { z_alg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_algf(Register r1, const Address& a) { z_algf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_ar( Register r1, Register r2) { emit_16( AR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_agr( Register r1, Register r2) { emit_32( AGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_agfr(Register r1, Register r2) { emit_32( AGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_ark( Register r1, Register r2, Register r3) { emit_32( ARK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_agrk(Register r1, Register r2, Register r3) { emit_32( AGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_ahi( Register r1, int64_t i2) { emit_32( AHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_afi( Register r1, int64_t i2) { emit_48( AFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_aghi( Register r1, int64_t i2) { emit_32( AGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_agfi( Register r1, int64_t i2) { emit_48( AGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_aih( Register r1, int64_t i2) { emit_48( AIH_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_ahik( Register r1, Register r3, int64_t i2) { emit_48( AHIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+inline void Assembler::z_aghik(Register r1, Register r3, int64_t i2) { emit_48( AGHIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+
+
+//-----------------------
+// ADD LOGICAL
+//-----------------------
+inline void Assembler::z_alr( Register r1, Register r2) { emit_16( ALR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_algr( Register r1, Register r2) { emit_32( ALGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_algfr(Register r1, Register r2) { emit_32( ALGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_alrk( Register r1, Register r2, Register r3) { emit_32( ALRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_algrk(Register r1, Register r2, Register r3) { emit_32( ALGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_alcgr(Register r1, Register r2) { emit_32( ALCGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+inline void Assembler::z_alfi( Register r1, int64_t i2) { emit_48( ALFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_algfi(Register r1, int64_t i2) { emit_48( ALGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+
+inline void Assembler::z_alhsik( Register r1, Register r3, int64_t i2) { emit_48( ALHSIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+inline void Assembler::z_alghsik(Register r1, Register r3, int64_t i2) { emit_48( ALGHSIK_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | simm16(i2, 16, 48)); }
+
+// In-memory arithmetic (add signed, add logical with signed immediate). The Address forms assert that no index register is present.
+inline void Assembler::z_asi( int64_t d1, Register b1, int64_t i2) { emit_48( ASI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_agsi( int64_t d1, Register b1, int64_t i2) { emit_48( AGSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_alsi( int64_t d1, Register b1, int64_t i2) { emit_48( ALSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_algsi(int64_t d1, Register b1, int64_t i2) { emit_48( ALGSI_ZOPC | simm8(i2, 8, 48) | simm20(d1) | regz(b1, 16, 48)); }
+inline void Assembler::z_asi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ASI"); z_asi( d.disp(), d.base(), i2); }
+inline void Assembler::z_agsi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in AGSI"); z_agsi( d.disp(), d.base(), i2); }
+inline void Assembler::z_alsi( const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ALSI"); z_alsi( d.disp(), d.base(), i2); }
+inline void Assembler::z_algsi(const Address& d, int64_t i2) { assert(!d.has_index(), "No index in ALGSI"); z_algsi(d.disp(), d.base(), i2); }
+
+
+//--------------------
+// SUBTRACT
+//--------------------
+inline void Assembler::z_s( Register r1, int64_t d2, Register x2, Register b2) { emit_32( S_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_sy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sgf( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_slg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_slgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( SLGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_s( Register r1, const Address& a) { z_s( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sy( Register r1, const Address& a) { z_sy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sg( Register r1, const Address& a) { z_sg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sgf( Register r1, const Address& a) { z_sgf( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_slg( Register r1, const Address& a) { z_slg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_slgf(Register r1, const Address& a) { z_slgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_sr( Register r1, Register r2) { emit_16( SR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_sgr( Register r1, Register r2) { emit_32( SGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_sgfr(Register r1, Register r2) { emit_32( SGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_srk( Register r1, Register r2, Register r3) { emit_32( SRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_sgrk(Register r1, Register r2, Register r3) { emit_32( SGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+
+inline void Assembler::z_sh( Register r1, int64_t d2, Register x2, Register b2) { emit_32( SH_ZOPC | regt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_shy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( SHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_sh( Register r1, const Address &a) { z_sh( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Uses baseOrR0(), matching the other index-capable Address overloads in this file.
+inline void Assembler::z_shy( Register r1, const Address &a) { z_shy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } // Uses baseOrR0(), matching the other index-capable Address overloads in this file.
+
+
+//----------------------------
+// SUBTRACT LOGICAL
+//----------------------------
+inline void Assembler::z_slr( Register r1, Register r2) { emit_16( SLR_ZOPC | regt(r1, 8, 16) | reg(r2, 12, 16)); }
+inline void Assembler::z_slgr( Register r1, Register r2) { emit_32( SLGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_slgfr(Register r1, Register r2) { emit_32( SLGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_slrk( Register r1, Register r2, Register r3) { emit_32(SLRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_slgrk(Register r1, Register r2, Register r3) { emit_32(SLGRK_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32) | reg(r3, 16, 32)); }
+inline void Assembler::z_slfi( Register r1, int64_t i2) { emit_48( SLFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_slgfi(Register r1, int64_t i2) { emit_48( SLGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+
+
+//--------------------
+// MULTIPLY
+//--------------------
+inline void Assembler::z_msr( Register r1, Register r2) { emit_32( MSR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_msgr( Register r1, Register r2) { emit_32( MSGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_msgfr(Register r1, Register r2) { emit_32( MSGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_mlr( Register r1, Register r2) { emit_32( MLR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_mlgr( Register r1, Register r2) { emit_32( MLGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+inline void Assembler::z_mhy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MHY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_msy( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_msg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_msgf(Register r1, int64_t d2, Register x2, Register b2) { emit_48( MSGF_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ml( Register r1, int64_t d2, Register x2, Register b2) { emit_48( ML_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_mlg( Register r1, int64_t d2, Register x2, Register b2) { emit_48( MLG_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+
+inline void Assembler::z_mhy( Register r1, const Address& a) { z_mhy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_msy( Register r1, const Address& a) { z_msy( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_msg( Register r1, const Address& a) { z_msg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_msgf(Register r1, const Address& a) { z_msgf(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ml( Register r1, const Address& a) { z_ml( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_mlg( Register r1, const Address& a) { z_mlg( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_msfi( Register r1, int64_t i2) { emit_48( MSFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_msgfi(Register r1, int64_t i2) { emit_48( MSGFI_ZOPC | regt(r1, 8, 48) | simm32(i2, 16, 48)); }
+inline void Assembler::z_mhi( Register r1, int64_t i2) { emit_32( MHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_mghi( Register r1, int64_t i2) { emit_32( MGHI_ZOPC | regt(r1, 8, 32) | simm16(i2, 16, 32)); }
+
+
+//------------------
+// DIVIDE
+//------------------
+inline void Assembler::z_dsgr( Register r1, Register r2) { emit_32( DSGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_dsgfr(Register r1, Register r2) { emit_32( DSGFR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+
+//-------------------
+// COMPARE
+//-------------------
+inline void Assembler::z_cr( Register r1, Register r2) { emit_16( CR_ZOPC | reg(r1, 8, 16) | reg(r2,12,16)); }
+inline void Assembler::z_cgr( Register r1, Register r2) { emit_32( CGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_cgfr(Register r1, Register r2) { emit_32( CGFR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_chi( Register r1, int64_t i2) { emit_32( CHI_ZOPC | reg(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_cghi(Register r1, int64_t i2) { emit_32( CGHI_ZOPC | reg(r1, 8, 32) | simm16(i2, 16, 32)); }
+inline void Assembler::z_cfi( Register r1, int64_t i2) { emit_48( CFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); } // NOTE(review): immediate packed with uimm32 while z_afi/z_msfi use simm32 -- presumably CFI takes a signed immediate; confirm negative values are accepted.
+inline void Assembler::z_cgfi(Register r1, int64_t i2) { emit_48( CGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); } // NOTE(review): same uimm32-vs-simm32 question as z_cfi; confirm.
+inline void Assembler::z_ch(Register r1, const Address &a) { z_ch(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ch(Register r1, int64_t d2, Register x2, Register b2) { emit_32( CH_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_c(Register r1, const Address &a) { z_c(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_c(Register r1, int64_t d2, Register x2, Register b2) { emit_32( C_ZOPC | reg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_cy(Register r1, const Address &a) { z_cy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cy(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_cy(Register r1, int64_t d2, Register b2) { z_cy(r1, d2, Z_R0, b2); } // Convenience form without index register: Z_R0 is passed as x2.
+inline void Assembler::z_cg(Register r1, const Address &a) { z_cg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CG_ZOPC | reg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_clr(Register r1, Register r2) { emit_16( CLR_ZOPC | reg(r1,8,16) | reg(r2,12,16)); }
+inline void Assembler::z_clgr(Register r1, Register r2) { emit_32( CLGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
+
+
+inline void Assembler::z_clfi(Register r1, int64_t i2) { emit_48( CLFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_clgfi(Register r1, int64_t i2) { emit_48( CLGFI_ZOPC | regt(r1, 8, 48) | uimm32(i2, 16, 48)); }
+inline void Assembler::z_cl(Register r1, const Address &a) { z_cl(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cl(Register r1, int64_t d2, Register x2, Register b2) { emit_32( CL_ZOPC | regt(r1, 8, 32) | uimm12(d2,20,32) | reg(x2, 12, 32) | regz(b2, 16, 32)); }
+inline void Assembler::z_cly(Register r1, const Address &a) { z_cly(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cly(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CLY_ZOPC | regt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_cly(Register r1, int64_t d2, Register b2) { z_cly(r1, d2, Z_R0, b2); }
+inline void Assembler::z_clg(Register r1, const Address &a) { z_clg(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_clg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CLG_ZOPC | reg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_clc(int64_t d1, int64_t l, Register b1, int64_t d2, Register b2) { emit_48( CLC_ZOPC | uimm12(d1, 20, 48) | uimm8(l, 8, 48) | regz(b1, 16, 48) | uimm12(d2, 36, 48) | regz(b2, 32, 48)); }
+inline void Assembler::z_clcle(Register r1, Register r3, int64_t d2, Register b2) { emit_32( CLCLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | uimm12(d2, 20, 32) | reg(b2, 16, 32)); }
+inline void Assembler::z_clclu(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CLCLU_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | uimm12(d2, 20, 48) | reg(b2, 16, 48)); } // NOTE(review): 48-bit form with a uimm12 displacement, whereas z_slag/z_rll use simm20(d2) -- confirm the displacement encoding.
+
+inline void Assembler::z_tmll(Register r1, int64_t i2) { emit_32( TMLL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_tmlh(Register r1, int64_t i2) { emit_32( TMLH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_tmhl(Register r1, int64_t i2) { emit_32( TMHL_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+inline void Assembler::z_tmhh(Register r1, int64_t i2) { emit_32( TMHH_ZOPC | regt(r1, 8, 32) | imm16(i2, 16, 32)); }
+
+// Translate characters.
+inline void Assembler::z_troo(Register r1, Register r2, int64_t m3) { emit_32( TROO_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_trot(Register r1, Register r2, int64_t m3) { emit_32( TROT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_trto(Register r1, Register r2, int64_t m3) { emit_32( TRTO_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_trtt(Register r1, Register r2, int64_t m3) { emit_32( TRTT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+
+// Signed comparison. The address forms (a4) encode a 16-bit offset computed by RelAddr::pcrel_off16(a4, pc()).
+inline void Assembler::z_crb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_crj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cgrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
+inline void Assembler::z_cgij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CGIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | simm8(i2, 32, 48)); }
+// Unsigned comparison.
+inline void Assembler::z_clrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clgrb(Register r1, Register r2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLGRB_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CLRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_clgrj(Register r1, Register r2, branch_condition m3, address a4) { emit_48( CLGRJ_ZOPC | reg(r1, 8, 48) | reg(r2, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm4(m3, 32, 48)); }
+// Unsigned compare-immediate-and-branch encoders. Each emits one 48-bit instruction:
+// i2 is encoded through uimm8 and the branch condition m3 through uimm4.
+// The *b forms encode the target as displacement d4 plus register b4; the *j forms encode
+// the 16-bit value RelAddr::pcrel_off16() computes from the target a4 and the current pc().
+inline void Assembler::z_clib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clgib(Register r1, int64_t i2, branch_condition m3, int64_t d4, Register b4) { emit_48( CLGIB_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | uimm12(d4, 20, 48) | reg(b4, 16, 48) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CLIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm8(i2, 32, 48)); }
+inline void Assembler::z_clgij(Register r1, int64_t i2, branch_condition m3, address a4) { emit_48( CLGIJ_ZOPC | reg(r1, 8, 48) | uimm4(m3, 12, 48) | simm16(RelAddr::pcrel_off16(a4, pc()), 16, 48) | uimm8(i2, 32, 48)); }
+
+// Compare and trap instructions (signed).
+// The register-register forms emit 32 bits, the immediate forms emit 48 bits with i2
+// encoded through simm16. m3 is a 4-bit field (uimm4); presumably the trap condition
+// mask, analogous to the branch forms -- confirm against the callers.
+inline void Assembler::z_crt(Register r1, Register r2, int64_t m3) { emit_32( CRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_cgrt(Register r1, Register r2, int64_t m3) { emit_32( CGRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); }
+inline void Assembler::z_cit(Register r1, int64_t i2, int64_t m3) { emit_48( CIT_ZOPC | reg(r1, 8, 48) | simm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_cgit(Register r1, int64_t i2, int64_t m3) { emit_48( CGIT_ZOPC | reg(r1, 8, 48) | simm16(i2, 16, 48) | uimm4(m3, 32, 48)); }
+
+// Compare and trap instructions (unsigned).
+inline void Assembler::z_clrt(Register r1, Register r2, int64_t m3) { emit_32( CLRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); } +inline void Assembler::z_clgrt(Register r1, Register r2, int64_t m3) { emit_32( CLGRT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32) | uimm4(m3, 16, 32)); } +inline void Assembler::z_clfit(Register r1, int64_t i2, int64_t m3) { emit_48( CLFIT_ZOPC | reg(r1, 8, 48) | uimm16(i2, 16, 48) | uimm4(m3, 32, 48)); } +inline void Assembler::z_clgit(Register r1, int64_t i2, int64_t m3) { emit_48( CLGIT_ZOPC | reg(r1, 8, 48) | uimm16(i2, 16, 48) | uimm4(m3, 32, 48)); } + +inline void Assembler::z_bc( branch_condition m1, int64_t d2, Register x2, Register b2) { emit_32( BC_ZOPC | 0 << 16 | uimm4(m1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_bcr( branch_condition m1, Register r2) { emit_16( BCR_ZOPC | uimm4(m1,8,16) | reg(r2,12,16)); } +inline void Assembler::z_brc( branch_condition i1, int64_t i2) { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(i2, 16, 32)); } +inline void Assembler::z_brc( branch_condition i1, address a) { emit_32( BRC_ZOPC | uimm4(i1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); } +inline void Assembler::z_brcl(branch_condition i1, address a) { emit_48( BRCL_ZOPC | uimm4(i1, 8, 48)| simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); } +inline void Assembler::z_bctgr(Register r1, Register r2) { emit_32( BCTGR_ZOPC | reg( r1, 24, 32) | reg( r2, 28, 32)); }; + +inline void Assembler::z_basr(Register r1, Register r2) { emit_16( BASR_ZOPC | regt(r1,8,16) | reg(r2,12,16)); } + +inline void Assembler::z_brasl(Register r1, address a) { emit_48( BRASL_ZOPC | regt(r1, 8, 48) | simm32(RelAddr::pcrel_off32(a, pc()), 16, 48)); } + +inline void Assembler::z_brct(Register r1, address a) { emit_32( BRCT_ZOPC | regt(r1, 8, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32)); } +inline void Assembler::z_brct(Register r1, Label& L) {z_brct(r1, target(L)); } + +inline void 
Assembler::z_brxh(Register r1, Register r3, address a) {emit_32( BRXH_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32));} +inline void Assembler::z_brxh(Register r1, Register r3, Label& L) {z_brxh(r1, r3, target(L)); } + +inline void Assembler::z_brxle(Register r1, Register r3, address a) {emit_32( BRXLE_ZOPC | reg(r1, 8, 32) | reg(r3, 12, 32) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 32));} +inline void Assembler::z_brxle(Register r1, Register r3, Label& L) {z_brxle(r1, r3, target(L)); } + +inline void Assembler::z_brxhg(Register r1, Register r3, address a) {emit_48( BRXHG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 48));} +inline void Assembler::z_brxhg(Register r1, Register r3, Label& L) {z_brxhg(r1, r3, target(L)); } + +inline void Assembler::z_brxlg(Register r1, Register r3, address a) {emit_48( BRXLG_ZOPC | reg(r1, 8, 48) | reg(r3, 12, 48) | simm16(RelAddr::pcrel_off16(a, pc()), 16, 48));} +inline void Assembler::z_brxlg(Register r1, Register r3, Label& L) {z_brxlg(r1, r3, target(L)); } + +inline void Assembler::z_flogr(Register r1, Register r2) { emit_32( FLOGR_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_popcnt(Register r1, Register r2) { emit_32( POPCNT_ZOPC | reg(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_ahhhr(Register r1, Register r2, Register r3) { emit_32( AHHHR_ZOPC | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); } +inline void Assembler::z_ahhlr(Register r1, Register r2, Register r3) { emit_32( AHHLR_ZOPC | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); } + +inline void Assembler::z_tam() { emit_16( TAM_ZOPC); } +inline void Assembler::z_stck(int64_t d2, Register b2) { emit_32( STCK_ZOPC | uimm12(d2, 20, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_stckf(int64_t d2, Register b2) { emit_32( STCKF_ZOPC | uimm12(d2, 20, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_stmg(Register r1, Register r3, 
int64_t d2, Register b2) { emit_48( STMG_ZOPC | simm20(d2) | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) ); } +inline void Assembler::z_lmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( LMG_ZOPC | simm20(d2) | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) ); } + +inline void Assembler::z_cs(Register r1, Register r3, int64_t d2, Register b2) { emit_32( CS_ZOPC | regt(r1, 8, 32) | reg(r3, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); } +inline void Assembler::z_csy(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSY_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); } +inline void Assembler::z_csg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( CSG_ZOPC | regt(r1, 8, 48) | reg(r3, 12, 48) | reg(b2, 16, 48) | simm20(d2)); } +inline void Assembler::z_cs( Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_cs( r1, r3, a.disp(), a.baseOrR0()); } +inline void Assembler::z_csy(Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_csy(r1, r3, a.disp(), a.baseOrR0()); } +inline void Assembler::z_csg(Register r1, Register r3, const Address& a) { assert(!a.has_index(), "Cannot encode index"); z_csg(r1, r3, a.disp(), a.baseOrR0()); } + +inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2) { emit_32( CVD_ZOPC | regt(r1, 8, 32) | reg(x2, 12, 32) | reg(b2, 16, 32) | uimm12(d2, 20, 32)); } +inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); } + + +//------------------------------- +// FLOAT INSTRUCTIONS +//------------------------------- + +//---------------- +// LOAD +//---------------- +inline void Assembler::z_ler( FloatRegister r1, FloatRegister r2) { emit_16( LER_ZOPC | fregt(r1,8,16) | freg(r2,12,16)); } +inline void Assembler::z_ldr( FloatRegister r1, FloatRegister r2) { 
emit_16( LDR_ZOPC | fregt(r1,8,16) | freg(r2,12,16)); } +inline void Assembler::z_ldebr(FloatRegister r1, FloatRegister r2) { emit_32( LDEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); } +inline void Assembler::z_ledbr(FloatRegister r1, FloatRegister r2) { emit_32( LEDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); } +inline void Assembler::z_le( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( LE_ZOPC | fregt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_ley(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( LEY_ZOPC | fregt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_ld( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( LD_ZOPC | fregt(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_ldy(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( LDY_ZOPC | fregt(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_le( FloatRegister r1, const Address &a) { z_le( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_ley(FloatRegister r1, const Address &a) { z_ley(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_ld( FloatRegister r1, const Address &a) { z_ld( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_ldy(FloatRegister r1, const Address &a) { z_ldy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } + +inline void Assembler::z_lzdr(FloatRegister r1) { emit_32( LZDR_ZOPC | fregt(r1, 24, 32)); } +inline void Assembler::z_lzer(FloatRegister f1) { emit_32( LZER_ZOPC | fregt(f1, 24, 32)); } + + +//----------------- +// STORE +//----------------- +inline void Assembler::z_ste( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( STE_ZOPC | freg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void 
Assembler::z_stey(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( STEY_ZOPC | freg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_std( FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_32( STD_ZOPC | freg(r1, 8, 32) | uimm12(d2, 20, 32) | reg(x2, 12, 32) | regz(b2, 16, 32)); } +inline void Assembler::z_stdy(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( STDY_ZOPC | freg(r1, 8, 48) | simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48)); } +inline void Assembler::z_ste( FloatRegister r1, const Address &a) { z_ste( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_stey(FloatRegister r1, const Address &a) { z_stey(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_std( FloatRegister r1, const Address &a) { z_std( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_stdy(FloatRegister r1, const Address &a) { z_stdy(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } + + +//--------------- +// ADD +//--------------- +inline void Assembler::z_aebr( FloatRegister f1, FloatRegister f2) { emit_32( AEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));} +inline void Assembler::z_adbr( FloatRegister f1, FloatRegister f2) { emit_32( ADBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));} +inline void Assembler::z_aeb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( AEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));} +inline void Assembler::z_adb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( ADB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));} +inline void Assembler::z_aeb( FloatRegister r1, const Address& a) { z_aeb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } +inline void Assembler::z_adb( FloatRegister r1, const Address& a) { z_adb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); } + + +//--------------- +// SUB 
+//---------------
+inline void Assembler::z_sebr( FloatRegister f1, FloatRegister f2) { emit_32( SEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_sdbr( FloatRegister f1, FloatRegister f2) { emit_32( SDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_seb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( SEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_sdb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( SDB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_seb( FloatRegister r1, const Address& a) { z_seb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_sdb( FloatRegister r1, const Address& a) { z_sdb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+inline void Assembler::z_lcebr(FloatRegister r1, FloatRegister r2) { emit_32( LCEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_lcdbr(FloatRegister r1, FloatRegister r2) { emit_32( LCDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+
+// When fr2 is fnoreg, fr1 is encoded as the second operand as well.
+inline void Assembler::z_lpdbr( FloatRegister fr1, FloatRegister fr2) { emit_32( LPDBR_ZOPC | fregt( fr1, 24,32) | freg((fr2 == fnoreg) ? fr1:fr2, 28, 32)); }
+
+
+//---------------
+// MUL
+//---------------
+inline void Assembler::z_meebr(FloatRegister f1, FloatRegister f2) { emit_32( MEEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_mdbr( FloatRegister f1, FloatRegister f2) { emit_32( MDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_meeb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( MEEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_mdb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( MDB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_meeb( FloatRegister r1, const Address& a) { z_meeb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_mdb( FloatRegister r1, const Address& a) { z_mdb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// DIV
+//---------------
+inline void Assembler::z_debr( FloatRegister f1, FloatRegister f2) { emit_32( DEBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_ddbr( FloatRegister f1, FloatRegister f2) { emit_32( DDBR_ZOPC | fregt( f1, 24, 32) | freg( f2, 28, 32));}
+inline void Assembler::z_deb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( DEB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_ddb( FloatRegister f1, int64_t d2, Register x2, Register b2 ) { emit_48( DDB_ZOPC | fregt( f1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_deb( FloatRegister r1, const Address& a) { z_deb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_ddb( FloatRegister r1, const Address& a) { z_ddb( r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//---------------
+// square root
+//---------------
+inline void Assembler::z_sqdbr(FloatRegister f1, FloatRegister f2) { emit_32(SQDBR_ZOPC | fregt(f1, 24, 32) | freg(f2, 28, 32)); }
+inline void Assembler::z_sqdb( FloatRegister fr1, int64_t d2, Register x2, Register b2 ) { emit_48( SQDB_ZOPC | fregt( fr1, 8, 48) | uimm12( d2, 20, 48) | reg( x2, 12, 48) | regz( b2, 16, 48));}
+inline void Assembler::z_sqdb( FloatRegister fr1, int64_t d2, Register b2) { z_sqdb( fr1, d2, Z_R0, b2);}
+
+
+//---------------
+// CMP
+//---------------
+inline void Assembler::z_cebr(FloatRegister r1, FloatRegister r2) { emit_32( CEBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_ceb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( CEB_ZOPC | fregt(r1, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_ceb(FloatRegister r1, const Address &a) { z_ceb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+inline void Assembler::z_cdbr(FloatRegister r1, FloatRegister r2) { emit_32( CDBR_ZOPC | fregt(r1, 24, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cdb(FloatRegister r1, int64_t d2, Register x2, Register b2) { emit_48( CDB_ZOPC | fregt(r1, 8, 48) | uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48)); }
+inline void Assembler::z_cdb(FloatRegister r1, const Address &a) { z_cdb(r1, a.disp(), a.indexOrR0(), a.baseOrR0()); }
+
+
+//------------------------------------
+// FLOAT <-> INT conversion
+//------------------------------------
+inline void Assembler::z_ldgr(FloatRegister r1, Register r2) { emit_32( LDGR_ZOPC | fregt(r1, 24, 32) | reg(r2, 28, 32)); }
+inline void Assembler::z_lgdr(Register r1, FloatRegister r2) { emit_32( LGDR_ZOPC | regt( r1, 24, 32) | freg(r2, 28, 32)); }
+
+inline void Assembler::z_cefbr( FloatRegister r1, Register r2) { emit_32( CEFBR_ZOPC | fregt( r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_cdfbr( FloatRegister r1, Register r2) { emit_32( CDFBR_ZOPC | fregt( r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_cegbr( FloatRegister r1, Register r2) { emit_32( CEGBR_ZOPC | fregt( r1, 24, 32) | reg( r2, 28, 32)); }
+inline void Assembler::z_cdgbr( FloatRegister r1, Register r2) { emit_32( CDGBR_ZOPC | fregt( r1, 24, 32) | reg( r2, 28, 32)); }
+
+// The float-to-integer forms additionally encode a RoundingMode through rounding_mode().
+inline void Assembler::z_cfebr(Register r1, FloatRegister r2, RoundingMode m) { emit_32( CFEBR_ZOPC | regt(r1, 24, 32) | rounding_mode(m, 16, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cfdbr(Register r1, FloatRegister r2, RoundingMode m) { emit_32( CFDBR_ZOPC | regt(r1, 24, 32) | rounding_mode(m, 16, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cgebr(Register r1, FloatRegister r2, RoundingMode m) { emit_32( CGEBR_ZOPC | regt(r1, 24, 32) | rounding_mode(m, 16, 32) | freg(r2, 28, 32)); }
+inline void Assembler::z_cgdbr(Register r1, FloatRegister r2, RoundingMode m) { emit_32( CGDBR_ZOPC | regt(r1, 24, 32) | rounding_mode(m, 16, 32) | freg(r2, 28, 32)); }
+
+
+  // Convenience overloads without an index register: each forwards to the
+  // four-operand form of the same name with Z_R0 passed in the index position.
+  inline void Assembler::z_layz(Register r1, int64_t d2, Register b2) { z_layz(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lay(Register r1, int64_t d2, Register b2) { z_lay( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_laz(Register r1, int64_t d2, Register b2) { z_laz( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_la(Register r1, int64_t d2, Register b2) { z_la( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_l(Register r1, int64_t d2, Register b2) { z_l( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ly(Register r1, int64_t d2, Register b2) { z_ly( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lg(Register r1, int64_t d2, Register b2) { z_lg( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_st(Register r1, int64_t d2, Register b2) { z_st( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_sty(Register r1, int64_t d2, Register b2) { z_sty( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_stg(Register r1, int64_t d2, Register b2) { z_stg( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lgf(Register r1, int64_t d2, Register b2) { z_lgf( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lgh(Register r1, int64_t d2, Register b2) { z_lgh( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_llgh(Register r1, int64_t d2, Register b2) { z_llgh(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_llgf(Register r1, int64_t d2, Register b2) { z_llgf(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_lgb(Register r1, int64_t d2, Register b2) { z_lgb( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_cl( Register r1, int64_t d2, Register b2) { z_cl( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_c(Register r1, int64_t d2, Register b2) { z_c( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_cg(Register r1, int64_t d2, Register b2) { z_cg( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_sh(Register r1, int64_t d2, Register b2) { z_sh( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_shy(Register r1, int64_t d2, Register b2) { z_shy( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ste(FloatRegister r1, int64_t d2, Register b2) { z_ste( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_std(FloatRegister r1, int64_t d2, Register b2) { z_std( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_stdy(FloatRegister r1, int64_t d2, Register b2) { z_stdy(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_stey(FloatRegister r1, int64_t d2, Register b2) { z_stey(r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ld(FloatRegister r1, int64_t d2, Register b2) { z_ld( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ldy(FloatRegister r1, int64_t d2, Register b2) { z_ldy( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_le(FloatRegister r1, int64_t d2, Register b2) { z_le( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_ley(FloatRegister r1, int64_t d2, Register b2) { z_ley( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_agf(Register r1, int64_t d2, Register b2) { z_agf( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_cvd(Register r1, int64_t d2, Register b2) { z_cvd( r1, d2, Z_R0, b2); }
+  inline void Assembler::z_cvdg(Register r1, int64_t d2, Register b2) { z_cvdg(r1, d2, Z_R0, b2); }
+
+// Label overloads of the compare-and-branch encoders: each resolves the label
+// through target(L) and forwards to the address-based overload of the same name.
+// signed comparison
+inline void Assembler::z_crj(Register r1, Register r2, branch_condition m3, Label& L) { z_crj( r1, r2, m3, target(L)); }
+inline void Assembler::z_cgrj(Register r1, Register r2, branch_condition m3, Label& L) { z_cgrj( r1, r2, m3, target(L)); }
+inline void Assembler::z_cij(Register r1, int64_t i2, branch_condition m3, Label& L) { z_cij( r1, i2, m3, target(L)); }
+inline void Assembler::z_cgij(Register r1, int64_t i2, branch_condition m3, Label& L) { z_cgij( r1, i2, m3, target(L)); }
+// unsigned comparison
+inline void Assembler::z_clrj(Register r1, Register r2, branch_condition m3, Label& L) { z_clrj( r1, r2, m3, target(L)); }
+inline void Assembler::z_clgrj(Register r1, Register r2, branch_condition m3, Label& L) { z_clgrj(r1, r2, m3, target(L)); }
+inline void Assembler::z_clij(Register r1, int64_t i2, branch_condition m3, Label& L) { z_clij( r1, i2, m3, target(L)); }
+inline void Assembler::z_clgij(Register r1, int64_t i2, branch_condition m3, Label& L) { z_clgij(r1, i2, m3, target(L)); }
+
+// branch never (nop), branch always
+// Both are z_bcr with a fixed condition; z_br asserts that the target register is not Z_R0.
+inline void Assembler::z_nop() { z_bcr(bcondNop, Z_R0); }
+inline void Assembler::z_br(Register r2) { assert(r2 != Z_R0, "nop if target is Z_R0, use z_nop() instead"); z_bcr(bcondAlways, r2 ); }
+
+inline void Assembler::z_exrl(Register r1, Label& L) { z_exrl(r1, target(L)); }  // z10
+inline void Assembler::z_larl(Register r1, Label& L) { z_larl(r1, target(L)); }
+// Branch shorthands: each forwards to z_brc (z_brul to z_brcl) with a fixed branch_condition.
+// Note that z_braz and z_bfalse both expand to bcondAllZero.
+inline void Assembler::z_bru( Label& L) { z_brc(bcondAlways,target(L)); }
+inline void Assembler::z_brul( Label& L) { z_brcl(bcondAlways,target(L)); }
+inline void Assembler::z_brul( address a) { z_brcl(bcondAlways,a); }
+inline void Assembler::z_brh( Label& L) { z_brc(bcondHigh,target(L)); }
+inline void Assembler::z_brl( Label& L) { z_brc(bcondLow,target(L)); }
+inline void Assembler::z_bre( Label& L) { z_brc(bcondEqual,target(L)); }
+inline void Assembler::z_brnh( Label& L) { z_brc(bcondNotHigh,target(L)); }
+inline void Assembler::z_brnl( Label& L) { z_brc(bcondNotLow,target(L)); }
+inline void Assembler::z_brne( Label& L) { z_brc(bcondNotEqual,target(L)); }
+inline void Assembler::z_brz( Label& L) { z_brc(bcondZero,target(L)); }
+inline void Assembler::z_brnz( Label& L) { z_brc(bcondNotZero,target(L)); }
+inline void Assembler::z_braz( Label& L) { z_brc(bcondAllZero,target(L)); }
+inline void Assembler::z_brnaz( Label& L) { z_brc(bcondNotAllZero,target(L)); }
+inline void Assembler::z_brnp( Label& L) { z_brc( bcondNotPositive, target( L)); }
+inline void Assembler::z_btrue( Label& L) { z_brc(bcondAllOne,target(L)); }
+inline void Assembler::z_bfalse(Label& L) { z_brc(bcondAllZero,target(L)); }
+inline void Assembler::z_brno( Label& L) { z_brc(bcondNotOrdered,target(L)); }
+inline void Assembler::z_brc( branch_condition m, Label& L) { z_brc(m, target(L)); }
+inline void Assembler::z_brcl(branch_condition m, Label& L) { z_brcl(m, target(L)); }
+
+
+// Instruction must start at passed address.
+// Extra check for illtraps with ID.
+// NOTE(review): no illtrap-specific check is visible in this function; the remark
+// above may be stale -- confirm.
+// Returns the instruction length in bytes (2, 4 or 6), selected by the
+// leftmost two bits of the first byte at instr.
+inline int Assembler::instr_len(unsigned char *instr) {
+  switch ((*instr) >> 6) {
+    case 0: return 2;
+    case 1: // fallthru
+    case 2: return 4;
+    case 3: return 6;
+    default:
+      // Control can't reach here.
+      // The switch expression examines just the leftmost two bits
+      // of the main opcode. So the range of values is just [0..3].
+      // Having a default clause makes the compiler happy.
+      ShouldNotReachHere();
+      return 0;
+  }
+}
+
+// Move instr at pc right-justified into passed long int.
+// Return instr len in bytes as function result.
+// Note: 2-byte instr don't really need to be accessed unsigned
+//       because leftmost two bits are always zero. We use
+//       unsigned here for reasons of uniformity.
+inline unsigned int Assembler::get_instruction(unsigned char *pc, unsigned long *instr) { + unsigned int len = instr_len(pc); + switch (len) { + case 2: + *instr = *(unsigned short*) pc; break; + case 4: + *instr = *(unsigned int*) pc; break; + case 6: + // Must compose this case. Can't read 8 bytes and then cut off + // the rightmost two bytes. Could potentially access + // unallocated storage. + *instr = ((unsigned long)(*(unsigned int*) pc)) << 16 | + ((unsigned long)*(unsigned short*) (pc + 4)); break; + default: + // Control can't reach here. + // The length as returned from instr_len() can only be 2, 4, or 6 bytes. + // Having a default clause makes the compiler happy. + ShouldNotReachHere(); + break; + } + return len; +} + +// Check if instruction is the expected one. +// Instruction is passed right-justified in inst. +inline bool Assembler::is_equal(unsigned long inst, unsigned long idef) { + unsigned long imask; + + if ((idef >> 32) != 0) { // 6byte instructions + switch (idef >> 40) { // select mask by main opcode + case 0xc0: + case 0xc2: + case 0xc4: + case 0xc6: imask = RIL_MASK; break; + case 0xec: + if ((idef & 0x00ffL) < 0x0080L) { + imask = RIE_MASK; + break; + } + // Fallthru for other sub opcodes. + default: +#ifdef ASSERT + tty->print_cr("inst = %16.16lx, idef = %16.16lx, imask unspecified\n", inst, idef); + tty->flush(); +#endif + ShouldNotReachHere(); + return 0; + } + } else { // 4-byte instructions + switch (idef >> 24) { // Select mask by main opcode. + case 0x84: + case 0x85: imask = RSI_MASK; break; + case 0xa5: + case 0xa7: imask = RI_MASK; break; + case 0xb9: imask = RRE_MASK; break; // RRE_MASK or RRF_MASK. Opcode fields are at same bit positions. 
+ default: { +#ifdef ASSERT + tty->print_cr("inst = %16.16lx, idef = %16.16lx, imask unspecified\n", inst, idef); + tty->flush(); +#endif + ShouldNotReachHere(); + return 0; + } + } + } + return (inst & imask) == idef; +} + +inline bool Assembler::is_equal(unsigned long inst, unsigned long idef, unsigned long imask) { + assert(imask != 0, "valid instruction mask required"); + return (inst & imask) == idef; +} + +// Check if instruction is the expected one. +// Instruction is passed left-justified at inst. +inline bool Assembler::is_equal(address iloc, unsigned long idef) { + unsigned long inst; + get_instruction(iloc, &inst); + return is_equal(inst, idef); +} + +inline bool Assembler::is_equal(address iloc, unsigned long idef, unsigned long imask) { + unsigned long inst; + get_instruction(iloc, &inst); + return is_equal(inst, idef, imask); +} + +inline bool Assembler::is_sigtrap_range_check(address pc) { + return (is_equal(pc, CLFIT_ZOPC, RIE_MASK) || is_equal(pc, CLRT_ZOPC, RRE_MASK)); +} + +inline bool Assembler::is_sigtrap_zero_check(address pc) { + return (is_equal(pc, CGIT_ZOPC, RIE_MASK) || is_equal(pc, CIT_ZOPC, RIE_MASK)); +} + +#endif // CPU_S390_VM_ASSEMBLER_S390_INLINE_HPP diff --git a/hotspot/src/cpu/s390/vm/bytes_s390.hpp b/hotspot/src/cpu/s390/vm/bytes_s390.hpp new file mode 100644 index 00000000000..6209624d335 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/bytes_s390.hpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_BYTES_S390_HPP
+#define CPU_S390_VM_BYTES_S390_HPP
+
+#include "memory/allocation.hpp"
+
+// Static-only helper class for reading and writing 2-, 4- and 8-byte
+// unsigned values at arbitrary addresses, in native and in Java byte order.
+// On this platform both orders coincide, so every Java accessor simply
+// forwards to its native counterpart and every swap is the identity.
+class Bytes: AllStatic {
+ public:
+  // Efficient reading and writing of unaligned unsigned data in
+  // platform-specific byte ordering.
+
+  // Use regular load and store for unaligned access.
+  //
+  // On z/Architecture, unaligned loads and stores are supported when using the
+  // "traditional" load (LH, L/LY, LG) and store (STH, ST/STY, STG) instructions.
+  // The penalty for unaligned access is just very few (two or three) ticks,
+  // plus another few (two or three) ticks if the access crosses a cache line boundary.
+  //
+  // In short, it makes no sense on z/Architecture to piecemeal get or put unaligned data.
+
+  // Returns true if the byte ordering used by Java is different from
+  // the native byte ordering of the underlying machine.
+  // z/Arch is big endian, thus, a swap between native and Java ordering
+  // is always a no-op.
+  static inline bool is_Java_byte_ordering_different() { return false; }
+
+  // Only swap on little endian machines => suffix `_le'.
+  // Here they return their argument unchanged.
+  static inline u2 swap_u2_le(u2 x) { return x; }
+  static inline u4 swap_u4_le(u4 x) { return x; }
+  static inline u8 swap_u8_le(u8 x) { return x; }
+
+  // Native-order loads: a single typed load through p, no alignment handling.
+  static inline u2 get_native_u2(address p) { return *(u2*)p; }
+  static inline u4 get_native_u4(address p) { return *(u4*)p; }
+  static inline u8 get_native_u8(address p) { return *(u8*)p; }
+
+  // Native-order stores: a single typed store through p, no alignment handling.
+  static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; }
+  static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; }
+  static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; }
+
+  // NOTE(review): this header is textually included inside the class body, so
+  // whatever it declares becomes a member of Bytes. Presumably it supplies the
+  // OS-specific swap_u2/swap_u4/swap_u8 routines - confirm against that file.
+#include "bytes_linux_s390.inline.hpp"
+
+  // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
+  static inline u2 get_Java_u2(address p) { return get_native_u2(p); }
+  static inline u4 get_Java_u4(address p) { return get_native_u4(p); }
+  static inline u8 get_Java_u8(address p) { return get_native_u8(p); }
+
+  static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, x); }
+  static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, x); }
+  static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, x); }
+};
+
+#endif // CPU_S390_VM_BYTES_S390_HPP
diff --git a/hotspot/src/cpu/s390/vm/c1_CodeStubs_s390.cpp b/hotspot/src/cpu/s390/vm/c1_CodeStubs_s390.cpp
new file mode 100644
index 00000000000..4d429960798
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/c1_CodeStubs_s390.cpp
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "nativeInst_s390.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_s390.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif // INCLUDE_ALL_GCS
+
+// Shorthand: "__ foo()" emits through the macro assembler of the LIR_Assembler ce.
+#define __ ce->masm()->
+// Leave the current emit_code() early once the compilation has bailed out.
+#undef CHECK_BAILOUT
+#define CHECK_BAILOUT() { if (ce->compilation()->bailed_out()) return; }
+
+// Records the index operand and which of the two exceptions to raise;
+// keeps a private copy of the debug info.
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
+                               bool throw_index_out_of_bounds_exception) :
+  _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception),
+  _index(index) {
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);
+}
+
+// Slow path of a failed range check. Either calls the predicate_failed_trap
+// entry (when the info asks to deoptimize on exception) or passes the index in
+// Z_R1_scratch to the index/range-check exception entry. Control is not
+// expected to return from either call.
+void RangeCheckStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_info->deoptimize_on_exception()) {
+    address a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+    ce->emit_call_c(a);
+    CHECK_BAILOUT();
+    ce->add_call_info_here(_info);
+    ce->verify_oop_map(_info);
+    debug_only(__ should_not_reach_here());
+    return;
+  }
+
+  // Pass the array index in Z_R1_scratch which is not managed by linear scan.
+  if (_index->is_cpu_register()) {
+    __ lgr_if_needed(Z_R1_scratch, _index->as_register());
+  } else {
+    // Index is not in a register: materialize it from its jint value.
+    __ load_const_optimized(Z_R1_scratch, _index->as_jint());
+  }
+
+  Runtime1::StubID stub_id;
+  if (_throw_index_out_of_bounds_exception) {
+    stub_id = Runtime1::throw_index_exception_id;
+  } else {
+    stub_id = Runtime1::throw_range_check_failed_id;
+  }
+  ce->emit_call_c(Runtime1::entry_for (stub_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+// Keeps a private copy of the debug info.
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
+  _info = new CodeEmitInfo(info);
+}
+
+// Calls the predicate_failed_trap runtime entry; control is not expected to return.
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+// Stores the method metadata (parameter slot 1) and the bci (slot 0), calls the
+// counter_overflow runtime entry and then branches back to the continuation.
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  bool success = __ set_metadata_constant(m, Z_R1_scratch);
+  if (!success) {
+    ce->compilation()->bailout("const section overflow");
+    return;
+  }
+  ce->store_parameter(/*_method->as_register()*/ Z_R1_scratch, 1);
+  ce->store_parameter(_bci, 0);
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::counter_overflow_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+// Calls the throw_div0_exception entry. When _offset is set (!= -1), the pair
+// (_offset, current code offset) is appended to the implicit exception table.
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {
+  if (_offset != -1) {
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  }
+  __ bind(_entry);
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::throw_div0_exception_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+// Appends (_offset, current code offset) to the implicit exception table and
+// calls either the deoptimization trap or the NullPointerException entry.
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
+  address a;
+  if (_info->deoptimize_on_exception()) {
+    // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
+    a = Runtime1::entry_for (Runtime1::predicate_failed_trap_id);
+  } else {
+    a = Runtime1::entry_for (Runtime1::throw_null_pointer_exception_id);
+  }
+
+  ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  __ bind(_entry);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+// Calls the runtime entry selected by _stub.
+// Note: pass object in Z_R1_scratch
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_obj->is_valid()) {
+    __ z_lgr(Z_R1_scratch, _obj->as_register()); // _obj contains the optional argument to the stub
+  }
+  address a = Runtime1::entry_for (_stub);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  debug_only(__ should_not_reach_here());
+}
+
+// Stores the operands and a copy of the debug info; stub_id must be one of the
+// three new_instance variants checked by the assert.
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+  _result = result;
+  _klass = klass;
+  _klass_reg = klass_reg;
+  _info = new CodeEmitInfo(info);
+  assert(stub_id == Runtime1::new_instance_id ||
+         stub_id == Runtime1::fast_new_instance_id ||
+         stub_id == Runtime1::fast_new_instance_init_check_id,
+         "need new_instance id");
+  _stub_id = stub_id;
+}
+
+// Calls the allocation entry selected by _stub_id (klass in Z_R11, result in
+// Z_R2, both asserted) and branches back to the continuation.
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
+  address a = Runtime1::entry_for (_stub_id);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == Z_R2, "callee returns result in Z_R2,");
+  __ z_brul(_continuation);
+}
+
+// Stores the operands and a copy of the debug info.
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+// Calls the new_type_array entry with the klass in Z_R11 (asserted) and the
+// length moved to Z_R13, then branches back to the continuation.
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
+  __ lgr_if_needed(Z_R13, _length->as_register());
+  address a = Runtime1::entry_for (Runtime1::new_type_array_id);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == Z_R2, "callee returns result in Z_R2,");
+  __ z_brul(_continuation);
+}
+
+// Stores the operands and a copy of the debug info.
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+// Same shape as NewTypeArrayStub::emit_code, but calls the new_object_array entry.
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(_klass_reg->as_register() == Z_R11, "call target expects klass in Z_R11");
+  __ lgr_if_needed(Z_R13, _length->as_register());
+  address a = Runtime1::entry_for (Runtime1::new_object_array_id);
+  ce->emit_call_c(a);
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  assert(_result->as_register() == Z_R2, "callee returns result in Z_R2,");
+  __ z_brul(_continuation);
+}
+
+// Forwards the registers to MonitorAccessStub and keeps a copy of the debug info.
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+  : MonitorAccessStub(obj_reg, lock_reg) {
+  _info = new CodeEmitInfo(info);
+}
+
+// Slow-path monitor enter: object in Z_R1_scratch, lock in Z_R13. The _nofpu
+// entry is chosen when the compilation contains no FPU code.
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  Runtime1::StubID enter_id;
+  if (ce->compilation()->has_fpu_code()) {
+    enter_id = Runtime1::monitorenter_id;
+  } else {
+    enter_id = Runtime1::monitorenter_nofpu_id;
+  }
+  __ lgr_if_needed(Z_R1_scratch, _obj_reg->as_register());
+  __ lgr_if_needed(Z_R13, _lock_reg->as_register()); // See LIRGenerator::syncTempOpr().
+  ce->emit_call_c(Runtime1::entry_for (enter_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+// Slow-path monitor exit: passes the BasicObjectLock address in Z_R1_scratch.
+// The _nofpu entry is chosen when the compilation contains no FPU code.
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  // Move address of the BasicObjectLock into Z_R1_scratch.
+  if (_compute_lock) {
+    // Lock_reg was destroyed by fast unlocking attempt => recompute it.
+    ce->monitor_address(_monitor_ix, FrameMap::as_opr(Z_R1_scratch));
+  } else {
+    __ lgr_if_needed(Z_R1_scratch, _lock_reg->as_register());
+  }
+  // Note: non-blocking leaf routine => no call info needed.
+  Runtime1::StubID exit_id;
+  if (ce->compilation()->has_fpu_code()) {
+    exit_id = Runtime1::monitorexit_id;
+  } else {
+    exit_id = Runtime1::monitorexit_nofpu_id;
+  }
+  ce->emit_call_c(Runtime1::entry_for (exit_id));
+  CHECK_BAILOUT();
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes).
+// - Replace original code with a call to the stub.
+// At Runtime:
+// - call to stub, jump to runtime.
+// - in runtime: Preserve all registers (especially objects, i.e., source and destination object).
+// - in runtime: After initializing class, restore original code, reexecute instruction.
+
+// Negated size of the call sequence emitted at the patch entry point: a
+// 12-byte load_const followed by a 2-byte BASR. emit_code() asserts that this
+// equals the distance from the end of that sequence back to the patch record.
+int PatchingStub::_patch_info_offset = - (12 /* load const */ + 2 /*BASR*/);
+
+// Aligns the patch site to NativeGeneralJump::instruction_size rounded up to
+// wordSize. In non-PRODUCT builds a block comment naming the patch kind is
+// emitted first.
+void PatchingStub::align_patch_site(MacroAssembler* masm) {
+#ifndef PRODUCT
+  const char* bc;
+  switch (_id) {
+  case access_field_id: bc = "patch site (access_field)"; break;
+  case load_klass_id: bc = "patch site (load_klass)"; break;
+  case load_mirror_id: bc = "patch site (load_mirror)"; break;
+  case load_appendix_id: bc = "patch site (load_appendix)"; break;
+  default: bc = "patch site (unknown patch id)"; break;
+  }
+  masm->block_comment(bc);
+#endif
+
+  masm->align(round_to(NativeGeneralJump::instruction_size, wordSize));
+}
+
+// Emits the patching stub. Layout, in emission order:
+//   1. a copy of the code at the patch site (re-generated load_const for
+//      klass/mirror/appendix patches, raw byte copy otherwise),
+//   2. for load_mirror only: a check against the klass' init thread,
+//   3. a 4-byte patch record,
+//   4. the call into the patching runtime entry plus a branch back to the
+//      patch site.
+// The original code at _pc_start is overwritten with a jump to this stub.
+void PatchingStub::emit_code(LIR_Assembler* ce) {
+  // Copy original code here.
+  assert(NativeGeneralJump::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF,
+         "not enough room for call");
+
+  NearLabel call_patch;
+
+  int being_initialized_entry = __ offset();
+
+  if (_id == load_klass_id) {
+    // Produce a copy of the load klass instruction for use by the case being initialized.
+#ifdef ASSERT
+    address start = __ pc();
+#endif
+    AddressLiteral addrlit((intptr_t)0, metadata_Relocation::spec(_index));
+    __ load_const(_obj, addrlit);
+
+#ifdef ASSERT
+    // The re-generated instruction must match the bytes at the patch site.
+    for (int i = 0; i < _bytes_to_copy; i++) {
+      address ptr = (address)(_pc_start + i);
+      int a_byte = (*ptr) & 0xFF;
+      assert(a_byte == *start++, "should be the same code");
+    }
+#endif
+  } else if (_id == load_mirror_id || _id == load_appendix_id) {
+    // Produce a copy of the load mirror instruction for use by the case being initialized.
+#ifdef ASSERT
+    address start = __ pc();
+#endif
+    AddressLiteral addrlit((intptr_t)0, oop_Relocation::spec(_index));
+    __ load_const(_obj, addrlit);
+
+#ifdef ASSERT
+    // The re-generated instruction must match the bytes at the patch site.
+    for (int i = 0; i < _bytes_to_copy; i++) {
+      address ptr = (address)(_pc_start + i);
+      int a_byte = (*ptr) & 0xFF;
+      assert(a_byte == *start++, "should be the same code");
+    }
+#endif
+  } else {
+    // Make a copy of the code which is going to be patched.
+    for (int i = 0; i < _bytes_to_copy; i++) {
+      address ptr = (address)(_pc_start + i);
+      int a_byte = (*ptr) & 0xFF;
+      __ emit_int8 (a_byte);
+    }
+  }
+
+  address end_of_patch = __ pc();
+  int bytes_to_skip = 0;
+  if (_id == load_mirror_id) {
+    int offset = __ offset();
+    if (CommentedAssembly) {
+      __ block_comment(" being_initialized check");
+    }
+
+    // Static field accesses have special semantics while the class
+    // initializer is being run, so we emit a test which can be used to
+    // check that this code is being executed by the initializing
+    // thread.
+    assert(_obj != noreg, "must be a valid register");
+    assert(_index >= 0, "must have oop index");
+    __ z_lg(Z_R1_scratch, java_lang_Class::klass_offset_in_bytes(), _obj);
+    __ z_cg(Z_thread, Address(Z_R1_scratch, InstanceKlass::init_thread_offset()));
+    __ branch_optimized(Assembler::bcondNotEqual, call_patch);
+
+    // Load_klass patches may execute the patched code before it's
+    // copied back into place so we need to jump back into the main
+    // code of the nmethod to continue execution.
+    __ branch_optimized(Assembler::bcondAlways, _patch_site_continuation);
+
+    // Make sure this extra code gets skipped.
+    bytes_to_skip += __ offset() - offset;
+  }
+
+  // Now emit the patch record telling the runtime how to find the
+  // pieces of the patch. We only need 3 bytes but to help the disassembler
+  // we make the data look like the following add instruction:
+  //   A R1, D2(X2, B2)
+  // which requires 4 bytes.
+  int sizeof_patch_record = 4;
+  bytes_to_skip += sizeof_patch_record;
+
+  // Emit the offsets needed to find the code to patch.
+  int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
+
+  // Emit the patch record: opcode of the add followed by 3 bytes patch record data.
+  __ emit_int8((int8_t)(A_ZOPC>>24));
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
+  address patch_info_pc = __ pc();
+  assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
+
+  // Redirect the patch site to the stub entry that follows the patch record.
+  address entry = __ pc();
+  NativeGeneralJump::insert_unconditional((address)_pc_start, entry);
+  address target = NULL;
+  relocInfo::relocType reloc_type = relocInfo::none;
+  switch (_id) {
+    case access_field_id: target = Runtime1::entry_for (Runtime1::access_field_patching_id); break;
+    case load_klass_id: target = Runtime1::entry_for (Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break;
+    case load_mirror_id: target = Runtime1::entry_for (Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break;
+    case load_appendix_id: target = Runtime1::entry_for (Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break;
+    default: ShouldNotReachHere();
+  }
+  __ bind(call_patch);
+
+  if (CommentedAssembly) {
+    __ block_comment("patch entry point");
+  }
+  // Cannot use call_c_opt() because its size is not constant.
+  __ load_const(Z_R1_scratch, target); // Must not optimize in order to keep _patch_info_offset constant.
+  __ z_basr(Z_R14, Z_R1_scratch);
+  assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change");
+  ce->add_call_info_here(_info);
+  __ z_brcl(Assembler::bcondAlways, _patch_site_entry);
+  if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) {
+    // Change the relocation recorded at the patch site from reloc_type to none.
+    CodeSection* cs = __ code_section();
+    address pc = (address)_pc_start;
+    RelocIterator iter(cs, pc, pc + 1);
+    relocInfo::change_reloc_info_for_address(&iter, (address) pc, reloc_type, relocInfo::none);
+  }
+}
+
+// Passes the trap request in Z_R1_scratch and calls the deoptimize runtime
+// entry; control is not expected to return.
+void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  __ load_const_optimized(Z_R1_scratch, _trap_request); // Pass trap request in Z_R1_scratch.
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::deoptimize_id));
+  CHECK_BAILOUT();
+  ce->add_call_info_here(_info);
+  DEBUG_ONLY(__ should_not_reach_here());
+}
+
+// Slow path of arraycopy: moves the five operands into Z_ARG1..Z_ARG5 and emits
+// an aligned, relocatable static call (nop + BRASL) to the resolve stub.
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
+  // Slow case: call to native.
+  __ bind(_entry);
+  __ lgr_if_needed(Z_ARG1, src()->as_register());
+  __ lgr_if_needed(Z_ARG2, src_pos()->as_register());
+  __ lgr_if_needed(Z_ARG3, dst()->as_register());
+  __ lgr_if_needed(Z_ARG4, dst_pos()->as_register());
+  __ lgr_if_needed(Z_ARG5, length()->as_register());
+
+  // Must align calls sites, otherwise they can't be updated atomically on MP hardware.
+  ce->align_call(lir_static_call);
+
+  assert((__ offset() + NativeCall::call_far_pcrelative_displacement_offset) % NativeCall::call_far_pcrelative_displacement_alignment == 0,
+         "must be aligned");
+
+  ce->emit_static_call_stub();
+
+  // Prepend each BRASL with a nop.
+  __ relocate(relocInfo::static_call_type);
+  __ z_nop();
+  __ z_brasl(Z_R14, SharedRuntime::get_resolve_static_call_stub());
+  ce->add_call_info_here(info());
+  ce->verify_oop_map(info());
+
+#ifndef PRODUCT
+  // Bump the slow-case statistics counter.
+  __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_slowcase_cnt);
+  __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch);
+#endif
+
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////////
+#if INCLUDE_ALL_GCS
+
+// G1 pre-barrier slow path: optionally loads the previous value, skips the
+// runtime call when it is zero, otherwise passes it in Z_R1_scratch.
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+  __ bind(_entry);
+  ce->check_reserved_argument_area(16); // RT stub needs 2 spill slots.
+  assert(pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = pre_val()->as_register();
+
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
+
+  __ z_ltgr(Z_R1_scratch, pre_val_reg); // Pass oop in Z_R1_scratch to Runtime1::g1_pre_barrier_slow_id.
+  __ branch_optimized(Assembler::bcondZero, _continuation);
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::g1_pre_barrier_slow_id));
+  CHECK_BAILOUT();
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+// G1 post-barrier slow path: skips the runtime call when the new value is
+// zero, otherwise passes the store address in Z_R1_scratch.
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  ce->check_reserved_argument_area(16); // RT stub needs 2 spill slots.
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register new_val_reg = new_val()->as_register();
+  __ z_ltgr(new_val_reg, new_val_reg);
+  __ branch_optimized(Assembler::bcondZero, _continuation);
+  __ z_lgr(Z_R1_scratch, addr()->as_pointer_register());
+  ce->emit_call_c(Runtime1::entry_for (Runtime1::g1_post_barrier_slow_id));
+  CHECK_BAILOUT();
+  __ branch_optimized(Assembler::bcondAlways, _continuation);
+}
+
+#endif // INCLUDE_ALL_GCS
+
+#undef __
diff --git a/hotspot/src/cpu/s390/vm/c1_Defs_s390.hpp b/hotspot/src/cpu/s390/vm/c1_Defs_s390.hpp
new file mode 100644
index 00000000000..e5bcc6776a6
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/c1_Defs_s390.hpp
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_DEFS_S390_HPP
+#define CPU_S390_VM_C1_DEFS_S390_HPP
+
+// Platform-dependent (pd_) constants for C1 on s390.
+
+// Native word offsets from memory address (big endian):
+// the high word is at offset 0, the low word follows it.
+enum {
+  pd_lo_word_offset_in_bytes = BytesPerInt,
+  pd_hi_word_offset_in_bytes = 0
+};
+
+// Explicit rounding operations are not required to implement the strictFP mode.
+enum {
+  pd_strict_fp_requires_explicit_rounding = false
+};
+
+// Register counts and C1 register-number ranges.
+enum {
+  pd_nof_cpu_regs_frame_map = 16,  // Number of registers used during code emission.
+  // Treat all registers as caller save (values of callee save are hard to find if caller is in runtime).
+  // unallocated: Z_thread, Z_fp, Z_SP, Z_R0_scratch, Z_R1_scratch, Z_R14
+  pd_nof_cpu_regs_unallocated = 6,
+  pd_nof_caller_save_cpu_regs_frame_map = pd_nof_cpu_regs_frame_map - pd_nof_cpu_regs_unallocated,  // Number of cpu registers killed by calls.
+  pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map,  // Number of registers that are visible to register allocator.
+  pd_nof_cpu_regs_linearscan = pd_nof_cpu_regs_frame_map,  // Number of registers visible to linear scan.
+  pd_first_cpu_reg = 0,
+  pd_last_cpu_reg = 9,  // Others are unallocated (see FrameMap::initialize()).
+
+  pd_nof_fpu_regs_frame_map = 16,  // Number of registers used during code emission.
+  pd_nof_fcpu_regs_unallocated = 1,  // Leave Z_F15 unallocated and use it as scratch register.
+  pd_nof_caller_save_fpu_regs_frame_map = pd_nof_fpu_regs_frame_map - pd_nof_fcpu_regs_unallocated,  // Number of fpu registers killed by calls.
+  pd_nof_fpu_regs_reg_alloc = pd_nof_caller_save_fpu_regs_frame_map,  // Number of registers that are visible to register allocator.
+  pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map,  // Number of registers visible to linear scan.
+  // FPU register numbers start right after the CPU register numbers.
+  pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
+  pd_last_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - pd_nof_fcpu_regs_unallocated - 1,
+
+  // No XMM registers on this platform: zero counts, -1 as range bounds.
+  pd_nof_xmm_regs_linearscan = 0,
+  pd_nof_caller_save_xmm_regs = 0,
+  pd_first_xmm_reg = -1,
+  pd_last_xmm_reg = -1
+};
+
+// For debug info: a float value in a register is saved in single precision by runtime stubs.
+enum {
+  pd_float_saved_as_double = false
+};
+
+#endif // CPU_S390_VM_C1_DEFS_S390_HPP
diff --git a/hotspot/src/cpu/s390/vm/c1_FpuStackSim_s390.hpp b/hotspot/src/cpu/s390/vm/c1_FpuStackSim_s390.hpp
new file mode 100644
index 00000000000..b791562ed03
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/c1_FpuStackSim_s390.hpp
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_C1_FPUSTACKSIM_S390_HPP +#define CPU_S390_VM_C1_FPUSTACKSIM_S390_HPP + +// No FPU stack on ZARCH_64 +class FpuStackSim; + +#endif // CPU_S390_VM_C1_FPUSTACKSIM_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.cpp b/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.cpp new file mode 100644 index 00000000000..f19b2d59902 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.cpp @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_s390.inline.hpp" + + +const int FrameMap::pd_c_runtime_reserved_arg_size = 7; + +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool outgoing) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + // Convert stack slot to an SP offset. + // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value + // so we must add it in here. + int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(Z_SP_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg = r_1->as_Register(); + if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { + opr = as_long_opr(reg); + } else if (type == T_OBJECT || type == T_ARRAY) { + opr = as_oop_opr(reg); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg); + } else { + opr = as_opr(reg); + } + } else if (r_1->is_FloatRegister()) { + assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); + FloatRegister f = r_1->as_FloatRegister(); + if (type == T_FLOAT) { + opr = as_float_opr(f); + } else { + opr = as_double_opr(f); + } + } else { + ShouldNotReachHere(); + } + return opr; +} + +// FrameMap +//-------------------------------------------------------- + +FloatRegister FrameMap::_fpu_rnr2reg [FrameMap::nof_fpu_regs]; // mapping c1 regnr. -> FloatRegister +int FrameMap::_fpu_reg2rnr [FrameMap::nof_fpu_regs]; // mapping assembler encoding -> c1 regnr. 
+
+// Some useful constant RInfo's (definitions of the static operands; FrameMap::initialize() assigns them):
+LIR_Opr FrameMap::Z_R0_opr;
+LIR_Opr FrameMap::Z_R1_opr;
+LIR_Opr FrameMap::Z_R2_opr;
+LIR_Opr FrameMap::Z_R3_opr;
+LIR_Opr FrameMap::Z_R4_opr;
+LIR_Opr FrameMap::Z_R5_opr;
+LIR_Opr FrameMap::Z_R6_opr;
+LIR_Opr FrameMap::Z_R7_opr;
+LIR_Opr FrameMap::Z_R8_opr;
+LIR_Opr FrameMap::Z_R9_opr;
+LIR_Opr FrameMap::Z_R10_opr;
+LIR_Opr FrameMap::Z_R11_opr;
+LIR_Opr FrameMap::Z_R12_opr;
+LIR_Opr FrameMap::Z_R13_opr;
+LIR_Opr FrameMap::Z_R14_opr;
+LIR_Opr FrameMap::Z_R15_opr;
+
+LIR_Opr FrameMap::Z_R0_oop_opr;
+LIR_Opr FrameMap::Z_R1_oop_opr;
+LIR_Opr FrameMap::Z_R2_oop_opr;
+LIR_Opr FrameMap::Z_R3_oop_opr;
+LIR_Opr FrameMap::Z_R4_oop_opr;
+LIR_Opr FrameMap::Z_R5_oop_opr;
+LIR_Opr FrameMap::Z_R6_oop_opr;
+LIR_Opr FrameMap::Z_R7_oop_opr;
+LIR_Opr FrameMap::Z_R8_oop_opr;
+LIR_Opr FrameMap::Z_R9_oop_opr;
+LIR_Opr FrameMap::Z_R10_oop_opr;
+LIR_Opr FrameMap::Z_R11_oop_opr;
+LIR_Opr FrameMap::Z_R12_oop_opr;
+LIR_Opr FrameMap::Z_R13_oop_opr;
+LIR_Opr FrameMap::Z_R14_oop_opr;
+LIR_Opr FrameMap::Z_R15_oop_opr;
+
+LIR_Opr FrameMap::Z_R0_metadata_opr;
+LIR_Opr FrameMap::Z_R1_metadata_opr;
+LIR_Opr FrameMap::Z_R2_metadata_opr;
+LIR_Opr FrameMap::Z_R3_metadata_opr;
+LIR_Opr FrameMap::Z_R4_metadata_opr;
+LIR_Opr FrameMap::Z_R5_metadata_opr;
+LIR_Opr FrameMap::Z_R6_metadata_opr;
+LIR_Opr FrameMap::Z_R7_metadata_opr;
+LIR_Opr FrameMap::Z_R8_metadata_opr;
+LIR_Opr FrameMap::Z_R9_metadata_opr;
+LIR_Opr FrameMap::Z_R10_metadata_opr;
+LIR_Opr FrameMap::Z_R11_metadata_opr;
+LIR_Opr FrameMap::Z_R12_metadata_opr;
+LIR_Opr FrameMap::Z_R13_metadata_opr;
+LIR_Opr FrameMap::Z_R14_metadata_opr;
+LIR_Opr FrameMap::Z_R15_metadata_opr;
+
+LIR_Opr FrameMap::Z_SP_opr;
+LIR_Opr FrameMap::Z_FP_opr;
+
+LIR_Opr FrameMap::Z_R2_long_opr;
+LIR_Opr FrameMap::Z_R10_long_opr;
+LIR_Opr FrameMap::Z_R11_long_opr;
+
+LIR_Opr FrameMap::Z_F0_opr;
+LIR_Opr FrameMap::Z_F0_double_opr;
+
+
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };  // Entries are filled in at the end of initialize().
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };  // Entries are filled in at the end of initialize().
+
+
+// c1 rnr -> FloatRegister (lookup in the _fpu_rnr2reg table written by map_float_register()).
+FloatRegister FrameMap::nr2floatreg (int rnr) {
+  assert(_init_done, "tables not initialized");
+  debug_only(fpu_range_check(rnr);)
+  return _fpu_rnr2reg[rnr];
+}
+
+void FrameMap::map_float_register(int rnr, FloatRegister reg) {  // Record the pair (rnr, reg) in both lookup tables.
+  debug_only(fpu_range_check(rnr);)
+  debug_only(fpu_range_check(reg->encoding());)
+  _fpu_rnr2reg[rnr] = reg;              // mapping c1 regnr. -> FloatRegister
+  _fpu_reg2rnr[reg->encoding()] = rnr;  // mapping assembler encoding -> c1 regnr.
+}
+
+void FrameMap::initialize() {  // Set up the c1 register numbering and the operand constants; asserted to run only once.
+  assert(!_init_done, "once");
+
+  DEBUG_ONLY(int allocated = 0;)    // Both counters are only read by the three asserts after the CPU mapping.
+  DEBUG_ONLY(int unallocated = 0;)
+
+  // Register usage (these three are counted as unallocated and get the c1 numbers 13..15 below):
+  // Z_thread (Z_R8)
+  // Z_fp (Z_R9)
+  // Z_SP (Z_R15)
+  DEBUG_ONLY(allocated++); map_register(0, Z_R2);
+  DEBUG_ONLY(allocated++); map_register(1, Z_R3);
+  DEBUG_ONLY(allocated++); map_register(2, Z_R4);
+  DEBUG_ONLY(allocated++); map_register(3, Z_R5);
+  DEBUG_ONLY(allocated++); map_register(4, Z_R6);
+  DEBUG_ONLY(allocated++); map_register(5, Z_R7);
+  DEBUG_ONLY(allocated++); map_register(6, Z_R10);
+  DEBUG_ONLY(allocated++); map_register(7, Z_R11);
+  DEBUG_ONLY(allocated++); map_register(8, Z_R12);
+  DEBUG_ONLY(allocated++); map_register(9, Z_R13); // <- last register visible in RegAlloc
+  DEBUG_ONLY(unallocated++); map_register(11, Z_R0); // Z_R0_scratch
+  DEBUG_ONLY(unallocated++); map_register(12, Z_R1); // Z_R1_scratch
+  DEBUG_ONLY(unallocated++); map_register(10, Z_R14); // return pc; TODO: Try to let c1/c2 allocate R14.
+
+  // The following registers are usually unavailable (see "Register usage" above).
+  DEBUG_ONLY(unallocated++); map_register(13, Z_R8);   // Z_thread
+  DEBUG_ONLY(unallocated++); map_register(14, Z_R9);   // Z_fp
+  DEBUG_ONLY(unallocated++); map_register(15, Z_R15);  // Z_SP
+  assert(allocated-1 == pd_last_cpu_reg, "wrong number/mapping of allocated CPU registers");
+  assert(unallocated == pd_nof_cpu_regs_unallocated, "wrong number of unallocated CPU registers");
+  assert(nof_cpu_regs == allocated+unallocated, "wrong number of CPU registers");
+
+  int j = 0;  // Next free c1 float register number.
+  for (int i = 0; i < nof_fpu_regs; i++) {
+    if (as_FloatRegister(i) == Z_fscratch_1) continue; // unallocated; it is mapped after the loop instead
+    map_float_register(j++, as_FloatRegister(i));
+  }
+  assert(j == nof_fpu_regs-1, "missed one fpu reg?");
+  map_float_register(j++, Z_fscratch_1);  // The scratch register ends up with the highest c1 number.
+
+  _init_done = true;  // Set before the operands are built: fpu_reg2rnr() and nr2floatreg() assert it.
+
+  Z_R0_opr = as_opr(Z_R0);
+  Z_R1_opr = as_opr(Z_R1);
+  Z_R2_opr = as_opr(Z_R2);
+  Z_R3_opr = as_opr(Z_R3);
+  Z_R4_opr = as_opr(Z_R4);
+  Z_R5_opr = as_opr(Z_R5);
+  Z_R6_opr = as_opr(Z_R6);
+  Z_R7_opr = as_opr(Z_R7);
+  Z_R8_opr = as_opr(Z_R8);
+  Z_R9_opr = as_opr(Z_R9);
+  Z_R10_opr = as_opr(Z_R10);
+  Z_R11_opr = as_opr(Z_R11);
+  Z_R12_opr = as_opr(Z_R12);
+  Z_R13_opr = as_opr(Z_R13);
+  Z_R14_opr = as_opr(Z_R14);
+  Z_R15_opr = as_opr(Z_R15);
+
+  Z_R0_oop_opr = as_oop_opr(Z_R0);
+  Z_R1_oop_opr = as_oop_opr(Z_R1);
+  Z_R2_oop_opr = as_oop_opr(Z_R2);
+  Z_R3_oop_opr = as_oop_opr(Z_R3);
+  Z_R4_oop_opr = as_oop_opr(Z_R4);
+  Z_R5_oop_opr = as_oop_opr(Z_R5);
+  Z_R6_oop_opr = as_oop_opr(Z_R6);
+  Z_R7_oop_opr = as_oop_opr(Z_R7);
+  Z_R8_oop_opr = as_oop_opr(Z_R8);
+  Z_R9_oop_opr = as_oop_opr(Z_R9);
+  Z_R10_oop_opr = as_oop_opr(Z_R10);
+  Z_R11_oop_opr = as_oop_opr(Z_R11);
+  Z_R12_oop_opr = as_oop_opr(Z_R12);
+  Z_R13_oop_opr = as_oop_opr(Z_R13);
+  Z_R14_oop_opr = as_oop_opr(Z_R14);
+  Z_R15_oop_opr = as_oop_opr(Z_R15);
+
+  Z_R0_metadata_opr = as_metadata_opr(Z_R0);
+  Z_R1_metadata_opr = as_metadata_opr(Z_R1);
+  Z_R2_metadata_opr = as_metadata_opr(Z_R2);
+  Z_R3_metadata_opr = as_metadata_opr(Z_R3);
+  Z_R4_metadata_opr = as_metadata_opr(Z_R4);
+  Z_R5_metadata_opr = as_metadata_opr(Z_R5);
+  Z_R6_metadata_opr = as_metadata_opr(Z_R6);
+  Z_R7_metadata_opr = as_metadata_opr(Z_R7);
+  Z_R8_metadata_opr = as_metadata_opr(Z_R8);
+  Z_R9_metadata_opr = as_metadata_opr(Z_R9);
+  Z_R10_metadata_opr = as_metadata_opr(Z_R10);
+  Z_R11_metadata_opr = as_metadata_opr(Z_R11);
+  Z_R12_metadata_opr = as_metadata_opr(Z_R12);
+  Z_R13_metadata_opr = as_metadata_opr(Z_R13);
+  Z_R14_metadata_opr = as_metadata_opr(Z_R14);
+  Z_R15_metadata_opr = as_metadata_opr(Z_R15);
+
+  // TODO: needed? Or can we make Z_R9 available for linear scan allocation.
+  Z_FP_opr = as_pointer_opr(Z_fp);
+  Z_SP_opr = as_pointer_opr(Z_SP);
+
+  Z_R2_long_opr = LIR_OprFact::double_cpu(cpu_reg2rnr(Z_R2), cpu_reg2rnr(Z_R2));  // Same c1 number passed as both arguments.
+  Z_R10_long_opr = LIR_OprFact::double_cpu(cpu_reg2rnr(Z_R10), cpu_reg2rnr(Z_R10));
+  Z_R11_long_opr = LIR_OprFact::double_cpu(cpu_reg2rnr(Z_R11), cpu_reg2rnr(Z_R11));
+
+  Z_F0_opr = as_float_opr(Z_F0);
+  Z_F0_double_opr = as_double_opr(Z_F0);
+
+  // All allocated cpu regs are caller saved: one operand per c1 number below max_nof_caller_save_cpu_regs.
+  for (int c1rnr = 0; c1rnr < max_nof_caller_save_cpu_regs; c1rnr++) {
+    _caller_save_cpu_regs[c1rnr] = as_opr(cpu_rnr2reg(c1rnr));
+  }
+
+  // All allocated fpu regs are caller saved: one operand per c1 number below nof_caller_save_fpu_regs.
+  for (int c1rnr = 0; c1rnr < nof_caller_save_fpu_regs; c1rnr++) {
+    _caller_save_fpu_regs[c1rnr] = as_float_opr(nr2floatreg(c1rnr));
+  }
+}
+
+Address FrameMap::make_new_address(ByteSize sp_offset) const {  // Address of a stack slot: Z_SP plus sp_offset.
+  return Address(Z_SP, sp_offset);
+}
+
+VMReg FrameMap::fpu_regname (int n) {  // VMReg of the float register that has c1 number n.
+  return nr2floatreg(n)->as_VMReg();
+}
+
+LIR_Opr FrameMap::stack_pointer() {  // The Z_SP operand built in initialize().
+  return Z_SP_opr;
+}
+
+// JSR 292
+// On ZARCH_64, there is no need to save the SP, because neither
+// method handle intrinsics nor compiled lambda forms modify it.
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {  // Hands back the illegal operand, i.e. no SP save operand.
+  return LIR_OprFact::illegalOpr;
+}
+
+bool FrameMap::validate_frame() {  // Unconditionally returns true.
+  return true;
+}
diff --git a/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.hpp b/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.hpp
new file mode 100644
index 00000000000..8c0c1b596cf
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/c1_FrameMap_s390.hpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_FRAMEMAP_S390_HPP
+#define CPU_S390_VM_C1_FRAMEMAP_S390_HPP
+
+ public:
+
+  enum {
+    nof_reg_args = 5,   // Registers Z_ARG1 - Z_ARG5 are available for parameter passing.
+    first_available_sp_in_frame = frame::z_abi_16_size,
+    frame_pad_in_bytes = 0
+  };
+
+  static const int pd_c_runtime_reserved_arg_size;
+
+  static LIR_Opr Z_R0_opr;  // Z_Rn_opr: assigned in FrameMap::initialize() as as_opr(Z_Rn).
+  static LIR_Opr Z_R1_opr;
+  static LIR_Opr Z_R2_opr;
+  static LIR_Opr Z_R3_opr;
+  static LIR_Opr Z_R4_opr;
+  static LIR_Opr Z_R5_opr;
+  static LIR_Opr Z_R6_opr;
+  static LIR_Opr Z_R7_opr;
+  static LIR_Opr Z_R8_opr;
+  static LIR_Opr Z_R9_opr;
+  static LIR_Opr Z_R10_opr;
+  static LIR_Opr Z_R11_opr;
+  static LIR_Opr Z_R12_opr;
+  static LIR_Opr Z_R13_opr;
+  static LIR_Opr Z_R14_opr;
+  static LIR_Opr Z_R15_opr;
+
+  static LIR_Opr Z_R0_oop_opr;  // Z_Rn_oop_opr: assigned in FrameMap::initialize() as as_oop_opr(Z_Rn).
+  static LIR_Opr Z_R1_oop_opr;
+  static LIR_Opr Z_R2_oop_opr;
+  static LIR_Opr Z_R3_oop_opr;
+  static LIR_Opr Z_R4_oop_opr;
+  static LIR_Opr Z_R5_oop_opr;
+  static LIR_Opr Z_R6_oop_opr;
+  static LIR_Opr Z_R7_oop_opr;
+  static LIR_Opr Z_R8_oop_opr;
+  static LIR_Opr Z_R9_oop_opr;
+  static LIR_Opr Z_R10_oop_opr;
+  static LIR_Opr Z_R11_oop_opr;
+  static LIR_Opr Z_R12_oop_opr;
+  static LIR_Opr Z_R13_oop_opr;
+  static LIR_Opr Z_R14_oop_opr;
+  static LIR_Opr Z_R15_oop_opr;
+
+  static LIR_Opr Z_R0_metadata_opr;  // Z_Rn_metadata_opr: assigned in FrameMap::initialize() as as_metadata_opr(Z_Rn).
+  static LIR_Opr Z_R1_metadata_opr;
+  static LIR_Opr Z_R2_metadata_opr;
+  static LIR_Opr Z_R3_metadata_opr;
+  static LIR_Opr Z_R4_metadata_opr;
+  static LIR_Opr Z_R5_metadata_opr;
+  static LIR_Opr Z_R6_metadata_opr;
+  static LIR_Opr Z_R7_metadata_opr;
+  static LIR_Opr Z_R8_metadata_opr;
+  static LIR_Opr Z_R9_metadata_opr;
+  static LIR_Opr Z_R10_metadata_opr;
+  static LIR_Opr Z_R11_metadata_opr;
+  static LIR_Opr Z_R12_metadata_opr;
+  static LIR_Opr Z_R13_metadata_opr;
+  static LIR_Opr Z_R14_metadata_opr;
+  static LIR_Opr Z_R15_metadata_opr;
+
+  static LIR_Opr Z_SP_opr;  // as_pointer_opr(Z_SP)
+  static LIR_Opr Z_FP_opr;  // as_pointer_opr(Z_fp)
+
+  static LIR_Opr Z_R2_long_opr;  // Long operands: double_cpu built from one register's c1 number, passed twice.
+  static LIR_Opr Z_R10_long_opr;
+  static LIR_Opr Z_R11_long_opr;
+
+  static LIR_Opr Z_F0_opr;         // as_float_opr(Z_F0)
+  static LIR_Opr Z_F0_double_opr;  // as_double_opr(Z_F0)
+
+ private:
+  static FloatRegister _fpu_rnr2reg [FrameMap::nof_fpu_regs]; // mapping c1 regnr. -> FloatRegister
+  static int _fpu_reg2rnr [FrameMap::nof_fpu_regs]; // mapping assembler encoding -> c1 regnr.
+
+  static void map_float_register(int rnr, FloatRegister reg);  // Enters (rnr, reg) into both tables above.
+
+  // FloatRegister -> c1 rnr (table lookup by reg->encoding(); the result is range-checked in debug builds only).
+  static int fpu_reg2rnr (FloatRegister reg) {
+    assert(_init_done, "tables not initialized");
+    int c1rnr = _fpu_reg2rnr[reg->encoding()];
+    debug_only(fpu_range_check(c1rnr);)
+    return c1rnr;
+  }
+
+ public:
+
+  static LIR_Opr as_long_opr(Register r) {  // double_cpu operand with r's c1 number as both arguments.
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+  }
+  static LIR_Opr as_pointer_opr(Register r) {  // Built exactly like as_long_opr().
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+  }
+
+  static LIR_Opr as_float_opr(FloatRegister r) {  // single_fpu operand for r's c1 number.
+    return LIR_OprFact::single_fpu(fpu_reg2rnr(r));
+  }
+  static LIR_Opr as_double_opr(FloatRegister r) {  // double_fpu operand for r's c1 number.
+    return LIR_OprFact::double_fpu(fpu_reg2rnr(r));
+  }
+
+  static FloatRegister nr2floatreg (int rnr);  // c1 rnr -> FloatRegister
+
+  static VMReg fpu_regname (int n);  // VMReg of the float register with c1 number n.
+
+  // No callee saved registers (saved values are not accessible if callee is in runtime).
+  static bool is_caller_save_register (LIR_Opr opr) { return true; }
+  static bool is_caller_save_register (Register r) { return true; }
+
+  static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }
+  static int last_cpu_reg() { return pd_last_cpu_reg; }
+
+#endif // CPU_S390_VM_C1_FRAMEMAP_S390_HPP
diff --git a/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.cpp b/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.cpp
new file mode 100644
index 00000000000..b7d50636a89
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.cpp
@@ -0,0 +1,3037 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTableModRefBS.hpp" +#include "nativeInst_s390.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_s390.inline.hpp" + +#define __ _masm-> + +#ifndef PRODUCT +#undef __ +#define __ (Verbose ? 
(_masm->block_comment(FILE_AND_LINE),_masm) : _masm)-> +#endif + +//------------------------------------------------------------ + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { + // Not used on ZARCH_64 + ShouldNotCallThis(); + return false; +} + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::Z_R2_oop_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::Z_R2_opr; +} + +int LIR_Assembler::initial_frame_size_in_bytes() const { + return in_bytes(frame_map()->framesize_in_bytes()); +} + +// Inline cache check: done before the frame is built. +// The inline cached class is in Z_inline_cache(Z_R9). +// We fetch the class of the receiver and compare it with the cached class. +// If they do not match we jump to the slow case. +int LIR_Assembler::check_icache() { + Register receiver = receiverOpr()->as_register(); + int offset = __ offset(); + __ inline_cache_check(receiver, Z_inline_cache); + return offset; +} + +void LIR_Assembler::osr_entry() { + // On-stack-replacement entry sequence (interpreter frame layout described in interpreter_sparc.cpp): + // + // 1. Create a new compiled activation. + // 2. Initialize local variables in the compiled activation. The expression stack must be empty + // at the osr_bci; it is not initialized. + // 3. Jump to the continuation address in compiled code to resume execution. + + // OSR entry point + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + ValueStack* entry_state = osr_entry->end()->state(); + int number_of_locks = entry_state->locks_size(); + + // Create a frame for the compiled activation. 
+ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[number_of_locks-1..0] + // + // Locals is a direct copy of the interpreter frame so in the osr buffer + // the first slot in the local array is the last local from the interpreter + // and the last slot is local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // I0: pointer to osr buffer + // + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. + + Register OSR_buf = osrBufferPointer()->as_register(); + { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. + for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); + // Verify the interpreter's monitor has a non-null object. + __ asm_assert_mem8_isnot_zero(slot_offset + 1*BytesPerWord, OSR_buf, "locked object is NULL", __LINE__); + // Copy the lock field into the compiled activation. 
+ __ z_lg(Z_R1_scratch, slot_offset + 0, OSR_buf); + __ z_stg(Z_R1_scratch, frame_map()->address_for_monitor_lock(i)); + __ z_lg(Z_R1_scratch, slot_offset + 1*BytesPerWord, OSR_buf); + __ z_stg(Z_R1_scratch, frame_map()->address_for_monitor_object(i)); + } + } +} + +// -------------------------------------------------------------------------------------------- + +address LIR_Assembler::emit_call_c(address a) { + __ align_call_far_patchable(__ pc()); + address call_addr = __ call_c_opt(a); + if (call_addr == NULL) { + bailout("const section overflow"); + } + return call_addr; +} + +int LIR_Assembler::emit_exception_handler() { + // If the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci. => Add a nop. + // (was bug 5/14/1999 - gri) + __ nop(); + + // Generate code for exception handler. + address handler_base = __ start_a_stub(exception_handler_size); + if (handler_base == NULL) { + // Not enough space left for the handler. + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + address a = Runtime1::entry_for (Runtime1::handle_exception_from_callee_id); + address call_addr = emit_call_c(a); + CHECK_BAILOUT_(-1); + __ should_not_reach_here(); + guarantee(code_offset() - offset <= exception_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + +// Emit the code to remove the frame from the stack in the exception +// unwind path. +int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif + + int offset = code_offset(); + Register exception_oop_callee_saved = Z_R10; // Z_R10 is callee-saved. + Register Rtmp1 = Z_R11; + Register Rtmp2 = Z_R12; + + // Fetch the exception from TLS and clear out exception related thread state. 
+ Address exc_oop_addr = Address(Z_thread, JavaThread::exception_oop_offset()); + Address exc_pc_addr = Address(Z_thread, JavaThread::exception_pc_offset()); + __ z_lg(Z_EXC_OOP, exc_oop_addr); + __ clear_mem(exc_oop_addr, sizeof(oop)); + __ clear_mem(exc_pc_addr, sizeof(intptr_t)); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(Z_EXC_OOP); + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ lgr_if_needed(exception_oop_callee_saved, Z_EXC_OOP); // Preserve the exception. + } + + // Preform needed unlocking. + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + // Runtime1::monitorexit_id expects lock address in Z_R1_scratch. + LIR_Opr lock = FrameMap::as_opr(Z_R1_scratch); + monitor_address(0, lock); + stub = new MonitorExitStub(lock, true, 0); + __ unlock_object(Rtmp1, Rtmp2, lock->as_register(), *stub->entry()); + __ bind(*stub->continuation()); + } + + if (compilation()->env()->dtrace_method_probes()) { + ShouldNotReachHere(); // Not supported. +#if 0 + __ mov(rdi, r15_thread); + __ mov_metadata(rsi, method()->constant_encoding()); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit))); +#endif + } + + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ lgr_if_needed(Z_EXC_OOP, exception_oop_callee_saved); // Restore the exception. + } + + // Remove the activation and dispatch to the unwind handler. + __ pop_frame(); + __ z_lg(Z_EXC_PC, _z_abi16(return_pc), Z_SP); + + // Z_EXC_OOP: exception oop + // Z_EXC_PC: exception pc + + // Dispatch to the unwind logic. + __ load_const_optimized(Z_R5, Runtime1::entry_for (Runtime1::unwind_exception_id)); + __ z_br(Z_R5); + + // Emit the slow path assembly. 
+ if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + +int LIR_Assembler::emit_deopt_handler() { + // If the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci. => Add a nop. + // (was bug 5/14/1999 - gri) + __ nop(); + + // Generate code for exception handler. + address handler_base = __ start_a_stub(deopt_handler_size); + if (handler_base == NULL) { + // Not enough space left for the handler. + bailout("deopt handler overflow"); + return -1; + } int offset = code_offset(); + // Size must be constant (see HandlerImpl::emit_deopt_handler). + __ load_const(Z_R1_scratch, SharedRuntime::deopt_blob()->unpack()); + __ call(Z_R1_scratch); + guarantee(code_offset() - offset <= deopt_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ clear_reg(reg, true/*64bit*/, false/*set cc*/); // Must not kill cc set by cmove. + } else { + AddressLiteral a = __ allocate_oop_address(o); + bool success = __ load_oop_from_toc(reg, a, reg); + if (!success) { + bailout("const section overflow"); + } + } +} + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + // Allocate a new index in table to hold the object once it's been patched. + int oop_index = __ oop_recorder()->allocate_oop_index(NULL); + PatchingStub* patch = new PatchingStub(_masm, patching_id(info), oop_index); + + AddressLiteral addrlit((intptr_t)0, oop_Relocation::spec(oop_index)); + assert(addrlit.rspec().type() == relocInfo::oop_type, "must be an oop reloc"); + // The NULL will be dynamically patched later so the sequence to + // load the address literal must not be optimized. 
+ __ load_const(reg, addrlit); + + patching_epilog(patch, lir_patch_normal, reg, info); +} + +void LIR_Assembler::metadata2reg(Metadata* md, Register reg) { + bool success = __ set_metadata_constant(md, reg); + if (!success) { + bailout("const section overflow"); + return; + } +} + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo *info) { + // Allocate a new index in table to hold the klass once it's been patched. + int index = __ oop_recorder()->allocate_metadata_index(NULL); + PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id, index); + AddressLiteral addrlit((intptr_t)0, metadata_Relocation::spec(index)); + assert(addrlit.rspec().type() == relocInfo::metadata_type, "must be an metadata reloc"); + // The NULL will be dynamically patched later so the sequence to + // load the address literal must not be optimized. + __ load_const(reg, addrlit); + + patching_epilog(patch, lir_patch_normal, reg, info); +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { + case lir_idiv: + case lir_irem: + arithmetic_idiv(op->code(), + op->in_opr1(), + op->in_opr2(), + op->in_opr3(), + op->result_opr(), + op->info()); + break; + default: ShouldNotReachHere(); break; + } +} + + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) { _branch_target_blocks.append(op->block()); } + if (op->ublock() != NULL) { _branch_target_blocks.append(op->ublock()); } +#endif + + if (op->cond() == lir_cond_always) { + if (op->info() != NULL) { add_debug_info_for_branch(op->info()); } + __ branch_optimized(Assembler::bcondAlways, *(op->label())); + } else { + Assembler::branch_condition acond = Assembler::bcondZero; + if (op->code() == lir_cond_float_branch) { + assert(op->ublock() != NULL, "must have unordered successor"); + __ branch_optimized(Assembler::bcondNotOrdered, *(op->ublock()->label())); + } + 
switch (op->cond()) { + case lir_cond_equal: acond = Assembler::bcondEqual; break; + case lir_cond_notEqual: acond = Assembler::bcondNotEqual; break; + case lir_cond_less: acond = Assembler::bcondLow; break; + case lir_cond_lessEqual: acond = Assembler::bcondNotHigh; break; + case lir_cond_greaterEqual: acond = Assembler::bcondNotLow; break; + case lir_cond_greater: acond = Assembler::bcondHigh; break; + case lir_cond_belowEqual: acond = Assembler::bcondNotHigh; break; + case lir_cond_aboveEqual: acond = Assembler::bcondNotLow; break; + default: ShouldNotReachHere(); + } + __ branch_optimized(acond,*(op->label())); + } +} + + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); + + switch (op->bytecode()) { + case Bytecodes::_i2l: + __ move_reg_if_needed(dest->as_register_lo(), T_LONG, src->as_register(), T_INT); + break; + + case Bytecodes::_l2i: + __ move_reg_if_needed(dest->as_register(), T_INT, src->as_register_lo(), T_LONG); + break; + + case Bytecodes::_i2b: + __ move_reg_if_needed(dest->as_register(), T_BYTE, src->as_register(), T_INT); + break; + + case Bytecodes::_i2c: + __ move_reg_if_needed(dest->as_register(), T_CHAR, src->as_register(), T_INT); + break; + + case Bytecodes::_i2s: + __ move_reg_if_needed(dest->as_register(), T_SHORT, src->as_register(), T_INT); + break; + + case Bytecodes::_f2d: + assert(dest->is_double_fpu(), "check"); + __ move_freg_if_needed(dest->as_double_reg(), T_DOUBLE, src->as_float_reg(), T_FLOAT); + break; + + case Bytecodes::_d2f: + assert(dest->is_single_fpu(), "check"); + __ move_freg_if_needed(dest->as_float_reg(), T_FLOAT, src->as_double_reg(), T_DOUBLE); + break; + + case Bytecodes::_i2f: + __ z_cefbr(dest->as_float_reg(), src->as_register()); + break; + + case Bytecodes::_i2d: + __ z_cdfbr(dest->as_double_reg(), src->as_register()); + break; + + case Bytecodes::_l2f: + __ z_cegbr(dest->as_float_reg(), src->as_register_lo()); + break; + case 
Bytecodes::_l2d: + __ z_cdgbr(dest->as_double_reg(), src->as_register_lo()); + break; + + case Bytecodes::_f2i: + case Bytecodes::_f2l: { + Label done; + FloatRegister Rsrc = src->as_float_reg(); + Register Rdst = (op->bytecode() == Bytecodes::_f2i ? dest->as_register() : dest->as_register_lo()); + __ clear_reg(Rdst, true, false); + __ z_cebr(Rsrc, Rsrc); + __ z_brno(done); // NaN -> 0 + if (op->bytecode() == Bytecodes::_f2i) { + __ z_cfebr(Rdst, Rsrc, Assembler::to_zero); + } else { // op->bytecode() == Bytecodes::_f2l + __ z_cgebr(Rdst, Rsrc, Assembler::to_zero); + } + __ bind(done); + } + break; + + case Bytecodes::_d2i: + case Bytecodes::_d2l: { + Label done; + FloatRegister Rsrc = src->as_double_reg(); + Register Rdst = (op->bytecode() == Bytecodes::_d2i ? dest->as_register() : dest->as_register_lo()); + __ clear_reg(Rdst, true, false); // Don't set CC. + __ z_cdbr(Rsrc, Rsrc); + __ z_brno(done); // NaN -> 0 + if (op->bytecode() == Bytecodes::_d2i) { + __ z_cfdbr(Rdst, Rsrc, Assembler::to_zero); + } else { // Bytecodes::_d2l + __ z_cgdbr(Rdst, Rsrc, Assembler::to_zero); + } + __ bind(done); + } + break; + + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::align_call(LIR_Code code) { + // End of call instruction must be 4 byte aligned. 
+ int offset = __ offset(); + switch (code) { + case lir_icvirtual_call: + offset += MacroAssembler::load_const_from_toc_size(); + // no break + case lir_static_call: + case lir_optvirtual_call: + case lir_dynamic_call: + offset += NativeCall::call_far_pcrelative_displacement_offset; + break; + case lir_virtual_call: // currently, sparc-specific for niagara + default: ShouldNotReachHere(); + } + if ((offset & (NativeCall::call_far_pcrelative_displacement_alignment-1)) != 0) { + __ nop(); + } +} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + assert((__ offset() + NativeCall::call_far_pcrelative_displacement_offset) % NativeCall::call_far_pcrelative_displacement_alignment == 0, + "must be aligned (offset=%d)", __ offset()); + assert(rtype == relocInfo::none || + rtype == relocInfo::opt_virtual_call_type || + rtype == relocInfo::static_call_type, "unexpected rtype"); + // Prepend each BRASL with a nop. + __ relocate(rtype); + __ z_nop(); + __ z_brasl(Z_R14, op->addr()); + add_call_info(code_offset(), op->info()); +} + +void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + address virtual_call_oop_addr = NULL; + AddressLiteral empty_ic((address) Universe::non_oop_word()); + virtual_call_oop_addr = __ pc(); + bool success = __ load_const_from_toc(Z_inline_cache, empty_ic); + if (!success) { + bailout("const section overflow"); + return; + } + + // CALL to fixup routine. Fixup routine uses ScopeDesc info + // to determine who we intended to call. 
+ __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr)); + call(op, relocInfo::none); +} + +// not supported +void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { + ShouldNotReachHere(); +} + +void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { + if (from_reg != to_reg) __ z_lgr(to_reg, from_reg); +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_stack(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + + unsigned int lmem = 0; + unsigned int lcon = 0; + int64_t cbits = 0; + Address dest_addr; + switch (c->type()) { + case T_INT: // fall through + case T_FLOAT: + dest_addr = frame_map()->address_for_slot(dest->single_stack_ix()); + lmem = 4; lcon = 4; cbits = c->as_jint_bits(); + break; + + case T_ADDRESS: + dest_addr = frame_map()->address_for_slot(dest->single_stack_ix()); + lmem = 8; lcon = 4; cbits = c->as_jint_bits(); + break; + + case T_OBJECT: + dest_addr = frame_map()->address_for_slot(dest->single_stack_ix()); + if (c->as_jobject() == NULL) { + __ store_const(dest_addr, (int64_t)NULL_WORD, 8, 8); + } else { + jobject2reg(c->as_jobject(), Z_R1_scratch); + __ reg2mem_opt(Z_R1_scratch, dest_addr, true); + } + return; + + case T_LONG: // fall through + case T_DOUBLE: + dest_addr = frame_map()->address_for_slot(dest->double_stack_ix()); + lmem = 8; lcon = 8; cbits = (int64_t)(c->as_jlong_bits()); + break; + + default: + ShouldNotReachHere(); + } + + __ store_const(dest_addr, cbits, lmem, lcon); +} + +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_address(), "should not call otherwise"); + // See special case in LIRGenerator::do_StoreIndexed. + // T_BYTE: Special case for card mark store. 
+ assert(type == T_BYTE || !dest->as_address_ptr()->index()->is_valid(), "not supported"); + LIR_Const* c = src->as_constant_ptr(); + Address addr = as_Address(dest->as_address_ptr()); + + int store_offset = -1; + unsigned int lmem = 0; + unsigned int lcon = 0; + int64_t cbits = 0; + switch (type) { + case T_INT: // fall through + case T_FLOAT: + lmem = 4; lcon = 4; cbits = c->as_jint_bits(); + break; + + case T_ADDRESS: + lmem = 8; lcon = 4; cbits = c->as_jint_bits(); + break; + + case T_OBJECT: // fall through + case T_ARRAY: + if (c->as_jobject() == NULL) { + if (UseCompressedOops && !wide) { + store_offset = __ store_const(addr, (int32_t)NULL_WORD, 4, 4); + } else { + store_offset = __ store_const(addr, (int64_t)NULL_WORD, 8, 8); + } + } else { + jobject2reg(c->as_jobject(), Z_R1_scratch); + if (UseCompressedOops && !wide) { + __ encode_heap_oop(Z_R1_scratch); + store_offset = __ reg2mem_opt(Z_R1_scratch, addr, false); + } else { + store_offset = __ reg2mem_opt(Z_R1_scratch, addr, true); + } + } + assert(store_offset >= 0, "check"); + break; + + case T_LONG: // fall through + case T_DOUBLE: + lmem = 8; lcon = 8; cbits = (int64_t)(c->as_jlong_bits()); + break; + + case T_BOOLEAN: // fall through + case T_BYTE: + lmem = 1; lcon = 1; cbits = (int8_t)(c->as_jint()); + break; + + case T_CHAR: // fall through + case T_SHORT: + lmem = 2; lcon = 2; cbits = (int16_t)(c->as_jint()); + break; + + default: + ShouldNotReachHere(); + }; + + // Index register is normally not supported, but for + // LIRGenerator::CardTableModRef_post_barrier we make an exception. 
+ if (type == T_BYTE && dest->as_address_ptr()->index()->is_valid()) { + __ load_const_optimized(Z_R0_scratch, (int8_t)(c->as_jint())); + store_offset = __ offset(); + if (Immediate::is_uimm12(addr.disp())) { + __ z_stc(Z_R0_scratch, addr); + } else { + __ z_stcy(Z_R0_scratch, addr); + } + } + + if (store_offset == -1) { + store_offset = __ store_const(addr, cbits, lmem, lcon); + assert(store_offset >= 0, "check"); + } + + if (info != NULL) { + add_debug_info_for_null_check(store_offset, info); + } +} + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + + switch (c->type()) { + case T_INT: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ load_const_optimized(dest->as_register(), c->as_jint()); + break; + } + + case T_ADDRESS: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ load_const_optimized(dest->as_register(), c->as_jint()); + break; + } + + case T_LONG: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ load_const_optimized(dest->as_register_lo(), (intptr_t)c->as_jlong()); + break; + } + + case T_OBJECT: { + if (patch_code != lir_patch_none) { + jobject2reg_with_patching(dest->as_register(), info); + } else { + jobject2reg(c->as_jobject(), dest->as_register()); + } + break; + } + + case T_METADATA: { + if (patch_code != lir_patch_none) { + klass2reg_with_patching(dest->as_register(), info); + } else { + metadata2reg(c->as_metadata(), dest->as_register()); + } + break; + } + + case T_FLOAT: { + Register toc_reg = Z_R1_scratch; + __ load_toc(toc_reg); + address const_addr = __ float_constant(c->as_jfloat()); + if (const_addr == NULL) { + bailout("const section overflow"); + break; + } + int displ = const_addr - _masm->code()->consts()->start(); + if (dest->is_single_fpu()) 
{ + __ z_ley(dest->as_float_reg(), displ, toc_reg); + } else { + assert(dest->is_single_cpu(), "Must be a cpu register."); + __ z_ly(dest->as_register(), displ, toc_reg); + } + } + break; + + case T_DOUBLE: { + Register toc_reg = Z_R1_scratch; + __ load_toc(toc_reg); + address const_addr = __ double_constant(c->as_jdouble()); + if (const_addr == NULL) { + bailout("const section overflow"); + break; + } + int displ = const_addr - _masm->code()->consts()->start(); + if (dest->is_double_fpu()) { + __ z_ldy(dest->as_double_reg(), displ, toc_reg); + } else { + assert(dest->is_double_cpu(), "Must be a long register."); + __ z_lg(dest->as_register_lo(), displ, toc_reg); + } + } + break; + + default: + ShouldNotReachHere(); + } +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + if (addr->base()->is_illegal()) { + Unimplemented(); + } + + Register base = addr->base()->as_pointer_register(); + + if (addr->index()->is_illegal()) { + return Address(base, addr->disp()); + } else if (addr->index()->is_cpu_register()) { + Register index = addr->index()->as_pointer_register(); + return Address(base, index, addr->disp()); + } else if (addr->index()->is_constant()) { + intptr_t addr_offset = addr->index()->as_constant_ptr()->as_jint() + addr->disp(); + return Address(base, addr_offset); + } else { + ShouldNotReachHere(); + return Address(); + } +} + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + switch (type) { + case T_INT: + case T_FLOAT: { + Register tmp = Z_R1_scratch; + Address from = frame_map()->address_for_slot(src->single_stack_ix()); + Address to = frame_map()->address_for_slot(dest->single_stack_ix()); + __ mem2reg_opt(tmp, from, false); + __ reg2mem_opt(tmp, to, false); + break; + } + case T_ADDRESS: + case T_OBJECT: { + Register tmp = Z_R1_scratch; + Address from = frame_map()->address_for_slot(src->single_stack_ix()); + Address to = frame_map()->address_for_slot(dest->single_stack_ix()); + __ mem2reg_opt(tmp, from, true); + 
__ reg2mem_opt(tmp, to, true); + break; + } + case T_LONG: + case T_DOUBLE: { + Register tmp = Z_R1_scratch; + Address from = frame_map()->address_for_double_slot(src->double_stack_ix()); + Address to = frame_map()->address_for_double_slot(dest->double_stack_ix()); + __ mem2reg_opt(tmp, from, true); + __ reg2mem_opt(tmp, to, true); + break; + } + + default: + ShouldNotReachHere(); + } +} + +// 4-byte accesses only! Don't use it to access 8 bytes! +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotCallThis(); + return 0; // unused +} + +// 4-byte accesses only! Don't use it to access 8 bytes! +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + ShouldNotCallThis(); + return 0; // unused +} + +void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, + CodeEmitInfo* info, bool wide, bool unaligned) { + + assert(type != T_METADATA, "load of metadata ptr not supported"); + LIR_Address* addr = src_opr->as_address_ptr(); + LIR_Opr to_reg = dest; + + Register src = addr->base()->as_pointer_register(); + Register disp_reg = Z_R0; + int disp_value = addr->disp(); + bool needs_patching = (patch_code != lir_patch_none); + + if (addr->base()->type() == T_OBJECT) { + __ verify_oop(src); + } + + PatchingStub* patch = NULL; + if (needs_patching) { + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + assert(!to_reg->is_double_cpu() || + patch_code == lir_patch_none || + patch_code == lir_patch_normal, "patching doesn't match register"); + } + + if (addr->index()->is_illegal()) { + if (!Immediate::is_simm20(disp_value)) { + if (needs_patching) { + __ load_const(Z_R1_scratch, (intptr_t)0); + } else { + __ load_const_optimized(Z_R1_scratch, disp_value); + } + disp_reg = Z_R1_scratch; + disp_value = 0; + } + } else { + if (!Immediate::is_simm20(disp_value)) { + __ load_const_optimized(Z_R1_scratch, disp_value); + __ z_la(Z_R1_scratch, 0, Z_R1_scratch, addr->index()->as_register()); + disp_reg = 
Z_R1_scratch; + disp_value = 0; + } + disp_reg = addr->index()->as_pointer_register(); + } + + // Remember the offset of the load. The patching_epilog must be done + // before the call to add_debug_info, otherwise the PcDescs don't get + // entered in increasing order. + int offset = code_offset(); + + assert(disp_reg != Z_R0 || Immediate::is_simm20(disp_value), "should have set this up"); + + bool short_disp = Immediate::is_uimm12(disp_value); + + switch (type) { + case T_BOOLEAN: // fall through + case T_BYTE : __ z_lb(dest->as_register(), disp_value, disp_reg, src); break; + case T_CHAR : __ z_llgh(dest->as_register(), disp_value, disp_reg, src); break; + case T_SHORT : + if (short_disp) { + __ z_lh(dest->as_register(), disp_value, disp_reg, src); + } else { + __ z_lhy(dest->as_register(), disp_value, disp_reg, src); + } + break; + case T_INT : + if (short_disp) { + __ z_l(dest->as_register(), disp_value, disp_reg, src); + } else { + __ z_ly(dest->as_register(), disp_value, disp_reg, src); + } + break; + case T_ADDRESS: + if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ z_llgf(dest->as_register(), disp_value, disp_reg, src); + __ decode_klass_not_null(dest->as_register()); + } else { + __ z_lg(dest->as_register(), disp_value, disp_reg, src); + } + break; + case T_ARRAY : // fall through + case T_OBJECT: + { + if (UseCompressedOops && !wide) { + __ z_llgf(dest->as_register(), disp_value, disp_reg, src); + __ oop_decoder(dest->as_register(), dest->as_register(), true); + } else { + __ z_lg(dest->as_register(), disp_value, disp_reg, src); + } + break; + } + case T_FLOAT: + if (short_disp) { + __ z_le(dest->as_float_reg(), disp_value, disp_reg, src); + } else { + __ z_ley(dest->as_float_reg(), disp_value, disp_reg, src); + } + break; + case T_DOUBLE: + if (short_disp) { + __ z_ld(dest->as_double_reg(), disp_value, disp_reg, src); + } else { + __ z_ldy(dest->as_double_reg(), disp_value, disp_reg, src); + } + break; + case 
T_LONG : __ z_lg(dest->as_register_lo(), disp_value, disp_reg, src); break; + default : ShouldNotReachHere(); + } + if (type == T_ARRAY || type == T_OBJECT) { + __ verify_oop(dest->as_register()); + } + + if (patch != NULL) { + patching_epilog(patch, patch_code, src, info); + } + if (info != NULL) add_debug_info_for_null_check(offset, info); +} + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + assert(src->is_stack(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + if (dest->is_single_cpu()) { + if (type == T_ARRAY || type == T_OBJECT) { + __ mem2reg_opt(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()), true); + __ verify_oop(dest->as_register()); + } else if (type == T_METADATA) { + __ mem2reg_opt(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()), true); + } else { + __ mem2reg_opt(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix()), false); + } + } else if (dest->is_double_cpu()) { + Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix()); + __ mem2reg_opt(dest->as_register_lo(), src_addr_LO, true); + } else if (dest->is_single_fpu()) { + Address src_addr = frame_map()->address_for_slot(src->single_stack_ix()); + __ mem2freg_opt(dest->as_float_reg(), src_addr, false); + } else if (dest->is_double_fpu()) { + Address src_addr = frame_map()->address_for_slot(src->double_stack_ix()); + __ mem2freg_opt(dest->as_double_reg(), src_addr, true); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_stack(), "should not call otherwise"); + + if (src->is_single_cpu()) { + const Address dst = frame_map()->address_for_slot(dest->single_stack_ix()); + if (type == T_OBJECT || type == T_ARRAY) { + __ verify_oop(src->as_register()); + __ 
reg2mem_opt(src->as_register(), dst, true); + } else if (type == T_METADATA) { + __ reg2mem_opt(src->as_register(), dst, true); + } else { + __ reg2mem_opt(src->as_register(), dst, false); + } + } else if (src->is_double_cpu()) { + Address dstLO = frame_map()->address_for_slot(dest->double_stack_ix()); + __ reg2mem_opt(src->as_register_lo(), dstLO, true); + } else if (src->is_single_fpu()) { + Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix()); + __ freg2mem_opt(src->as_float_reg(), dst_addr, false); + } else if (src->is_double_fpu()) { + Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix()); + __ freg2mem_opt(src->as_double_reg(), dst_addr, true); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2reg(LIR_Opr from_reg, LIR_Opr to_reg) { + if (from_reg->is_float_kind() && to_reg->is_float_kind()) { + if (from_reg->is_double_fpu()) { + // double to double moves + assert(to_reg->is_double_fpu(), "should match"); + __ z_ldr(to_reg->as_double_reg(), from_reg->as_double_reg()); + } else { + // float to float moves + assert(to_reg->is_single_fpu(), "should match"); + __ z_ler(to_reg->as_float_reg(), from_reg->as_float_reg()); + } + } else if (!from_reg->is_float_kind() && !to_reg->is_float_kind()) { + if (from_reg->is_double_cpu()) { + __ z_lgr(to_reg->as_pointer_register(), from_reg->as_pointer_register()); + } else if (to_reg->is_double_cpu()) { + // int to int moves + __ z_lgr(to_reg->as_register_lo(), from_reg->as_register()); + } else { + // int to int moves + __ z_lgr(to_reg->as_register(), from_reg->as_register()); + } + } else { + ShouldNotReachHere(); + } + if (to_reg->type() == T_OBJECT || to_reg->type() == T_ARRAY) { + __ verify_oop(to_reg->as_register()); + } +} + +void LIR_Assembler::reg2mem(LIR_Opr from, LIR_Opr dest_opr, BasicType type, + LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, + bool wide, bool unaligned) { + assert(type != T_METADATA, "store of metadata ptr not 
supported"); + LIR_Address* addr = dest_opr->as_address_ptr(); + + Register dest = addr->base()->as_pointer_register(); + Register disp_reg = Z_R0; + int disp_value = addr->disp(); + bool needs_patching = (patch_code != lir_patch_none); + + if (addr->base()->is_oop_register()) { + __ verify_oop(dest); + } + + PatchingStub* patch = NULL; + if (needs_patching) { + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + assert(!from->is_double_cpu() || + patch_code == lir_patch_none || + patch_code == lir_patch_normal, "patching doesn't match register"); + } + + assert(!needs_patching || (!Immediate::is_simm20(disp_value) && addr->index()->is_illegal()), "assumption"); + if (addr->index()->is_illegal()) { + if (!Immediate::is_simm20(disp_value)) { + if (needs_patching) { + __ load_const(Z_R1_scratch, (intptr_t)0); + } else { + __ load_const_optimized(Z_R1_scratch, disp_value); + } + disp_reg = Z_R1_scratch; + disp_value = 0; + } + } else { + if (!Immediate::is_simm20(disp_value)) { + __ load_const_optimized(Z_R1_scratch, disp_value); + __ z_la(Z_R1_scratch, 0, Z_R1_scratch, addr->index()->as_register()); + disp_reg = Z_R1_scratch; + disp_value = 0; + } + disp_reg = addr->index()->as_pointer_register(); + } + + assert(disp_reg != Z_R0 || Immediate::is_simm20(disp_value), "should have set this up"); + + if (type == T_ARRAY || type == T_OBJECT) { + __ verify_oop(from->as_register()); + } + + bool short_disp = Immediate::is_uimm12(disp_value); + + // Remember the offset of the store. The patching_epilog must be done + // before the call to add_debug_info_for_null_check, otherwise the PcDescs don't get + // entered in increasing order. 
+  int offset = code_offset();
+  // Emit the store. Types without a short/long split below always use the
+  // z_stg form.
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE  :
+      if (short_disp) {
+                    __ z_stc(from->as_register(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_stcy(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_CHAR  : // fall through
+    case T_SHORT :
+      if (short_disp) {
+                    __ z_sth(from->as_register(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_sthy(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_INT   :
+      if (short_disp) {
+                    __ z_st(from->as_register(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_sty(from->as_register(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_LONG  :  __ z_stg(from->as_register_lo(), disp_value, disp_reg, dest); break;
+    case T_ADDRESS: __ z_stg(from->as_register(),    disp_value, disp_reg, dest); break;
+      // NOTE(review): the following break is unreachable (T_ADDRESS already
+      // breaks on the line above).
+      break;
+    case T_ARRAY : // fall through
+    case T_OBJECT:
+      {
+        if (UseCompressedOops && !wide) {
+          // Compress a copy of the oop in Z_R14 so 'from' stays unmodified.
+          Register compressed_src = Z_R14;
+          __ z_lgr(compressed_src, from->as_register());
+          __ encode_heap_oop(compressed_src);
+          // The store is emitted after the encoding sequence, so re-read the offset.
+          offset = code_offset();
+          if (short_disp) {
+            __ z_st(compressed_src,  disp_value, disp_reg, dest);
+          } else {
+            __ z_sty(compressed_src, disp_value, disp_reg, dest);
+          }
+        } else {
+          __ z_stg(from->as_register(), disp_value, disp_reg, dest);
+        }
+        break;
+      }
+    case T_FLOAT :
+      if (short_disp) {
+                    __ z_ste(from->as_float_reg(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_stey(from->as_float_reg(), disp_value, disp_reg, dest);
+      }
+      break;
+    case T_DOUBLE:
+      if (short_disp) {
+                    __ z_std(from->as_double_reg(),  disp_value, disp_reg, dest);
+      } else {
+                    __ z_stdy(from->as_double_reg(), disp_value, disp_reg, dest);
+      }
+      break;
+    default: ShouldNotReachHere();
+  }
+
+  if (patch != NULL) {
+    patching_epilog(patch, patch_code, dest, info);
+  }
+
+  if (info != NULL) add_debug_info_for_null_check(offset, info);
+}
+
+
+// Emit the method return sequence: load the polling page address into
+// Z_R1_scratch, pop the frame and restore the return pc into Z_R14, emit the
+// return safepoint poll and branch to Z_R14.
+// The result (if any) is expected in Z_R2 or Z_F0, see the assert.
+void LIR_Assembler::return_op(LIR_Opr result) {
+  assert(result->is_illegal() ||
+         (result->is_single_cpu() && result->as_register() == Z_R2) ||
+         (result->is_double_cpu() && result->as_register_lo() == Z_R2) ||
+         (result->is_single_fpu() && result->as_float_reg() == Z_F0) ||
+         (result->is_double_fpu() && result->as_double_reg() == Z_F0), "convention");
+
+  AddressLiteral pp(os::get_polling_page());
+  __ load_const_optimized(Z_R1_scratch, pp);
+
+  // Pop the frame before the safepoint code.
+  int retPC_offset = initial_frame_size_in_bytes() + _z_abi16(return_pc);
+  if (Displacement::is_validDisp(retPC_offset)) {
+    // The return pc slot is addressable from the current SP: load it first, then pop.
+    __ z_lg(Z_R14, retPC_offset, Z_SP);
+    __ add2reg(Z_SP, initial_frame_size_in_bytes());
+  } else {
+    // Otherwise pop first and restore the return pc afterwards.
+    __ add2reg(Z_SP, initial_frame_size_in_bytes());
+    __ restore_return_pc();
+  }
+
+  // We need to mark the code position where the load from the safepoint
+  // polling page was emitted as relocInfo::poll_return_type here.
+  __ relocate(relocInfo::poll_return_type);
+  __ load_from_polling_page(Z_R1_scratch);
+
+  __ z_br(Z_R14); // Return to caller.
+}
+
+// Emit a safepoint poll using tmp to hold the polling page address.
+// Debug info from 'info' (must not be NULL) is attached to the poll.
+// Returns the code offset of the load from the polling page.
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
+  AddressLiteral pp(os::get_polling_page());
+  __ load_const_optimized(tmp->as_register_lo(), pp);
+  guarantee(info != NULL, "Shouldn't be NULL");
+  add_debug_info_for_branch(info);
+  int offset = __ offset();
+  __ relocate(relocInfo::poll_type);
+  __ load_from_polling_page(tmp->as_register_lo());
+  return offset;
+}
+
+// Emit the stub belonging to the static call at the current pc.
+// Bails out of the compilation if the stub or constant section overflows.
+void LIR_Assembler::emit_static_call_stub() {
+
+  // Stub is fixed up when the corresponding call is converted from calling
+  // compiled code to calling interpreted code.
+
+  address call_pc = __ pc();
+  address stub = __ start_a_stub(call_stub_size);
+  if (stub == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  int start = __ offset();
+
+  __ relocate(static_stub_Relocation::spec(call_pc));
+
+  // See also Matcher::interpreter_method_oop_reg().
+  // Placeholder values (NULL metadata, target -1) are loaded via the TOC here.
+  AddressLiteral meta = __ allocate_metadata_address(NULL);
+  bool success = __ load_const_from_toc(Z_method, meta);
+
+  __ set_inst_mark();
+  AddressLiteral a((address)-1);
+  success = success && __ load_const_from_toc(Z_R1, a);
+  if (!success) {
+    bailout("const section overflow");
+    return;
+  }
+
+  __ z_br(Z_R1);
+  assert(__ offset() - start <= call_stub_size, "stub too big");
+  __ end_a_stub(); // Update current stubs pointer and restore insts_end.
+}
+
+// Emit a compare of opr1 with opr2; only the condition code is set.
+// lir_cond_belowEqual/lir_cond_aboveEqual select the unsigned (logical)
+// compare instructions for 32 bit integer operands.
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
+  bool unsigned_comp = condition == lir_cond_belowEqual || condition == lir_cond_aboveEqual;
+  if (opr1->is_single_cpu()) {
+    Register reg1 = opr1->as_register();
+    if (opr2->is_single_cpu()) {
+      // cpu register - cpu register
+      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+        __ z_clgr(reg1, opr2->as_register());
+      } else {
+        assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
+        if (unsigned_comp) {
+          __ z_clr(reg1, opr2->as_register());
+        } else {
+          __ z_cr(reg1, opr2->as_register());
+        }
+      }
+    } else if (opr2->is_stack()) {
+      // cpu register - stack
+      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+        __ z_cg(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+      } else {
+        if (unsigned_comp) {
+          __ z_cly(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+        } else {
+          __ z_cy(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+        }
+      }
+    } else if (opr2->is_constant()) {
+      // cpu register - constant
+      LIR_Const* c = opr2->as_constant_ptr();
+      if (c->type() == T_INT) {
+        if (unsigned_comp) {
+          __ z_clfi(reg1, c->as_jint());
+        } else {
+          __ z_cfi(reg1, c->as_jint());
+        }
+      } else if (c->type() == T_OBJECT || c->type() == T_ARRAY) {
+        // In 64bit oops are single register.
+        jobject o = c->as_jobject();
+        if (o == NULL) {
+          // Compare against NULL by testing the register itself.
+          __ z_ltgr(reg1, reg1);
+        } else {
+          jobject2reg(o, Z_R1_scratch);
+          __ z_cgr(reg1, Z_R1_scratch);
+        }
+      } else {
+        fatal("unexpected type: %s", basictype_to_str(c->type()));
+      }
+      // cpu register - address
+    } else if (opr2->is_address()) {
+      if (op->info() != NULL) {
+        add_debug_info_for_null_check_here(op->info());
+      }
+      if (unsigned_comp) {
+        __ z_cly(reg1, as_Address(opr2->as_address_ptr()));
+      } else {
+        __ z_cy(reg1, as_Address(opr2->as_address_ptr()));
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (opr1->is_double_cpu()) {
+    assert(!unsigned_comp, "unexpected");
+    Register xlo = opr1->as_register_lo();
+    Register xhi = opr1->as_register_hi(); // NOTE(review): xhi is not used below.
+    if (opr2->is_double_cpu()) {
+      __ z_cgr(xlo, opr2->as_register_lo());
+    } else if (opr2->is_constant()) {
+      // cpu register - constant 0
+      assert(opr2->as_jlong() == (jlong)0, "only handles zero");
+      __ z_ltgr(xlo, xlo);
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (opr1->is_single_fpu()) {
+    if (opr2->is_single_fpu()) {
+      __ z_cebr(opr1->as_float_reg(), opr2->as_float_reg());
+    } else {
+      // stack slot
+      Address addr = frame_map()->address_for_slot(opr2->single_stack_ix());
+      if (Immediate::is_uimm12(addr.disp())) {
+        __ z_ceb(opr1->as_float_reg(), addr);
+      } else {
+        // Displacement too large for z_ceb: load into Z_fscratch_1 and compare registers.
+        __ z_ley(Z_fscratch_1, addr);
+        __ z_cebr(opr1->as_float_reg(), Z_fscratch_1);
+      }
+    }
+  } else if (opr1->is_double_fpu()) {
+    if (opr2->is_double_fpu()) {
+      __ z_cdbr(opr1->as_double_reg(), opr2->as_double_reg());
+    } else {
+      // stack slot
+      Address addr = frame_map()->address_for_slot(opr2->double_stack_ix());
+      if (Immediate::is_uimm12(addr.disp())) {
+        __ z_cdb(opr1->as_double_reg(), addr);
+      } else {
+        // Displacement too large for z_cdb: load into Z_fscratch_1 and compare registers.
+        __ z_ldy(Z_fscratch_1, addr);
+        __ z_cdbr(opr1->as_double_reg(), Z_fscratch_1);
+      }
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Three-way compare: dst := 0 if left == right, 1 if left > right, -1 if left < right.
+//   lir_cmp_fd2i / lir_ucmp_fd2i - float or double operands; for lir_ucmp_fd2i
+//                                  an unordered result yields -1, otherwise 1.
+//   lir_cmp_l2i                  - long operands.
+// Uses load-on-condition instructions if available, branches otherwise.
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op) {
+  Label done;
+  Register dreg = dst->as_register();
+
+  if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) {
+    assert((left->is_single_fpu() && right->is_single_fpu()) ||
+           (left->is_double_fpu() && right->is_double_fpu()), "unexpected operand types");
+    bool is_single = left->is_single_fpu();
+    bool is_unordered_less = (code == lir_ucmp_fd2i);
+    FloatRegister lreg = is_single ? left->as_float_reg() : left->as_double_reg();
+    FloatRegister rreg = is_single ? right->as_float_reg() : right->as_double_reg();
+    if (is_single) {
+      __ z_cebr(lreg, rreg);
+    } else {
+      __ z_cdbr(lreg, rreg);
+    }
+    if (VM_Version::has_LoadStoreConditional()) {
+      // Branch free: start with 0 and conditionally replace it by 1 or -1.
+      Register one       = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one,  1);
+      __ z_lghi(dreg, 0);
+      __ z_locgr(dreg, one,       is_unordered_less ? Assembler::bcondHigh            : Assembler::bcondHighOrNotOrdered);
+      __ z_locgr(dreg, minus_one, is_unordered_less ? Assembler::bcondLowOrNotOrdered : Assembler::bcondLow);
+    } else {
+      __ clear_reg(dreg, true, false);
+      __ z_bre(done); // if (left == right) dst = 0
+
+      // if (left > right || ((code ~= cmpg) && (left <> right)) dst := 1
+      __ z_lhi(dreg, 1);
+      __ z_brc(is_unordered_less ? Assembler::bcondHigh : Assembler::bcondHighOrNotOrdered, done);
+
+      // if (left < right || ((code ~= cmpl) && (left <> right)) dst := -1
+      __ z_lhi(dreg, -1);
+    }
+  } else {
+    assert(code == lir_cmp_l2i, "check");
+    if (VM_Version::has_LoadStoreConditional()) {
+      Register one       = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_cgr(left->as_register_lo(), right->as_register_lo());
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one,  1);
+      __ z_lghi(dreg, 0);
+      __ z_locgr(dreg, one, Assembler::bcondHigh);
+      __ z_locgr(dreg, minus_one, Assembler::bcondLow);
+    } else {
+      __ z_cgr(left->as_register_lo(), right->as_register_lo());
+      __ z_lghi(dreg,  0);     // eq value
+      __ z_bre(done);
+      __ z_lghi(dreg,  1);     // gt value
+      __ z_brh(done);
+      __ z_lghi(dreg, -1);     // lt value
+    }
+  }
+  __ bind(done);
+}
+
+// result = condition ? opr1 : opr2
+// opr1 is moved to result unconditionally; opr2 then replaces it under the
+// negated condition (load-on-condition) or is moved after a branch that is
+// taken when the condition holds.
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+  // acond: branch condition matching 'condition', ncond: its negation.
+  Assembler::branch_condition acond = Assembler::bcondEqual, ncond = Assembler::bcondNotEqual;
+  switch (condition) {
+    case lir_cond_equal:        acond = Assembler::bcondEqual;    ncond = Assembler::bcondNotEqual; break;
+    case lir_cond_notEqual:     acond = Assembler::bcondNotEqual; ncond = Assembler::bcondEqual;    break;
+    case lir_cond_less:         acond = Assembler::bcondLow;      ncond = Assembler::bcondNotLow;   break;
+    case lir_cond_lessEqual:    acond = Assembler::bcondNotHigh;  ncond = Assembler::bcondHigh;     break;
+    case lir_cond_greaterEqual: acond = Assembler::bcondNotLow;   ncond = Assembler::bcondLow;      break;
+    case lir_cond_greater:      acond = Assembler::bcondHigh;     ncond = Assembler::bcondNotHigh;  break;
+    case lir_cond_belowEqual:   acond = Assembler::bcondNotHigh;  ncond = Assembler::bcondHigh;     break;
+    case lir_cond_aboveEqual:   acond = Assembler::bcondNotLow;   ncond = Assembler::bcondLow;      break;
+    default:                    ShouldNotReachHere();
+  }
+
+  if (opr1->is_cpu_register()) {
+    reg2reg(opr1, result);
+  } else if (opr1->is_stack()) {
+    stack2reg(opr1, result, result->type());
+  } else if (opr1->is_constant()) {
+    const2reg(opr1, result, lir_patch_none, NULL);
+  } else {
+    ShouldNotReachHere();
+  }
+
+  if (VM_Version::has_LoadStoreConditional() && !opr2->is_constant()) {
+    // Optimized version that does not require a branch.
+    if (opr2->is_single_cpu()) {
+      assert(opr2->cpu_regnr() != result->cpu_regnr(), "opr2 already overwritten by previous move");
+      __ z_locgr(result->as_register(), opr2->as_register(), ncond);
+    } else if (opr2->is_double_cpu()) {
+      assert(opr2->cpu_regnrLo() != result->cpu_regnrLo() && opr2->cpu_regnrLo() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
+      assert(opr2->cpu_regnrHi() != result->cpu_regnrLo() && opr2->cpu_regnrHi() != result->cpu_regnrHi(), "opr2 already overwritten by previous move");
+      __ z_locgr(result->as_register_lo(), opr2->as_register_lo(), ncond);
+    } else if (opr2->is_single_stack()) {
+      __ z_loc(result->as_register(), frame_map()->address_for_slot(opr2->single_stack_ix()), ncond);
+    } else if (opr2->is_double_stack()) {
+      __ z_locg(result->as_register_lo(), frame_map()->address_for_slot(opr2->double_stack_ix()), ncond);
+    } else {
+      ShouldNotReachHere();
+    }
+  } else {
+    Label skip;
+    __ z_brc(acond, skip);
+    if (opr2->is_cpu_register()) {
+      reg2reg(opr2, result);
+    } else if (opr2->is_stack()) {
+      stack2reg(opr2, result, result->type());
+    } else if (opr2->is_constant()) {
+      const2reg(opr2, result, lir_patch_none, NULL);
+    } else {
+      ShouldNotReachHere();
+    }
+    __ bind(skip);
+  }
+}
+
+// Emit add/sub/mul (and div for floating point) with left == dest.
+// Handled operand combinations:
+//   single cpu - register, stack slot (add/sub only) or int constant
+//   double cpu - register or constant
+//   single/double fpu - register or stack slot
+//   address    - lir_add of an int constant to a T_INT or T_LONG memory location
+// info must be NULL; integer division is handled by arithmetic_idiv().
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest,
+                             CodeEmitInfo* info, bool pop_fpu_stack) {
+  assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method");
+
+  if (left->is_single_cpu()) {
+    assert(left == dest, "left and dest must be equal");
+    Register lreg = left->as_register();
+
+    if (right->is_single_cpu()) {
+      // cpu register - cpu register
+      Register rreg = right->as_register();
+      switch (code) {
+        case lir_add: __ z_ar (lreg, rreg); break;
+        case lir_sub: __ z_sr (lreg, rreg); break;
+        case lir_mul: __ z_msr(lreg, rreg); break;
+        default: ShouldNotReachHere();
+      }
+
+    } else if (right->is_stack()) {
+      // cpu register - stack
+      Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
+      switch (code) {
+        case lir_add: __ z_ay(lreg, raddr); break;
+        case lir_sub: __ z_sy(lreg, raddr); break;
+        default: ShouldNotReachHere();
+      }
+
+    } else if (right->is_constant()) {
+      // cpu register - constant
+      jint c = right->as_constant_ptr()->as_jint();
+      switch (code) {
+        case lir_add: __ z_agfi(lreg, c);  break;
+        case lir_sub: __ z_agfi(lreg, -c); break; // note: -min_jint == min_jint
+        case lir_mul: __ z_msfi(lreg, c);  break;
+        default: ShouldNotReachHere();
+      }
+
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (left->is_double_cpu()) {
+    assert(left == dest, "left and dest must be equal");
+    Register lreg_lo = left->as_register_lo();
+    Register lreg_hi = left->as_register_hi(); // NOTE(review): not used below.
+
+    if (right->is_double_cpu()) {
+      // cpu register - cpu register
+      Register rreg_lo = right->as_register_lo();
+      Register rreg_hi = right->as_register_hi(); // NOTE(review): not used below.
+      assert_different_registers(lreg_lo, rreg_lo);
+      switch (code) {
+        case lir_add:
+          __ z_agr(lreg_lo, rreg_lo);
+          break;
+        case lir_sub:
+          __ z_sgr(lreg_lo, rreg_lo);
+          break;
+        case lir_mul:
+          __ z_msgr(lreg_lo, rreg_lo);
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+
+    } else if (right->is_constant()) {
+      // cpu register - constant
+      // NOTE(review): c is passed to 32 bit immediate instructions; presumably
+      // the LIR generator only emits constants that fit - confirm.
+      jlong c = right->as_constant_ptr()->as_jlong_bits();
+      switch (code) {
+        case lir_add: __ z_agfi(lreg_lo, c); break;
+        case lir_sub:
+          if (c != min_jint) {
+                      __ z_agfi(lreg_lo, -c);
+          } else {
+            // -min_jint cannot be represented as simm32 in z_agfi
+            // min_jint sign extended:      0xffffffff80000000
+            // -min_jint as 64 bit integer: 0x0000000080000000
+            // 0x80000000 can be represented as uimm32 in z_algfi
+            // lreg_lo := lreg_lo + -min_jint == lreg_lo + 0x80000000
+            __ z_algfi(lreg_lo, UCONST64(0x80000000));
+          }
+          break;
+        case lir_mul: __ z_msgfi(lreg_lo, c); break;
+        default:
+          ShouldNotReachHere();
+      }
+
+    } else {
+      ShouldNotReachHere();
+    }
+
+  } else if (left->is_single_fpu()) {
+    assert(left == dest, "left and dest must be equal");
+    FloatRegister lreg = left->as_float_reg();
+    // rreg stays fnoreg if the right operand can be used directly from memory.
+    FloatRegister rreg = right->is_single_fpu() ? right->as_float_reg() : fnoreg;
+    Address raddr;
+
+    if (rreg == fnoreg) {
+      assert(right->is_single_stack(), "constants should be loaded into register");
+      raddr = frame_map()->address_for_slot(right->single_stack_ix());
+      if (!Immediate::is_uimm12(raddr.disp())) {
+        // Displacement too large for the memory forms: load into Z_fscratch_1 first.
+        __ mem2freg_opt(rreg = Z_fscratch_1, raddr, false);
+      }
+    }
+
+    if (rreg != fnoreg) {
+      switch (code) {
+        case lir_add: __ z_aebr(lreg, rreg);  break;
+        case lir_sub: __ z_sebr(lreg, rreg);  break;
+        case lir_mul_strictfp: // fall through
+        case lir_mul: __ z_meebr(lreg, rreg); break;
+        case lir_div_strictfp: // fall through
+        case lir_div: __ z_debr(lreg, rreg);  break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      switch (code) {
+        case lir_add: __ z_aeb(lreg, raddr);  break;
+        case lir_sub: __ z_seb(lreg, raddr);  break;
+        case lir_mul_strictfp: // fall through
+        case lir_mul: __ z_meeb(lreg, raddr); break;
+        case lir_div_strictfp: // fall through
+        case lir_div: __ z_deb(lreg, raddr);  break;
+        default: ShouldNotReachHere();
+      }
+    }
+  } else if (left->is_double_fpu()) {
+    assert(left == dest, "left and dest must be equal");
+    FloatRegister lreg = left->as_double_reg();
+    // rreg stays fnoreg if the right operand can be used directly from memory.
+    FloatRegister rreg = right->is_double_fpu() ? right->as_double_reg() : fnoreg;
+    Address raddr;
+
+    if (rreg == fnoreg) {
+      assert(right->is_double_stack(), "constants should be loaded into register");
+      raddr = frame_map()->address_for_slot(right->double_stack_ix());
+      if (!Immediate::is_uimm12(raddr.disp())) {
+        // Displacement too large for the memory forms: load into Z_fscratch_1 first.
+        __ mem2freg_opt(rreg = Z_fscratch_1, raddr, true);
+      }
+    }
+
+    if (rreg != fnoreg) {
+      switch (code) {
+        case lir_add: __ z_adbr(lreg, rreg); break;
+        case lir_sub: __ z_sdbr(lreg, rreg); break;
+        case lir_mul_strictfp: // fall through
+        case lir_mul: __ z_mdbr(lreg, rreg); break;
+        case lir_div_strictfp: // fall through
+        case lir_div: __ z_ddbr(lreg, rreg); break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      switch (code) {
+        case lir_add: __ z_adb(lreg, raddr); break;
+        case lir_sub: __ z_sdb(lreg, raddr); break;
+        case lir_mul_strictfp: // fall through
+        case lir_mul: __ z_mdb(lreg, raddr); break;
+        case lir_div_strictfp: // fall through
+        case lir_div: __ z_ddb(lreg, raddr); break;
+        default: ShouldNotReachHere();
+      }
+    }
+  } else if (left->is_address()) {
+    assert(left == dest, "left and dest must be equal");
+    assert(code == lir_add, "unsupported operation");
+    assert(right->is_constant(), "unsupported operand");
+    jint c = right->as_constant_ptr()->as_jint();
+    LIR_Address* lir_addr = left->as_address_ptr();
+    Address addr = as_Address(lir_addr);
+    switch (lir_addr->type()) {
+      case T_INT:
+        __ add2mem_32(addr, c, Z_R1_scratch);
+        break;
+      case T_LONG:
+        __ add2mem_64(addr, c, Z_R1_scratch);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Nothing is emitted for fpop in this port.
+void LIR_Assembler::fpop() {
+  // do nothing
+}
+
+// Emit a math intrinsic on double registers: lir_sqrt (z_sqdbr) or lir_abs (z_lpdbr).
+// The thread operand must not be valid.
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) {
+  switch (code) {
+    case lir_sqrt: {
+      assert(!thread->is_valid(), "there is no need for a thread_reg for dsqrt");
+      FloatRegister src_reg = value->as_double_reg();
+      FloatRegister dst_reg = dest->as_double_reg();
+      __ z_sqdbr(dst_reg, src_reg);
+      break;
+    }
+    case lir_abs: {
+      assert(!thread->is_valid(), "there is no need for a thread_reg for fabs");
+      FloatRegister src_reg = value->as_double_reg();
+      FloatRegister dst_reg = dest->as_double_reg();
+      __ z_lpdbr(dst_reg, src_reg);
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+      break;
+    }
+  }
+}
+
+// Emit and/or/xor. The operation is performed in place on the left register
+// and the result is then moved to dst via move_regs().
+// A single cpu left operand takes a constant, stack slot or register on the
+// right; every other left operand is treated as a 64 bit register with a
+// constant (loaded into Z_R1_scratch) or a register on the right.
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
+  if (left->is_single_cpu()) {
+    Register reg = left->as_register();
+    if (right->is_constant()) {
+      int val = right->as_constant_ptr()->as_jint();
+      switch (code) {
+        case lir_logic_and: __ z_nilf(reg, val); break;
+        case lir_logic_or:  __ z_oilf(reg, val); break;
+        case lir_logic_xor: __ z_xilf(reg, val); break;
+        default: ShouldNotReachHere();
+      }
+    } else if (right->is_stack()) {
+      Address raddr = frame_map()->address_for_slot(right->single_stack_ix());
+      switch (code) {
+        case lir_logic_and: __ z_ny(reg, raddr); break;
+        case lir_logic_or:  __ z_oy(reg, raddr); break;
+        case lir_logic_xor: __ z_xy(reg, raddr); break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      Register rright = right->as_register();
+      switch (code) {
+        case lir_logic_and: __ z_nr(reg, rright); break;
+        case lir_logic_or : __ z_or(reg, rright); break;
+        case lir_logic_xor: __ z_xr(reg, rright); break;
+        default: ShouldNotReachHere();
+      }
+    }
+    move_regs(reg, dst->as_register());
+  } else {
+    Register l_lo = left->as_register_lo();
+    if (right->is_constant()) {
+      __ load_const_optimized(Z_R1_scratch, right->as_constant_ptr()->as_jlong());
+      switch (code) {
+        case lir_logic_and:
+          __ z_ngr(l_lo, Z_R1_scratch);
+          break;
+        case lir_logic_or:
+          __ z_ogr(l_lo, Z_R1_scratch);
+          break;
+        case lir_logic_xor:
+          __ z_xgr(l_lo, Z_R1_scratch);
+          break;
+        default: ShouldNotReachHere();
+      }
+    } else {
+      // An oop typed right operand is a single register, anything else a long register.
+      Register r_lo;
+      if (right->type() == T_OBJECT || right->type() == T_ARRAY) {
+        r_lo = right->as_register();
+      } else {
+        r_lo = right->as_register_lo();
+      }
+      switch (code) {
+        case lir_logic_and:
+          __ z_ngr(l_lo, r_lo);
+          break;
+        case lir_logic_or:
+          __ z_ogr(l_lo, r_lo);
+          break;
+        case lir_logic_xor:
+          __ z_xgr(l_lo, r_lo);
+          break;
+        default: ShouldNotReachHere();
+      }
+    }
+
+    Register dst_lo = dst->as_register_lo();
+
+    move_regs(l_lo, dst_lo);
+  }
+}
+
+// See operand selection in LIRGenerator::do_ArithmeticOp_Int().
+// Emit integer division (lir_idiv) or remainder (lir_irem) for 64 bit
+// (left is a double cpu register) and 32 bit operands.
+// A constant divisor must be a power of 2 and is handled with shifts and
+// masks. Otherwise the dividend must be in Z_R11 and a divide instruction is
+// emitted; the asserts expect the quotient in Z_R11 and the remainder in Z_R10.
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) {
+  if (left->is_double_cpu()) {
+    // 64 bit integer case
+    assert(left->is_double_cpu(), "left must be register");
+    assert(right->is_double_cpu() || is_power_of_2_long(right->as_jlong()),
+           "right must be register or power of 2 constant");
+    assert(result->is_double_cpu(), "result must be register");
+
+    Register lreg = left->as_register_lo();
+    Register dreg = result->as_register_lo();
+
+    if (right->is_constant()) {
+      // Convert division by a power of two into some shifts and logical operations.
+      Register treg1 = Z_R0_scratch;
+      Register treg2 = Z_R1_scratch;
+      jlong divisor = right->as_jlong();
+      jlong log_divisor = log2_long(right->as_jlong());
+
+      if (divisor == min_jlong) {
+        // Min_jlong is special. Result is '0' except for min_jlong/min_jlong = 1.
+        if (dreg == lreg) {
+          NearLabel done;
+          __ load_const_optimized(treg2, min_jlong);
+          __ z_cgr(lreg, treg2);
+          __ z_lghi(dreg, 0);           // Preserves condition code.
+          __ z_brne(done);
+          __ z_lghi(dreg, 1);           // min_jlong / min_jlong = 1
+          __ bind(done);
+        } else {
+          assert_different_registers(dreg, lreg);
+          NearLabel done;
+          __ z_lghi(dreg, 0);
+          __ compare64_and_branch(lreg, min_jlong, Assembler::bcondNotEqual, done);
+          __ z_lghi(dreg, 1);
+          __ bind(done);
+        }
+        return;
+      }
+      __ move_reg_if_needed(dreg, T_LONG, lreg, T_LONG);
+      // treg2 := (dividend < 0) ? divisor - 1 : 0, the rounding correction
+      // for negative dividends.
+      if (divisor == 2) {
+        __ z_srlg(treg2, dreg, 63);     // dividend < 0 ?  1 : 0
+      } else {
+        __ z_srag(treg2, dreg, 63);     // dividend < 0 ? -1 : 0
+        __ and_imm(treg2, divisor - 1, treg1, true);
+      }
+      if (code == lir_idiv) {
+        __ z_agr(dreg, treg2);
+        __ z_srag(dreg, dreg, log_divisor);
+      } else {
+        assert(code == lir_irem, "check");
+        // remainder := dividend - ((dividend + correction) & ~(divisor - 1))
+        __ z_agr(treg2, dreg);
+        __ and_imm(treg2, ~(divisor - 1), treg1, true);
+        __ z_sgr(dreg, treg2);
+      }
+      return;
+    }
+
+    // Divisor is not a power of 2 constant.
+    Register rreg = right->as_register_lo();
+    Register treg = temp->as_register_lo();
+    assert(right->is_double_cpu(), "right must be register");
+    assert(lreg == Z_R11, "see ldivInOpr()");
+    assert(rreg != lreg, "right register must not be same as left register");
+    assert((code == lir_idiv && dreg == Z_R11 && treg == Z_R10) ||
+           (code == lir_irem && dreg == Z_R10 && treg == Z_R11), "see ldivInOpr(), ldivOutOpr(), lremOutOpr()");
+
+    Register R1 = lreg->predecessor();
+    Register R2 = rreg;
+    assert(code != lir_idiv || lreg==dreg, "see code below");
+    // Divisor -1 bypasses the divide instruction: the result is precomputed
+    // here (idiv: negated dividend, irem: 0) and kept if the branch is taken.
+    if (code == lir_idiv) {
+      __ z_lcgr(lreg, lreg);
+    } else {
+      __ clear_reg(dreg, true, false);
+    }
+    NearLabel done;
+    __ compare64_and_branch(R2, -1, Assembler::bcondEqual, done);
+    if (code == lir_idiv) {
+      __ z_lcgr(lreg, lreg); // Revert lcgr above.
+    }
+    if (ImplicitDiv0Checks) {
+      // No debug info because the idiv won't trap.
+      // Add_debug_info_for_div0 would instantiate another DivByZeroStub,
+      // which is unnecessary, too.
+      // NOTE(review): the comment above contradicts the code - div0 debug
+      // info IS registered here, at the offset of the z_dsgr emitted next.
+      add_debug_info_for_div0(__ offset(), info);
+    }
+    __ z_dsgr(R1, R2);
+    __ bind(done);
+    return;
+  }
+
+  // 32 bit integer case
+
+  assert(left->is_single_cpu(), "left must be register");
+  assert(right->is_single_cpu() || is_power_of_2(right->as_jint()), "right must be register or power of 2 constant");
+  assert(result->is_single_cpu(), "result must be register");
+
+  Register lreg = left->as_register();
+  Register dreg = result->as_register();
+
+  if (right->is_constant()) {
+    // Convert division by a power of two into some shifts and logical operations.
+ Register treg1 = Z_R0_scratch; + Register treg2 = Z_R1_scratch; + jlong divisor = right->as_jint(); + jlong log_divisor = log2_long(right->as_jint()); + __ move_reg_if_needed(dreg, T_LONG, lreg, T_INT); // sign extend + if (divisor == 2) { + __ z_srlg(treg2, dreg, 63); // dividend < 0 ? 1 : 0 + } else { + __ z_srag(treg2, dreg, 63); // dividend < 0 ? -1 : 0 + __ and_imm(treg2, divisor - 1, treg1, true); + } + if (code == lir_idiv) { + __ z_agr(dreg, treg2); + __ z_srag(dreg, dreg, log_divisor); + } else { + assert(code == lir_irem, "check"); + __ z_agr(treg2, dreg); + __ and_imm(treg2, ~(divisor - 1), treg1, true); + __ z_sgr(dreg, treg2); + } + return; + } + + // Divisor is not a power of 2 constant. + Register rreg = right->as_register(); + Register treg = temp->as_register(); + assert(right->is_single_cpu(), "right must be register"); + // NOTE(review): the message below still says "rax" although the register checked is Z_R11. + assert(lreg == Z_R11, "left register must be rax,"); + assert(rreg != lreg, "right register must not be same as left register"); + assert((code == lir_idiv && dreg == Z_R11 && treg == Z_R10) + || (code == lir_irem && dreg == Z_R10 && treg == Z_R11), "see divInOpr(), divOutOpr(), remOutOpr()"); + + Register R1 = lreg->predecessor(); + Register R2 = rreg; + __ move_reg_if_needed(lreg, T_LONG, lreg, T_INT); // sign extend + if (ImplicitDiv0Checks) { + // No debug info because the idiv won't trap. + // Add_debug_info_for_div0 would instantiate another DivByZeroStub, + // which is unnecessary, too. + // NOTE(review): the comment above says no debug info is needed, yet the call below adds it; confirm intent. + add_debug_info_for_div0(__ offset(), info); + } + __ z_dsgfr(R1, R2); +} + +// Emits the code for a throw: registers the exception oop register in the debug info, loads the +// current PC into Z_EXC_PC, records call info at that offset and calls the Runtime1 +// handle_exception stub (the nofpu variant when the compilation has no FPU code). +// exceptionOop must be in Z_EXC_OOP and exceptionPC in Z_EXC_PC (asserted). +void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { + assert(exceptionOop->as_register() == Z_EXC_OOP, "should match"); + assert(exceptionPC->as_register() == Z_EXC_PC, "should match"); + + // Exception object is not added to oop map by LinearScan + // (LinearScan assumes that no oops are in fixed registers).
+ info->add_register_oop(exceptionOop); + + // Reuse the debug info from the safepoint poll for the throw op itself. + __ get_PC(Z_EXC_PC); + add_call_info(__ offset(), info); // for exception handler + address stub = Runtime1::entry_for (compilation()->has_fpu_code() ? Runtime1::handle_exception_id + : Runtime1::handle_exception_nofpu_id); + emit_call_c(stub); +} + +// Emits an unconditional branch to _unwind_handler_entry. +// exceptionOop must be in Z_EXC_OOP (asserted); it is not otherwise used here. +void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { + assert(exceptionOop->as_register() == Z_EXC_OOP, "should match"); + + __ branch_optimized(Assembler::bcondAlways, _unwind_handler_entry); +} + +// Emits the code for an arraycopy LIR op. +// If the expected array type is unknown (default_type == NULL), the generic arraycopy stub is called, +// or the C entry Runtime1::arraycopy when that stub was not generated. A zero result continues at +// stub->continuation(); otherwise the arguments are restored (and, for the stub, adjusted by the +// complement of the result) and control goes to stub->entry(). +// If the type is known, the checks selected by op->flags() are emitted inline, each branching to +// stub->entry() on failure, and the copy routine chosen by StubRoutines::select_arraycopy_function() +// is called. +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + ciArrayKlass* default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; + // An element type of T_ARRAY is handled like T_OBJECT from here on. + if (basic_type == T_ARRAY) basic_type = T_OBJECT; + + // If we don't know anything, just go through the generic arraycopy. + if (default_type == NULL) { + Label done; + // Save outgoing arguments in callee saved registers (C convention) in case + // a call to System.arraycopy is needed. + Register callee_saved_src = Z_R10; + Register callee_saved_src_pos = Z_R11; + Register callee_saved_dst = Z_R12; + Register callee_saved_dst_pos = Z_R13; + Register callee_saved_length = Z_ARG5; // Z_ARG5 == Z_R6 is callee saved. + + __ lgr_if_needed(callee_saved_src, src); + __ lgr_if_needed(callee_saved_src_pos, src_pos); + __ lgr_if_needed(callee_saved_dst, dst); + __ lgr_if_needed(callee_saved_dst_pos, dst_pos); + __ lgr_if_needed(callee_saved_length, length); + + // C function requires 64 bit values.
+ __ z_lgfr(src_pos, src_pos); + __ z_lgfr(dst_pos, dst_pos); + __ z_lgfr(length, length); + + address C_entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy); + + address copyfunc_addr = StubRoutines::generic_arraycopy(); + + // Pass arguments: may push as this is not a safepoint; SP must be fix at each safepoint. + + // The arguments are in the corresponding registers. + assert(Z_ARG1 == src, "assumption"); + assert(Z_ARG2 == src_pos, "assumption"); + assert(Z_ARG3 == dst, "assumption"); + assert(Z_ARG4 == dst_pos, "assumption"); + assert(Z_ARG5 == length, "assumption"); + if (copyfunc_addr == NULL) { // Use C version if stub was not generated. + emit_call_c(C_entry); + } else { +#ifndef PRODUCT + if (PrintC1Statistics) { + __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_generic_arraycopystub_cnt); + __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch); + } +#endif + emit_call_c(copyfunc_addr); + } + CHECK_BAILOUT(); + + __ compare32_and_branch(Z_RET, (intptr_t)0, Assembler::bcondEqual, *stub->continuation()); + + if (copyfunc_addr != NULL) { + __ z_lgr(tmp, Z_RET); + __ z_xilf(tmp, -1); + } + + // Restore values from callee saved registers so they are where the stub + // expects them. 
+ __ lgr_if_needed(src, callee_saved_src); + __ lgr_if_needed(src_pos, callee_saved_src_pos); + __ lgr_if_needed(dst, callee_saved_dst); + __ lgr_if_needed(dst_pos, callee_saved_dst_pos); + __ lgr_if_needed(length, callee_saved_length); + + if (copyfunc_addr != NULL) { + __ z_sr(length, tmp); + __ z_ar(src_pos, tmp); + __ z_ar(dst_pos, tmp); + } + __ branch_optimized(Assembler::bcondAlways, *stub->entry()); + + __ bind(*stub->continuation()); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); + + int elem_size = type2aelembytes(basic_type); + int shift_amount; + + switch (elem_size) { + case 1 : + shift_amount = 0; + break; + case 2 : + shift_amount = 1; + break; + case 4 : + shift_amount = 2; + break; + case 8 : + shift_amount = 3; + break; + default: + shift_amount = -1; + ShouldNotReachHere(); + } + + Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); + Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); + Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); + Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); + + // Length and pos's are all sign extended at this point on 64bit. + + // test for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + __ compareU64_and_branch(src, (intptr_t)0, Assembler::bcondZero, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ compareU64_and_branch(dst, (intptr_t)0, Assembler::bcondZero, *stub->entry()); + } + + // Check if negative. 
+ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + __ compare32_and_branch(src_pos, (intptr_t)0, Assembler::bcondLow, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + __ compare32_and_branch(dst_pos, (intptr_t)0, Assembler::bcondLow, *stub->entry()); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. + if (flags & LIR_OpArrayCopy::type_check) { + assert(Klass::_lh_neutral_value == 0, "or replace z_lt instructions"); + + if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ z_lt(tmp, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ branch_optimized(Assembler::bcondNotLow, *stub->entry()); + } + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ z_lt(tmp, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ branch_optimized(Assembler::bcondNotLow, *stub->entry()); + } + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ z_la(tmp, Address(src_pos, length)); + __ z_cl(tmp, src_length_addr); + __ branch_optimized(Assembler::bcondHigh, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ z_la(tmp, Address(dst_pos, length)); + __ z_cl(tmp, dst_length_addr); + __ branch_optimized(Assembler::bcondHigh, *stub->entry()); + } + + if (flags & LIR_OpArrayCopy::length_positive_check) { + __ z_ltr(length, length); + __ branch_optimized(Assembler::bcondNegative, *stub->entry()); + } + + // Stubs require 64 bit values. + __ z_lgfr(src_pos, src_pos); // int -> long + __ z_lgfr(dst_pos, dst_pos); // int -> long + __ z_lgfr(length, length); // int -> long + + if (flags & LIR_OpArrayCopy::type_check) { + // We don't know the array types are compatible. + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays. 
+ if (UseCompressedClassPointers) { + __ z_l(tmp, src_klass_addr); + __ z_c(tmp, dst_klass_addr); + } else { + __ z_lg(tmp, src_klass_addr); + __ z_cg(tmp, dst_klass_addr); + } + __ branch_optimized(Assembler::bcondNotEqual, *stub->entry()); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + NearLabel cont, slow; + Register src_klass = Z_R1_scratch; + Register dst_klass = Z_R10; + + __ load_klass(src_klass, src); + __ load_klass(dst_klass, dst); + + __ check_klass_subtype_fast_path(src_klass, dst_klass, tmp, &cont, &slow, NULL); + + store_parameter(src_klass, 0); // sub + store_parameter(dst_klass, 1); // super + emit_call_c(Runtime1::entry_for (Runtime1::slow_subtype_check_id)); + CHECK_BAILOUT(); + // Sets condition code 0 for match (2 otherwise). + __ branch_optimized(Assembler::bcondEqual, cont); + + __ bind(slow); + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + if (copyfunc_addr != NULL) { // use stub if available + // Src is not a sub class of dst so we have to do a + // per-element check. + + int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that at least both of them object arrays. + assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + Address klass_lh_addr(tmp, Klass::layout_helper_offset()); + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ load_const_optimized(Z_R1_scratch, objArray_lh); + __ z_c(Z_R1_scratch, klass_lh_addr); + __ branch_optimized(Assembler::bcondNotEqual, *stub->entry()); + } + + // Save outgoing arguments in callee saved registers (C convention) in case + // a call to System.arraycopy is needed. 
+ Register callee_saved_src = Z_R10; + Register callee_saved_src_pos = Z_R11; + Register callee_saved_dst = Z_R12; + Register callee_saved_dst_pos = Z_R13; + Register callee_saved_length = Z_ARG5; // Z_ARG5 == Z_R6 is callee saved. + + __ lgr_if_needed(callee_saved_src, src); + __ lgr_if_needed(callee_saved_src_pos, src_pos); + __ lgr_if_needed(callee_saved_dst, dst); + __ lgr_if_needed(callee_saved_dst_pos, dst_pos); + __ lgr_if_needed(callee_saved_length, length); + + __ z_llgfr(length, length); // Higher 32bits must be null. + + __ z_sllg(Z_ARG1, src_pos, shift_amount); // index -> byte offset + __ z_sllg(Z_ARG2, dst_pos, shift_amount); // index -> byte offset + + __ z_la(Z_ARG1, Address(src, Z_ARG1, arrayOopDesc::base_offset_in_bytes(basic_type))); + assert_different_registers(Z_ARG1, dst, dst_pos, length); + __ z_la(Z_ARG2, Address(dst, Z_ARG2, arrayOopDesc::base_offset_in_bytes(basic_type))); + assert_different_registers(Z_ARG2, dst, length); + + __ z_lgr(Z_ARG3, length); + assert_different_registers(Z_ARG3, dst); + + __ load_klass(Z_ARG5, dst); + __ z_lg(Z_ARG5, Address(Z_ARG5, ObjArrayKlass::element_klass_offset())); + __ z_lg(Z_ARG4, Address(Z_ARG5, Klass::super_check_offset_offset())); + emit_call_c(copyfunc_addr); + CHECK_BAILOUT(); + +#ifndef PRODUCT + if (PrintC1Statistics) { + NearLabel failed; + __ compareU32_and_branch(Z_RET, (intptr_t)0, Assembler::bcondNotEqual, failed); + __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_checkcast_cnt); + __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch); + __ bind(failed); + } +#endif + + __ compareU32_and_branch(Z_RET, (intptr_t)0, Assembler::bcondEqual, *stub->continuation()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ load_const_optimized(Z_R1_scratch, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); + __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch); + } +#endif + + __ z_lgr(tmp, Z_RET); + __ z_xilf(tmp, -1); + + // Restore previously spilled arguments + __ 
lgr_if_needed(src, callee_saved_src); + __ lgr_if_needed(src_pos, callee_saved_src_pos); + __ lgr_if_needed(dst, callee_saved_dst); + __ lgr_if_needed(dst_pos, callee_saved_dst_pos); + __ lgr_if_needed(length, callee_saved_length); + + __ z_sr(length, tmp); + __ z_ar(src_pos, tmp); + __ z_ar(dst_pos, tmp); + } + + __ branch_optimized(Assembler::bcondAlways, *stub->entry()); + + __ bind(cont); + } + } + +#ifdef ASSERT + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. + NearLabel known_ok, halt; + metadata2reg(default_type->constant_encoding(), tmp); + if (UseCompressedClassPointers) { + __ encode_klass_not_null(tmp); + } + + if (basic_type != T_OBJECT) { + if (UseCompressedClassPointers) { __ z_c (tmp, dst_klass_addr); } + else { __ z_cg(tmp, dst_klass_addr); } + __ branch_optimized(Assembler::bcondNotEqual, halt); + if (UseCompressedClassPointers) { __ z_c (tmp, src_klass_addr); } + else { __ z_cg(tmp, src_klass_addr); } + __ branch_optimized(Assembler::bcondEqual, known_ok); + } else { + if (UseCompressedClassPointers) { __ z_c (tmp, dst_klass_addr); } + else { __ z_cg(tmp, dst_klass_addr); } + __ branch_optimized(Assembler::bcondEqual, known_ok); + __ compareU64_and_branch(src, dst, Assembler::bcondEqual, known_ok); + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +#endif + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ load_const_optimized(Z_R1_scratch, Runtime1::arraycopy_count_address(basic_type)); + __ add2mem_32(Address(Z_R1_scratch), 1, 
Z_R0_scratch); + } +#endif + + __ z_sllg(tmp, src_pos, shift_amount); // index -> byte offset + __ z_sllg(Z_R1_scratch, dst_pos, shift_amount); // index -> byte offset + + assert_different_registers(Z_ARG1, dst, dst_pos, length); + __ z_la(Z_ARG1, Address(src, tmp, arrayOopDesc::base_offset_in_bytes(basic_type))); + assert_different_registers(Z_ARG2, length); + __ z_la(Z_ARG2, Address(dst, Z_R1_scratch, arrayOopDesc::base_offset_in_bytes(basic_type))); + __ lgr_if_needed(Z_ARG3, length); + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + __ call_VM_leaf(entry); + + __ bind(*stub->continuation()); +} + +// Emits a shift (lir_shl, lir_shr, lir_ushr) whose count is held in a register. +// Single-cpu T_OBJECT operands and double-cpu operands pass the count register directly to the +// 64 bit shift instructions. Other single-cpu operands shift by a copy of the count in Z_R1_scratch +// that has been ANDed with 31; right shifts there use the 2 operand forms, so left must equal dest. +// The tmp operand is not used. +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + if (dest->is_single_cpu()) { + if (left->type() == T_OBJECT) { + switch (code) { + case lir_shl: __ z_sllg (dest->as_register(), left->as_register(), 0, count->as_register()); break; + case lir_shr: __ z_srag (dest->as_register(), left->as_register(), 0, count->as_register()); break; + case lir_ushr: __ z_srlg (dest->as_register(), left->as_register(), 0, count->as_register()); break; + default: ShouldNotReachHere(); + } + } else { + assert(code == lir_shl || left == dest, "left and dest must be equal for 2 operand form right shifts"); + // Work on a copy so that the count register itself stays unchanged. + Register masked_count = Z_R1_scratch; + __ z_lr(masked_count, count->as_register()); + __ z_nill(masked_count, 31); + switch (code) { + case lir_shl: __ z_sllg (dest->as_register(), left->as_register(), 0, masked_count); break; + case lir_shr: __ z_sra (dest->as_register(), 0, masked_count); break; + case lir_ushr: __ z_srl (dest->as_register(), 0, masked_count); break; + default: ShouldNotReachHere(); + } + } + } else { + switch (code) { + case lir_shl: __ z_sllg (dest->as_register_lo(), left->as_register_lo(), 0,
count->as_register()); break; + case lir_shr: __ z_srag (dest->as_register_lo(), left->as_register_lo(), 0, count->as_register()); break; + case lir_ushr: __ z_srlg (dest->as_register_lo(), left->as_register_lo(), 0, count->as_register()); break; + default: ShouldNotReachHere(); + } + } +} + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + if (left->type() == T_OBJECT) { + count = count & 63; // Shouldn't shift by more than sizeof(intptr_t). + Register l = left->as_register(); + Register d = dest->as_register_lo(); + switch (code) { + case lir_shl: __ z_sllg (d, l, count); break; + case lir_shr: __ z_srag (d, l, count); break; + case lir_ushr: __ z_srlg (d, l, count); break; + default: ShouldNotReachHere(); + } + return; + } + if (dest->is_single_cpu()) { + assert(code == lir_shl || left == dest, "left and dest must be equal for 2 operand form right shifts"); + count = count & 0x1F; // Java spec + switch (code) { + case lir_shl: __ z_sllg (dest->as_register(), left->as_register(), count); break; + case lir_shr: __ z_sra (dest->as_register(), count); break; + case lir_ushr: __ z_srl (dest->as_register(), count); break; + default: ShouldNotReachHere(); + } + } else if (dest->is_double_cpu()) { + count = count & 63; // Java spec + Register l = left->as_pointer_register(); + Register d = dest->as_pointer_register(); + switch (code) { + case lir_shl: __ z_sllg (d, l, count); break; + case lir_shr: __ z_srag (d, l, count); break; + case lir_ushr: __ z_srlg (d, l, count); break; + default: ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + if (op->init_check()) { + // Make sure klass is initialized & doesn't have finalizer. 
+ const int state_offset = in_bytes(InstanceKlass::init_state_offset()); + Register iklass = op->klass()->as_register(); + add_debug_info_for_null_check_here(op->stub()->info()); + if (Immediate::is_uimm12(state_offset)) { + __ z_cli(state_offset, iklass, InstanceKlass::fully_initialized); + } else { + __ z_cliy(state_offset, iklass, InstanceKlass::fully_initialized); + } + __ branch_optimized(Assembler::bcondNotEqual, *op->stub()->entry()); // Use long branch, because slow_case might be far. + } + __ allocate_object(op->obj()->as_register(), + op->tmp1()->as_register(), + op->tmp2()->as_register(), + op->header_size(), + op->object_size(), + op->klass()->as_register(), + *op->stub()->entry()); + __ bind(*op->stub()->continuation()); + __ verify_oop(op->obj()->as_register()); +} + +// Emits the code for an array allocation. +// The length register is sign extended to 64 bit first. If UseSlowPath is set, or the fast path is +// switched off for this element kind (UseFastNewObjectArray for T_OBJECT/T_ARRAY, UseFastNewTypeArray +// for all other types), the code branches straight to the stub entry. Otherwise allocate_array() is +// emitted with the stub entry as its label argument. The stub continuation is bound at the end. +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { + Register len = op->len()->as_register(); + __ move_reg_if_needed(len, T_LONG, len, T_INT); // sign extend + + if (UseSlowPath || + (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || + (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { + // No inline allocation: always go to the stub. + __ z_brul(*op->stub()->entry()); + } else { + __ allocate_array(op->obj()->as_register(), + op->len()->as_register(), + op->tmp1()->as_register(), + op->tmp2()->as_register(), + arrayOopDesc::header_size(op->type()), + type2aelembytes(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +// Emits the receiver type profile update for the rows of 'data' in the MDO addressed by 'mdo'. +// First pass: if a row's receiver equals recv, add DataLayout::counter_increment to its count and +// branch to *update_done. Second pass: take the first row whose receiver is zero, store recv there, +// set its count to DataLayout::counter_increment and branch to *update_done. +// Falls through if no row matches and no row is empty. tmp1 is used as a temporary. +void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, + Register recv, Register tmp1, Label* update_done) { + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n].
+ Address receiver_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))); + __ z_cg(recv, receiver_addr); + __ z_brne(next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); + __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1); + __ branch_optimized(Assembler::bcondAlways, *update_done); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in. + for (i = 0; i < VirtualCallData::row_limit(); i++) { + Label next_test; + Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i))); + // A row with a non-zero receiver is in use; try the next one. + __ z_ltg(Z_R0_scratch, recv_addr); + __ z_brne(next_test); + __ z_stg(recv, recv_addr); + __ load_const_optimized(tmp1, DataLayout::counter_increment); + __ z_stg(tmp1, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)), mdo); + __ branch_optimized(Assembler::bcondAlways, *update_done); + __ bind(next_test); + } + // Falls through when neither a matching nor an empty row exists. +} + +// Not implemented here: calls Unimplemented(). +void LIR_Assembler::setup_md_access(ciMethod* method, int bci, + ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) { + Unimplemented(); +} + +// Stores register r at Z_SP + FrameMap::first_available_sp_in_frame + param_num * BytesPerWord. +// The offset must lie below frame_map()->reserved_argument_area_size() (asserted). +void LIR_Assembler::store_parameter(Register r, int param_num) { + assert(param_num >= 0, "invalid num"); + int offset_in_bytes = param_num * BytesPerWord + FrameMap::first_available_sp_in_frame; + assert(offset_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ z_stg(r, offset_in_bytes, Z_SP); +} + +// Same as above for a jint constant c, stored via store_const(), which is also passed Z_R1_scratch. +void LIR_Assembler::store_parameter(jint c, int param_num) { + assert(param_num >= 0, "invalid num"); + int offset_in_bytes = param_num * BytesPerWord + FrameMap::first_available_sp_in_frame; + assert(offset_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ store_const(Address(Z_SP, offset_in_bytes), c, Z_R1_scratch, true); +} + +// Emits the type check used by emit_opTypeCheck() for lir_checkcast and lir_instanceof. +// Branches to *obj_is_null if the object is null, to *success if its klass passes the check against +// op->klass(), and to *failure otherwise. When op->should_profile() is set, the MDO is updated before +// each of these exits: the null_seen flag for a null object, the receiver type rows on success, and a +// decrement of the counter on failure. +void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { + // We always need a stub for the failure case.
+ CodeStub* stub = op->stub(); + Register obj = op->object()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register dst = op->result_opr()->as_register(); + Register Rtmp1 = Z_R1_scratch; + ciKlass* k = op->klass(); + + assert(!op->tmp3()->is_valid(), "tmp3's not needed"); + + // Check if it needs to be profiled. + ciMethodData* md = NULL; + ciProfileData* data = NULL; + + if (op->should_profile()) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + + // Temp operands do not overlap with inputs, if this is their last + // use (end of range is exclusive), so a register conflict is possible. + if (obj == k_RInfo) { + k_RInfo = dst; + } else if (obj == klass_RInfo) { + klass_RInfo = dst; + } + assert_different_registers(obj, k_RInfo, klass_RInfo); + + if (op->should_profile()) { + NearLabel not_null; + __ compareU64_and_branch(obj, (intptr_t) 0, Assembler::bcondNotEqual, not_null); + // Object is null; update MDO and exit. + Register mdo = klass_RInfo; + metadata2reg(md->constant_encoding(), mdo); + Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset())); + int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant()); + __ or2mem_8(data_addr, header_bits); + __ branch_optimized(Assembler::bcondAlways, *obj_is_null); + __ bind(not_null); + } else { + __ compareU64_and_branch(obj, (intptr_t) 0, Assembler::bcondEqual, *obj_is_null); + } + + NearLabel profile_cast_failure, profile_cast_success; + Label *failure_target = op->should_profile() ? &profile_cast_failure : failure; + Label *success_target = op->should_profile() ? 
&profile_cast_success : success; + + // Patching may screw with our temporaries on sparc, + // so let's do it before loading the class. + if (k->is_loaded()) { + metadata2reg(k->constant_encoding(), k_RInfo); + } else { + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } + assert(obj != k_RInfo, "must be different"); + + __ verify_oop(obj); + + // Get object class. + // Not a safepoint as obj null check happens earlier. + if (op->fast_check()) { + if (UseCompressedClassPointers) { + __ load_klass(klass_RInfo, obj); + __ compareU64_and_branch(k_RInfo, klass_RInfo, Assembler::bcondNotEqual, *failure_target); + } else { + __ z_cg(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes())); + __ branch_optimized(Assembler::bcondNotEqual, *failure_target); + } + // Successful cast, fall through to profile or jump. + } else { + bool need_slow_path = !k->is_loaded() || + ((int) k->super_check_offset() == in_bytes(Klass::secondary_super_cache_offset())); + intptr_t super_check_offset = k->is_loaded() ? k->super_check_offset() : -1L; + __ load_klass(klass_RInfo, obj); + // Perform the fast part of the checking logic. + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, + (need_slow_path ? success_target : NULL), + failure_target, NULL, + RegisterOrConstant(super_check_offset)); + if (need_slow_path) { + // Call out-of-line instance of __ check_klass_subtype_slow_path(...): + address a = Runtime1::entry_for (Runtime1::slow_subtype_check_id); + store_parameter(klass_RInfo, 0); // sub + store_parameter(k_RInfo, 1); // super + emit_call_c(a); // Sets condition code 0 for match (2 otherwise). + CHECK_BAILOUT(); + __ branch_optimized(Assembler::bcondNotEqual, *failure_target); + // Fall through to success case. 
+ } + } + + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo; + assert_different_registers(obj, mdo, recv); + __ bind(profile_cast_success); + metadata2reg(md->constant_encoding(), mdo); + __ load_klass(recv, obj); + type_profile_helper(mdo, md, data, recv, Rtmp1, success); + __ branch_optimized(Assembler::bcondAlways, *success); + + __ bind(profile_cast_failure); + metadata2reg(md->constant_encoding(), mdo); + __ add2mem_64(Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())), -(int)DataLayout::counter_increment, Rtmp1); + __ branch_optimized(Assembler::bcondAlways, *failure); + } else { + __ branch_optimized(Assembler::bcondAlways, *success); + } +} + +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + LIR_Code code = op->code(); + if (code == lir_store_check) { + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register Rtmp1 = Z_R1_scratch; + + CodeStub* stub = op->stub(); + + // Check if it needs to be profiled. + ciMethodData* md = NULL; + ciProfileData* data = NULL; + + assert_different_registers(value, k_RInfo, klass_RInfo); + + if (op->should_profile()) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + NearLabel profile_cast_success, profile_cast_failure, done; + Label *success_target = op->should_profile() ? &profile_cast_success : &done; + Label *failure_target = op->should_profile() ? 
&profile_cast_failure : stub->entry(); + + if (op->should_profile()) { + NearLabel not_null; + __ compareU64_and_branch(value, (intptr_t) 0, Assembler::bcondNotEqual, not_null); + // Object is null; update MDO and exit. + Register mdo = klass_RInfo; + metadata2reg(md->constant_encoding(), mdo); + Address data_addr(mdo, md->byte_offset_of_slot(data, DataLayout::header_offset())); + int header_bits = DataLayout::flag_mask_to_header_mask(BitData::null_seen_byte_constant()); + __ or2mem_8(data_addr, header_bits); + __ branch_optimized(Assembler::bcondAlways, done); + __ bind(not_null); + } else { + __ compareU64_and_branch(value, (intptr_t) 0, Assembler::bcondEqual, done); + } + + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + + // Get instance klass (it's already uncompressed). + __ z_lg(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); + // Perform the fast part of the checking logic. + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // Call out-of-line instance of __ check_klass_subtype_slow_path(...): + address a = Runtime1::entry_for (Runtime1::slow_subtype_check_id); + store_parameter(klass_RInfo, 0); // sub + store_parameter(k_RInfo, 1); // super + emit_call_c(a); // Sets condition code 0 for match (2 otherwise). + CHECK_BAILOUT(); + __ branch_optimized(Assembler::bcondNotEqual, *failure_target); + // Fall through to success case. 
+ + if (op->should_profile()) { + Register mdo = klass_RInfo, recv = k_RInfo; + assert_different_registers(value, mdo, recv); + __ bind(profile_cast_success); + metadata2reg(md->constant_encoding(), mdo); + __ load_klass(recv, value); + type_profile_helper(mdo, md, data, recv, Rtmp1, &done); + __ branch_optimized(Assembler::bcondAlways, done); + + __ bind(profile_cast_failure); + metadata2reg(md->constant_encoding(), mdo); + __ add2mem_64(Address(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())), -(int)DataLayout::counter_increment, Rtmp1); + __ branch_optimized(Assembler::bcondAlways, *stub->entry()); + } + + __ bind(done); + } else { + if (code == lir_checkcast) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + NearLabel success; + emit_typecheck_helper(op, &success, op->stub()->entry(), &success); + __ bind(success); + __ lgr_if_needed(dst, obj); + } else { + if (code == lir_instanceof) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + NearLabel success, failure, done; + emit_typecheck_helper(op, &success, &failure, &failure); + __ bind(failure); + __ clear_reg(dst); + __ branch_optimized(Assembler::bcondAlways, done); + __ bind(success); + __ load_const_optimized(dst, 1); + __ bind(done); + } else { + ShouldNotReachHere(); + } + } + } +} + +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + Register addr = op->addr()->as_pointer_register(); + Register t1_cmp = Z_R1_scratch; + if (op->code() == lir_cas_long) { + assert(VM_Version::supports_cx8(), "wrong machine"); + Register cmp_value_lo = op->cmp_value()->as_register_lo(); + Register new_value_lo = op->new_value()->as_register_lo(); + __ z_lgr(t1_cmp, cmp_value_lo); + // Perform the compare and swap operation. 
+ __ z_csg(t1_cmp, new_value_lo, 0, addr); + } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj) { + Register cmp_value = op->cmp_value()->as_register(); + Register new_value = op->new_value()->as_register(); + if (op->code() == lir_cas_obj) { + if (UseCompressedOops) { + t1_cmp = op->tmp1()->as_register(); + Register t2_new = op->tmp2()->as_register(); + assert_different_registers(cmp_value, new_value, addr, t1_cmp, t2_new); + __ oop_encoder(t1_cmp, cmp_value, true /*maybe null*/); + __ oop_encoder(t2_new, new_value, true /*maybe null*/); + __ z_cs(t1_cmp, t2_new, 0, addr); + } else { + __ z_lgr(t1_cmp, cmp_value); + __ z_csg(t1_cmp, new_value, 0, addr); + } + } else { + __ z_lr(t1_cmp, cmp_value); + __ z_cs(t1_cmp, new_value, 0, addr); + } + } else { + ShouldNotReachHere(); // new lir_cas_?? + } +} + +// Must not be called here (x86 only). +void LIR_Assembler::set_24bit_FPU() { + ShouldNotCallThis(); // x86 only +} + +// Must not be called here (x86 only). +void LIR_Assembler::reset_FPU() { + ShouldNotCallThis(); // x86 only +} + +// Not implemented: calls Unimplemented(); the breakpoint_trap() call is commented out. +void LIR_Assembler::breakpoint() { + Unimplemented(); + // __ breakpoint_trap(); +} + +// Must not be called here (unused). +void LIR_Assembler::push(LIR_Opr opr) { + ShouldNotCallThis(); // unused +} + +// Must not be called here (unused). +void LIR_Assembler::pop(LIR_Opr opr) { + ShouldNotCallThis(); // unused +} + +// Computes the address of the monitor lock slot monitor_no (base + displacement as returned by +// frame_map()->address_for_monitor_lock()) into the register of dst_opr. +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst_opr) { + Address addr = frame_map()->address_for_monitor_lock(monitor_no); + __ add2reg(dst_opr->as_register(), addr.disp(), addr.base()); +} + +// Emits the code for lir_lock / lir_unlock. +// Without UseFastLocking the code branches unconditionally to the stub entry. Otherwise lock_object() +// or unlock_object() is emitted with the stub entry as its label argument; for lir_lock, null check +// debug info is added first when op->info() is not NULL. The stub continuation is bound at the end. +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // May not be an oop. + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); + if (!UseFastLocking) { + __ branch_optimized(Assembler::bcondAlways, *op->stub()->entry()); + } else if (op->code() == lir_lock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // Add debug info for NullPointerException only if one is possible.
+ if (op->info() != NULL) { + add_debug_info_for_null_check_here(op->info()); + } + __ lock_object(hdr, obj, lock, *op->stub()->entry()); + // done + } else if (op->code() == lir_unlock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + } else { + ShouldNotReachHere(); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + + // Update counter for all call types. + ciMethodData* md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + assert(op->tmp1()->is_double_cpu(), "tmp1 must be allocated"); + Register tmp1 = op->tmp1()->as_register_lo(); + metadata2reg(md->constant_encoding(), mdo); + + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + Bytecodes::Code bc = method->java_code_at_bci(bci); + const bool callee_is_static = callee->is_loaded() && callee->is_static(); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes. + if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) && + !callee_is_static && // Required for optimized MH invokes. 
+ C1ProfileVirtualCalls) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, tmp1, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type. + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations. + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1); + return; + } + } + + // Receiver type not found in profile data. Select an empty slot. + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time. + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == NULL) { + Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); + metadata2reg(known_klass->constant_encoding(), tmp1); + __ z_stg(tmp1, recv_addr); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ add2mem_64(data_addr, DataLayout::counter_increment, tmp1); + return; + } + } + } else { + __ load_klass(recv, recv); + NearLabel update_done; + type_profile_helper(mdo, md, data, recv, tmp1, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. 
+ // Increment total counter to indicate polymorphic case. + __ add2mem_64(counter_addr, DataLayout::counter_increment, tmp1); + __ bind(update_done); + } + } else { + // static call + __ add2mem_64(counter_addr, DataLayout::counter_increment, tmp1); + } +} + +void LIR_Assembler::align_backward_branch_target() { + __ align(OptoLoopAlignment); +} + +void LIR_Assembler::emit_delay(LIR_OpDelay* op) { + ShouldNotCallThis(); // There are no delay slots on ZARCH_64. +} + +void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { + assert(left->is_register(), "can only handle registers"); + + if (left->is_single_cpu()) { + __ z_lcr(dest->as_register(), left->as_register()); + } else if (left->is_single_fpu()) { + __ z_lcebr(dest->as_float_reg(), left->as_float_reg()); + } else if (left->is_double_fpu()) { + __ z_lcdbr(dest->as_double_reg(), left->as_double_reg()); + } else { + assert(left->is_double_cpu(), "Must be a long"); + __ z_lcgr(dest->as_register_lo(), left->as_register_lo()); + } +} + +void LIR_Assembler::fxch(int i) { + ShouldNotCallThis(); // x86 only +} + +void LIR_Assembler::fld(int i) { + ShouldNotCallThis(); // x86 only +} + +void LIR_Assembler::ffree(int i) { + ShouldNotCallThis(); // x86 only +} + +void LIR_Assembler::rt_call(LIR_Opr result, address dest, + const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { + assert(!tmp->is_valid(), "don't need temporary"); + emit_call_c(dest); + CHECK_BAILOUT(); + if (info != NULL) { + add_call_info_here(info); + } +} + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + ShouldNotCallThis(); // not needed on ZARCH_64 +} + +void LIR_Assembler::membar() { + __ z_fence(); +} + +void LIR_Assembler::membar_acquire() { + __ z_acquire(); +} + +void LIR_Assembler::membar_release() { + __ z_release(); +} + +void LIR_Assembler::membar_loadload() { + __ z_acquire(); +} + +void LIR_Assembler::membar_storestore() { + __ z_release(); +} + +void 
LIR_Assembler::membar_loadstore() { + __ z_acquire(); +} + +void LIR_Assembler::membar_storeload() { + __ z_fence(); +} + +void LIR_Assembler::on_spin_wait() { + Unimplemented(); +} + +void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) { + LIR_Address* addr = addr_opr->as_address_ptr(); + assert(addr->scale() == LIR_Address::times_1, "scaling unsupported"); + __ load_address(dest->as_pointer_register(), as_Address(addr)); +} + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + ShouldNotCallThis(); // unused +} + +#ifdef ASSERT +// Emit run-time assertion. +void LIR_Assembler::emit_assert(LIR_OpAssert* op) { + Unimplemented(); +} +#endif + +void LIR_Assembler::peephole(LIR_List*) { + // Do nothing for now. +} + +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) { + assert(code == lir_xadd, "lir_xchg not supported"); + Address src_addr = as_Address(src->as_address_ptr()); + Register base = src_addr.base(); + intptr_t disp = src_addr.disp(); + if (src_addr.index()->is_valid()) { + // LAA and LAAG do not support index register. 
+ __ load_address(Z_R1_scratch, src_addr); + base = Z_R1_scratch; + disp = 0; + } + if (data->type() == T_INT) { + __ z_laa(dest->as_register(), data->as_register(), disp, base); + } else if (data->type() == T_LONG) { + assert(data->as_register_lo() == data->as_register_hi(), "should be a single register"); + __ z_laag(dest->as_register_lo(), data->as_register_lo(), disp, base); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + Register obj = op->obj()->as_register(); + Register tmp1 = op->tmp()->as_pointer_register(); + Register tmp2 = Z_R1_scratch; + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none, null_seen, init_klass; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + + __ verify_oop(obj); + + if (do_null || tmp1 != obj DEBUG_ONLY(|| true)) { + __ z_ltgr(tmp1, obj); + } + if (do_null) { + __ z_brnz(update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ z_lg(tmp1, mdo_addr); + __ z_oill(tmp1, TypeEntries::null_seen); + __ z_stg(tmp1, mdo_addr); + } + if (do_update) { + __ z_bru(next); + } + } else { + __ asm_assert_ne("unexpect null obj", __LINE__); + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + __ load_klass(tmp1, tmp1); + metadata2reg(exact_klass->constant_encoding(), tmp2); + __ z_cgr(tmp1, tmp2); + __ asm_assert_eq("exact klass and actual klass differ", __LINE__); + } +#endif + + Label do_update; + __ z_lg(tmp2, mdo_addr); + + if 
(!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + metadata2reg(exact_klass->constant_encoding(), tmp1); + } else { + __ load_klass(tmp1, tmp1); + } + + // Klass seen before: nothing to do (regardless of unknown bit). + __ z_lgr(Z_R0_scratch, tmp2); + assert(Immediate::is_uimm(~TypeEntries::type_klass_mask, 16), "or change following instruction"); + __ z_nill(Z_R0_scratch, TypeEntries::type_klass_mask & 0xFFFF); + __ compareU64_and_branch(Z_R0_scratch, tmp1, Assembler::bcondEqual, next); + + // Already unknown: Nothing to do anymore. + __ z_tmll(tmp2, TypeEntries::type_unknown); + __ z_brc(Assembler::bcondAllOne, next); + + if (TypeEntries::is_type_none(current_klass)) { + __ z_lgr(Z_R0_scratch, tmp2); + assert(Immediate::is_uimm(~TypeEntries::type_mask, 16), "or change following instruction"); + __ z_nill(Z_R0_scratch, TypeEntries::type_mask & 0xFFFF); + __ compareU64_and_branch(Z_R0_scratch, (intptr_t)0, Assembler::bcondEqual, init_klass); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + // Already unknown: Nothing to do anymore. + __ z_tmll(tmp2, TypeEntries::type_unknown); + __ z_brc(Assembler::bcondAllOne, next); + } + + // Different than before. Cannot keep accurate profile. + __ z_oill(tmp2, TypeEntries::type_unknown); + __ z_bru(do_update); + } else { + // There's a single possible klass at this profile point. 
+ assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + metadata2reg(exact_klass->constant_encoding(), tmp1); + __ z_lgr(Z_R0_scratch, tmp2); + assert(Immediate::is_uimm(~TypeEntries::type_klass_mask, 16), "or change following instruction"); + __ z_nill(Z_R0_scratch, TypeEntries::type_klass_mask & 0xFFFF); + __ compareU64_and_branch(Z_R0_scratch, tmp1, Assembler::bcondEqual, next); +#ifdef ASSERT + { + Label ok; + __ z_lgr(Z_R0_scratch, tmp2); + assert(Immediate::is_uimm(~TypeEntries::type_mask, 16), "or change following instruction"); + __ z_nill(Z_R0_scratch, TypeEntries::type_mask & 0xFFFF); + __ compareU64_and_branch(Z_R0_scratch, (intptr_t)0, Assembler::bcondEqual, ok); + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } +#endif + + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + // Already unknown: Nothing to do anymore. + __ z_tmll(tmp2, TypeEntries::type_unknown); + __ z_brc(Assembler::bcondAllOne, next); + __ z_oill(tmp2, TypeEntries::type_unknown); + __ z_bru(do_update); + } + } + + __ bind(init_klass); + // Combine klass and null_seen bit (only used if (tmp & type_mask)==0). 
+ __ z_ogr(tmp2, tmp1); + + __ bind(do_update); + __ z_stg(tmp2, mdo_addr); + + __ bind(next); + } +} + +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { + assert(op->crc()->is_single_cpu(), "crc must be register"); + assert(op->val()->is_single_cpu(), "byte value must be register"); + assert(op->result_opr()->is_single_cpu(), "result must be register"); + Register crc = op->crc()->as_register(); + Register val = op->val()->as_register(); + Register res = op->result_opr()->as_register(); + + assert_different_registers(val, crc, res); + + __ load_const_optimized(res, StubRoutines::crc_table_addr()); + __ not_(crc, noreg, false); // ~crc + __ update_byte_crc32(crc, val, res); + __ not_(res, crc, false); // ~crc +} + +#undef __ diff --git a/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.hpp b/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.hpp new file mode 100644 index 00000000000..eed06792467 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c1_LIRAssembler_s390.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_C1_LIRASSEMBLER_S390_HPP +#define CPU_S390_VM_C1_LIRASSEMBLER_S390_HPP + + private: + + // Record the type of the receiver in ReceiverTypeData. + void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, + Register recv, Register tmp1, Label* update_done); + // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot. + void setup_md_access(ciMethod* method, int bci, + ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias); + public: + address emit_call_c(address a); + + void store_parameter(Register r, int param_num); + void store_parameter(jint c, int param_num); + + void check_reserved_argument_area(int bytes) { + assert(bytes + FrameMap::first_available_sp_in_frame <= frame_map()->reserved_argument_area_size(), + "reserved_argument_area too small"); + } + + enum { + call_stub_size = 512, // See Compile::MAX_stubs_size and CompiledStaticCall::emit_to_interp_stub. + exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(128), + deopt_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(64) + }; + +#endif // CPU_S390_VM_C1_LIRASSEMBLER_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/c1_LIRGenerator_s390.cpp b/hotspot/src/cpu/s390/vm/c1_LIRGenerator_s390.cpp new file mode 100644 index 00000000000..d0d01107e58 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c1_LIRGenerator_s390.cpp @@ -0,0 +1,1246 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_s390.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +void LIRItem::load_byte_item() { + // Byte loads use same registers as other loads. + load_item(); +} + +void LIRItem::load_nonconstant(int bits) { + LIR_Opr r = value()->operand(); + if (_gen->can_inline_as_constant(value(), bits)) { + if (!r->is_constant()) { + r = LIR_OprFact::value_type(value()->type()); + } + _result = r; + } else { + load_item(); + } +} + +inline void load_int_as_long(LIR_List *ll, LIRItem &li, LIR_Opr dst) { + LIR_Opr r = li.value()->operand(); + if (r->is_constant()) { + // Constants get loaded with sign extend on this platform. 
+ ll->move(li.result(), dst); + } else { + if (!r->is_register()) { + li.load_item_force(dst); + } + LIR_Opr dst_l = FrameMap::as_long_opr(dst->as_register()); + ll->convert(Bytecodes::_i2l, li.result(), dst_l); // Convert. + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::as_oop_opr(Z_EXC_OOP); } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::as_opr(Z_EXC_PC); } +LIR_Opr LIRGenerator::divInOpr() { return FrameMap::Z_R11_opr; } +LIR_Opr LIRGenerator::divOutOpr() { return FrameMap::Z_R11_opr; } +LIR_Opr LIRGenerator::remOutOpr() { return FrameMap::Z_R10_opr; } +LIR_Opr LIRGenerator::ldivInOpr() { return FrameMap::Z_R11_long_opr; } +LIR_Opr LIRGenerator::ldivOutOpr() { return FrameMap::Z_R11_long_opr; } +LIR_Opr LIRGenerator::lremOutOpr() { return FrameMap::Z_R10_long_opr; } +LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } +LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::Z_R13_opr; } +LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } + +LIR_Opr LIRGenerator::result_register_for (ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = FrameMap::Z_R2_opr; break; + case objectTag: opr = FrameMap::Z_R2_oop_opr; break; + case longTag: opr = FrameMap::Z_R2_long_opr; break; + case floatTag: opr = FrameMap::Z_F0_opr; break; + case doubleTag: opr = FrameMap::Z_F0_double_opr; break; + + case addressTag: + default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; + } + + assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); + return opr; +} + +LIR_Opr LIRGenerator::rlock_byte(BasicType type) { + return new_register(T_INT); +} + +//--------- Loading items into registers. -------------------------------- + +// z/Architecture cannot inline all constants. 
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { + if (v->type()->as_IntConstant() != NULL) { + return Immediate::is_simm16(v->type()->as_IntConstant()->value()); + } else if (v->type()->as_LongConstant() != NULL) { + return Immediate::is_simm16(v->type()->as_LongConstant()->value()); + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + +bool LIRGenerator::can_inline_as_constant(Value i, int bits) const { + if (i->type()->as_IntConstant() != NULL) { + return Assembler::is_simm(i->type()->as_IntConstant()->value(), bits); + } else if (i->type()->as_LongConstant() != NULL) { + return Assembler::is_simm(i->type()->as_LongConstant()->value(), bits); + } else { + return can_store_as_constant(i, as_BasicType(i->type())); + } +} + +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + if (c->type() == T_INT) { + return Immediate::is_simm20(c->as_jint()); + } else if (c->type() == T_LONG) { + return Immediate::is_simm20(c->as_jlong()); + } + return false; +} + +LIR_Opr LIRGenerator::safepoint_poll_register() { + return new_register(longType); +} + +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); + if (index->is_constant()) { + intptr_t large_disp = ((intx)(index->as_constant_ptr()->as_jint()) << shift) + disp; + if (Displacement::is_validDisp(large_disp)) { + return new LIR_Address(base, large_disp, type); + } + // Index is illegal so replace it with the displacement loaded into a register. 
+ index = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(large_disp), index); + return new LIR_Address(base, index, type); + } else { + if (shift > 0) { + LIR_Opr tmp = new_pointer_register(); + __ shift_left(index, shift, tmp); + index = tmp; + } + return new LIR_Address(base, index, disp, type); + } +} + +LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, + BasicType type, bool needs_card_mark) { + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + + LIR_Address* addr; + if (index_opr->is_constant()) { + addr = new LIR_Address(array_opr, + offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); + } else { + if (index_opr->type() == T_INT) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index_opr, tmp); + index_opr = tmp; + } + if (shift > 0) { + __ shift_left(index_opr, shift, index_opr); + } + addr = new LIR_Address(array_opr, + index_opr, + offset_in_bytes, type); + } + if (needs_card_mark) { + // This store will need a precise card mark, so go ahead and + // compute the full address instead of computing once for the + // store and again for the card mark. 
+ LIR_Opr tmp = new_pointer_register(); + __ leal(LIR_OprFact::address(addr), tmp); + return new LIR_Address(tmp, type); + } else { + return addr; + } +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + LIR_Opr r = LIR_OprFact::illegalOpr; + if (type == T_LONG) { + r = LIR_OprFact::longConst(x); + } else if (type == T_INT) { + r = LIR_OprFact::intConst(x); + } else { + ShouldNotReachHere(); + } + return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { + LIR_Opr pointer = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(counter), pointer); + LIR_Address* addr = new LIR_Address(pointer, type); + increment_counter(addr, step); +} + +void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); +} + +void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { + LIR_Opr scratch = FrameMap::Z_R1_opr; + __ load(new LIR_Address(base, disp, T_INT), scratch, info); + __ cmp(condition, scratch, c); +} + +void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); +} + +void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); +} + +bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid()) { + if (is_power_of_2(c + 1)) { + __ move(left, tmp); + __ shift_left(left, log2_intptr(c + 1), left); + __ sub(left, tmp, result); + return true; + } else if (is_power_of_2(c - 1)) { + __ move(left, tmp); + __ shift_left(left, log2_intptr(c - 1), left); + __ add(left, tmp, result); + return true; + } + } + return false; +} + +void 
LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { + BasicType type = item->type(); + __ store(item, new LIR_Address(FrameMap::Z_SP_opr, in_bytes(offset_from_sp), type)); +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + +void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { + assert(x->is_pinned(),""); + bool needs_range_check = x->compute_needs_range_check(); + bool use_length = x->length() != NULL; + bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT; + bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL || + !get_jobject_constant(x->value())->is_null_object() || + x->should_profile()); + + LIRItem array(x->array(), this); + LIRItem index(x->index(), this); + LIRItem value(x->value(), this); + LIRItem length(this); + + array.load_item(); + index.load_nonconstant(20); + + if (use_length && needs_range_check) { + length.set_instruction(x->length()); + length.load_item(); + } + if (needs_store_check) { + value.load_item(); + } else { + value.load_for_store(x->elt_type()); + } + + set_no_result(x); + + // The CodeEmitInfo must be duplicated for each different + // LIR-instruction because spilling can occur anywhere between two + // instructions and so the debug information must be different. + CodeEmitInfo* range_check_info = state_for (x); + CodeEmitInfo* null_check_info = NULL; + if (x->needs_null_check()) { + null_check_info = new CodeEmitInfo(range_check_info); + } + + // Emit array address setup early so it schedules better. + LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store); + if (value.result()->is_constant() && array_addr->index()->is_valid()) { + // Constants cannot be stored with index register on ZARCH_64 (see LIR_Assembler::const2mem()). 
+ LIR_Opr tmp = new_pointer_register(); + __ leal(LIR_OprFact::address(array_addr), tmp); + array_addr = new LIR_Address(tmp, x->elt_type()); + } + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { + __ cmp(lir_cond_belowEqual, length.result(), index.result()); + __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result())); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // Range_check also does the null check. + null_check_info = NULL; + } + } + + if (GenerateArrayStoreCheck && needs_store_check) { + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + + CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info); + __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci()); + } + + if (obj_store) { + // Needs GC write barriers. + pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(value.result(), array_addr, null_check_info); + // Seems to be a precise. + post_barrier(LIR_OprFact::address(array_addr), value.result()); + } else { + __ move(value.result(), array_addr, null_check_info); + } +} + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.load_item(); + + set_no_result(x); + + // "lock" stores the address of the monitor stack slot, so this is not an oop. + LIR_Opr lock = new_register(T_INT); + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for (x); + } + // This CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked). 
+ CodeEmitInfo* info = state_for (x, x->state(), true); + monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, + x->monitor_no(), info_for_exception, info); +} + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(),""); + + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + + LIR_Opr lock = new_register(T_INT); + LIR_Opr obj_temp = new_register(T_INT); + set_no_result(x); + monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); +} + +// _ineg, _lneg, _fneg, _dneg +void LIRGenerator::do_NegateOp(NegateOp* x) { + LIRItem value(x->x(), this); + value.load_item(); + LIR_Opr reg = rlock_result(x); + __ negate(value.result(), reg); +} + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + assert(!left.is_stack(), "can't both be memory operands"); + left.load_item(); + + if (right.is_register() || right.is_constant()) { + right.load_item(); + } else { + right.dont_load_item(); + } + + if ((x->op() == Bytecodes::_frem) || (x->op() == Bytecodes::_drem)) { + address entry; + switch (x->op()) { + case Bytecodes::_frem: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + break; + case Bytecodes::_drem: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr result = call_runtime(x->x(), x->y(), entry, x->type(), NULL); + set_result(x, result); + } else { + LIR_Opr reg = rlock(x); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp(), tmp); + set_result(x, reg); + } +} + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + // Use shifts if 
divisor is a power of 2, otherwise use DSGR instruction. + // Instruction: DSGR R1, R2 + // input : R1+1: dividend (R1, R1+1 designate a register pair, R1 must be even) + // R2: divisor + // + // output: R1+1: quotient + // R1: remainder + // + // Register selection: R1: Z_R10 + // R1+1: Z_R11 + // R2: to be chosen by register allocator (linear scan) + + // R1, and R1+1 will be destroyed. + + LIRItem right(x->y(), this); + LIRItem left(x->x() , this); // Visit left second, so that the is_register test is valid. + + // Call state_for before load_item_force because state_for may + // force the evaluation of other instructions that are needed for + // correct debug info. Otherwise the live range of the fixed + // register might be too long. + CodeEmitInfo* info = state_for (x); + + LIR_Opr result = rlock_result(x); + LIR_Opr result_reg = result; + LIR_Opr tmp = LIR_OprFact::illegalOpr; + LIR_Opr divisor_opr = right.result(); + if (divisor_opr->is_constant() && is_power_of_2(divisor_opr->as_jlong())) { + left.load_item(); + right.dont_load_item(); + } else { + left.load_item_force(ldivInOpr()); + right.load_item(); + + // DSGR instruction needs register pair. + if (x->op() == Bytecodes::_ldiv) { + result_reg = ldivOutOpr(); + tmp = lremOutOpr(); + } else { + result_reg = lremOutOpr(); + tmp = ldivOutOpr(); + } + } + + if (!ImplicitDiv0Checks) { + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + // Idiv/irem cannot trap (passing info would generate an assertion). 
+ info = NULL; + } + + if (x->op() == Bytecodes::_lrem) { + __ irem(left.result(), right.result(), result_reg, tmp, info); + } else if (x->op() == Bytecodes::_ldiv) { + __ idiv(left.result(), right.result(), result_reg, tmp, info); + } else { + ShouldNotReachHere(); + } + + if (result_reg != result) { + __ move(result_reg, result); + } + } else { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + right.load_nonconstant(32); + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + } +} + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { + // Use shifts if divisor is a power of 2, otherwise use DSGFR instruction. + // Instruction: DSGFR R1, R2 + // input : R1+1: dividend (R1, R1+1 designate a register pair, R1 must be even) + // R2: divisor + // + // output: R1+1: quotient + // R1: remainder + // + // Register selection: R1: Z_R10 + // R1+1: Z_R11 + // R2: To be chosen by register allocator (linear scan). + + // R1, and R1+1 will be destroyed. + + LIRItem right(x->y(), this); + LIRItem left(x->x() , this); // Visit left second, so that the is_register test is valid. + + // Call state_for before load_item_force because state_for may + // force the evaluation of other instructions that are needed for + // correct debug info. Otherwise the live range of the fixed + // register might be too long. + CodeEmitInfo* info = state_for (x); + + LIR_Opr result = rlock_result(x); + LIR_Opr result_reg = result; + LIR_Opr tmp = LIR_OprFact::illegalOpr; + LIR_Opr divisor_opr = right.result(); + if (divisor_opr->is_constant() && is_power_of_2(divisor_opr->as_jint())) { + left.load_item(); + right.dont_load_item(); + } else { + left.load_item_force(divInOpr()); + right.load_item(); + + // DSGFR instruction needs register pair. 
+ if (x->op() == Bytecodes::_idiv) { + result_reg = divOutOpr(); + tmp = remOutOpr(); + } else { + result_reg = remOutOpr(); + tmp = divOutOpr(); + } + } + + if (!ImplicitDiv0Checks) { + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::intConst(0)); + __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + // Idiv/irem cannot trap (passing info would generate an assertion). + info = NULL; + } + + if (x->op() == Bytecodes::_irem) { + __ irem(left.result(), right.result(), result_reg, tmp, info); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left.result(), right.result(), result_reg, tmp, info); + } else { + ShouldNotReachHere(); + } + + if (result_reg != result) { + __ move(result_reg, result); + } + } else { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + if (x->is_commutative() && left.is_stack() && right.is_register()) { + // swap them if left is real stack (or cached) and right is real register(not cached) + left_arg = &right; + right_arg = &left; + } + + left_arg->load_item(); + + // Do not need to load right, as we can handle stack and constants. 
+ if (x->op() == Bytecodes::_imul) { + bool use_tmp = false; + if (right_arg->is_constant()) { + int iconst = right_arg->get_jint_constant(); + if (is_power_of_2(iconst - 1) || is_power_of_2(iconst + 1)) { + use_tmp = true; + } + } + right_arg->dont_load_item(); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + if (use_tmp) { + tmp = new_register(T_INT); + } + rlock_result(x); + + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), tmp); + } else { + right_arg->dont_load_item(); + rlock_result(x); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), tmp); + } + } +} + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + // If an operand with use count 1 is the left operand, then it is + // likely that no move for 2-operand-LIR-form is necessary. + if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { + x->swap_operands(); + } + + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + } + ShouldNotReachHere(); +} + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + // The count is loaded into a register unless it is a constant. + LIRItem value(x->x(), this); + LIRItem count(x->y(), this); + + ValueTag elemType = x->type()->tag(); + bool must_load_count = !count.is_constant(); + if (must_load_count) { + count.load_item(); + } else { + count.dont_load_item(); + } + value.load_item(); + LIR_Opr reg = rlock_result(x); + + shift_op(x->op(), reg, value.result(), count.result(), LIR_OprFact::illegalOpr); +} + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + // If an operand with use count 1 is the left operand, then it is + //
likely that no move for 2-operand-LIR-form is necessary. + if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { + x->swap_operands(); + } + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + right.load_nonconstant(32); + LIR_Opr reg = rlock_result(x); + + logic_op(x->op(), reg, left.result(), right.result()); +} + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + left.load_item(); + right.load_item(); + LIR_Opr reg = rlock_result(x); + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else if (x->x()->type()->tag() == longTag) { + __ lcmp2int(left.result(), right.result(), reg); + } else { + ShouldNotReachHere(); + } +} + +void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { + assert(x->number_of_arguments() == 4, "wrong type"); + LIRItem obj (x->argument_at(0), this); // object + LIRItem offset(x->argument_at(1), this); // offset of field + LIRItem cmp (x->argument_at(2), this); // Value to compare with field. + LIRItem val (x->argument_at(3), this); // Replace field with val if matches cmp. + + // Get address of field. + obj.load_item(); + offset.load_nonconstant(20); + cmp.load_item(); + val.load_item(); + + LIR_Opr addr = new_pointer_register(); + LIR_Address* a; + if (offset.result()->is_constant()) { + assert(Immediate::is_simm20(offset.result()->as_jlong()), "should have been loaded into register"); + a = new LIR_Address(obj.result(), + offset.result()->as_jlong(), + as_BasicType(type)); + } else { + a = new LIR_Address(obj.result(), + offset.result(), + 0, + as_BasicType(type)); + } + __ leal(LIR_OprFact::address(a), addr); + + if (type == objectType) { // Write-barrier needed for Object fields. 
+ pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience + if (type == objectType) { + __ cas_obj(addr, cmp.result(), val.result(), new_register(T_OBJECT), new_register(T_OBJECT)); + } else if (type == intType) { + __ cas_int(addr, cmp.result(), val.result(), ill, ill); + } else if (type == longType) { + __ cas_long(addr, cmp.result(), val.result(), ill, ill); + } else { + ShouldNotReachHere(); + } + // Generate conditional move of boolean result. + LIR_Opr result = rlock_result(x); + __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), + result, as_BasicType(type)); + if (type == objectType) { // Write-barrier needed for Object fields. + // Precise card mark since could either be object or array + post_barrier(addr, val.result()); + } +} + + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + switch (x->id()) { + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_dabs: { + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + } + break; + } + case vmIntrinsics::_dlog10: // fall through + case vmIntrinsics::_dlog: // fall through + case vmIntrinsics::_dsin: // fall through + case vmIntrinsics::_dtan: // fall through + case vmIntrinsics::_dcos: // fall through + case vmIntrinsics::_dexp: { + assert(x->number_of_arguments() == 1, "wrong type"); + + address runtime_entry = NULL; + switch (x->id()) { + case vmIntrinsics::_dsin: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + break; + case vmIntrinsics::_dcos: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + break; + case 
vmIntrinsics::_dtan: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + break; + case vmIntrinsics::_dlog: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + break; + case vmIntrinsics::_dlog10: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + break; + case vmIntrinsics::_dexp: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; + default: + ShouldNotReachHere(); + } + + LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + case vmIntrinsics::_dpow: { + assert(x->number_of_arguments() == 2, "wrong type"); + address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + } +} + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); + + // Copy stubs possibly call C code, e.g. G1 barriers, so we need to reserve room + // for the C ABI (see frame::z_abi_160). + BasicTypeArray sig; // Empty signature is precise enough. + frame_map()->c_calling_convention(&sig); + + // Make all state_for calls early since they can emit code. + CodeEmitInfo* info = state_for (x, x->state()); + + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // Operands for arraycopy must use fixed registers, otherwise + // LinearScan will fail allocation (because arraycopy always needs a + // call). 
+ + src.load_item_force (FrameMap::as_oop_opr(Z_ARG1)); + src_pos.load_item_force (FrameMap::as_opr(Z_ARG2)); + dst.load_item_force (FrameMap::as_oop_opr(Z_ARG3)); + dst_pos.load_item_force (FrameMap::as_opr(Z_ARG4)); + length.load_item_force (FrameMap::as_opr(Z_ARG5)); + + LIR_Opr tmp = FrameMap::as_opr(Z_R7); + + set_no_result(x); + + int flags; + ciArrayKlass* expected_type; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), + length.result(), tmp, expected_type, flags, info); // does add_safepoint +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void LIRGenerator::do_Convert(Convert* x) { + LIRItem value(x->value(), this); + + value.load_item(); + LIR_Opr reg = rlock_result(x); + __ convert(x->op(), value.result(), reg); +} + +void LIRGenerator::do_NewInstance(NewInstance* x) { + print_if_not_loaded(x); + + // This instruction can be deoptimized in the slow path : use + // Z_R2 as result register. 
+ const LIR_Opr reg = result_register_for (x->type()); + + CodeEmitInfo* info = state_for (x, x->state()); + LIR_Opr tmp1 = FrameMap::Z_R12_oop_opr; + LIR_Opr tmp2 = FrameMap::Z_R13_oop_opr; + LIR_Opr tmp3 = reg; + LIR_Opr tmp4 = LIR_OprFact::illegalOpr; + LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr; + new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, tmp4, klass_reg, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + CodeEmitInfo* info = state_for (x, x->state()); + + LIRItem length(x->length(), this); + length.load_item(); + + LIR_Opr reg = result_register_for (x->type()); + LIR_Opr tmp1 = FrameMap::Z_R12_oop_opr; + LIR_Opr tmp2 = FrameMap::Z_R13_oop_opr; + LIR_Opr tmp3 = reg; + LIR_Opr tmp4 = LIR_OprFact::illegalOpr; + LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + // Evaluate state_for early since it may emit code. + CodeEmitInfo* info = state_for (x, x->state()); + // In case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and therefore provide the state before the parameters have been consumed. 
+ CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for (x, x->state_before()); + } + + LIRItem length(x->length(), this); + length.load_item(); + + const LIR_Opr reg = result_register_for (x->type()); + LIR_Opr tmp1 = FrameMap::Z_R12_oop_opr; + LIR_Opr tmp2 = FrameMap::Z_R13_oop_opr; + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + LIR_Opr tmp4 = LIR_OprFact::illegalOpr; + LIR_Opr klass_reg = FrameMap::Z_R11_metadata_opr; + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciKlass* obj = ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(i, i, NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for (x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). 
+ x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for (x, x->state()); + + i = dims->length(); + while (--i >= 0) { + LIRItem* size = items->at(i); + size->load_nonconstant(32); + // FrameMap::_reserved_argument_area_size includes the dimensions varargs, because + // it's initialized to hir()->max_stack() when the FrameMap is created. + store_stack_parameter(size->result(), in_ByteSize(i*sizeof(jint) + FrameMap::first_available_sp_in_frame)); + } + + LIR_Opr klass_reg = FrameMap::Z_R3_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::Z_R4_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::Z_R5_opr; + __ leal(LIR_OprFact::address(new LIR_Address(FrameMap::Z_SP_opr, FrameMap::first_available_sp_in_frame, T_INT)), + varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for (x->type()); + __ call_runtime(Runtime1::entry_for (Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // Nothing to do. +} + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) { + // Must do this before locking the destination register as an oop register, + // and before the obj is loaded (the latter is for deoptimization). 
+ patching_info = state_for (x, x->state_before()); + } + obj.load_item(); + + // info for exceptions + CodeEmitInfo* info_for_exception = state_for (x); + + CodeStub* stub; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); + } + LIR_Opr reg = rlock_result(x); + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + __ checkcast(reg, obj.result(), x->klass(), + tmp1, tmp2, tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for (x, x->state_before()); + } + // Ensure the result register is not the input register because the + // result is initialized before the patching safepoint. 
+ obj.load_item(); + LIR_Opr out_reg = rlock_result(x); + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + __ instanceof(out_reg, obj.result(), x->klass(), tmp1, tmp2, tmp3, + x->direct_compare(), patching_info, + x->profiled_method(), x->profiled_bci()); +} + + +void LIRGenerator::do_If (If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + bool is_safepoint = x->is_safepoint(); + + If::Condition cond = x->cond(); + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + + if (tag == longTag) { + // For longs, only conditions "eql", "neq", "lss", "geq" are valid; + // mirror for other conditions. + if (cond == If::gtr || cond == If::leq) { + cond = Instruction::mirror(cond); + xin = &yitem; + yin = &xitem; + } + xin->set_destroys_register(); + } + xin->load_item(); + // TODO: don't load long constants != 0L + if (tag == longTag && yin->is_constant() && yin->get_jlong_constant() == 0 && (cond == If::eql || cond == If::neq)) { + // inline long zero + yin->dont_load_item(); + } else if (tag == longTag || tag == floatTag || tag == doubleTag) { + // Longs cannot handle constants at right side. + yin->load_item(); + } else { + yin->dont_load_item(); + } + + // Add safepoint before generating condition code so it can be recomputed. + if (x->is_safepoint()) { + // Increment backedge counter if needed. + increment_backedge_counter(state_for (x, x->state_before()), x->profiled_bci()); + // Use safepoint_poll_register() instead of LIR_OprFact::illegalOpr. + __ safepoint(safepoint_poll_register(), state_for (x, x->state_before())); + } + set_no_result(x); + + LIR_Opr left = xin->result(); + LIR_Opr right = yin->result(); + __ cmp(lir_cond(cond), left, right); + // Generate branch profiling. Profiling code doesn't kill flags. 
+ profile_branch(x, cond); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); + } else { + __ branch(lir_cond(cond), right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(Z_thread); +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { + __ move(LIR_OprFact::intConst(block->block_id()), FrameMap::Z_R2_opr); + LIR_OprList* args = new LIR_OprList(1); + args->append(FrameMap::Z_R2_opr); + address func = CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry); + __ call_runtime_leaf(func, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, args); +} + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + __ store(value, address, info); +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + __ load(address, result, info); +} + + +void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + if (is_obj) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(data, addr); + assert(src->is_register(), "must be register"); + // Seems to be a precise address. 
+ post_barrier(LIR_OprFact::address(addr), data); + } else { + __ move(data, addr); + } +} + + +void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + __ load(addr, dst); +} + +void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { + BasicType type = x->basic_type(); + assert (x->is_add() && type != T_ARRAY && type != T_OBJECT, "not supported"); + LIRItem src(x->object(), this); + LIRItem off(x->offset(), this); + LIRItem value(x->value(), this); + + src.load_item(); + value.load_item(); + off.load_nonconstant(20); + + LIR_Opr dst = rlock_result(x, type); + LIR_Opr data = value.result(); + LIR_Opr offset = off.result(); + + LIR_Address* addr; + if (offset->is_constant()) { + assert(Immediate::is_simm20(offset->as_jlong()), "should have been loaded into register"); + addr = new LIR_Address(src.result(), offset->as_jlong(), type); + } else { + addr = new LIR_Address(src.result(), offset, type); + } + + __ xadd(LIR_OprFact::address(addr), data, dst, LIR_OprFact::illegalOpr); +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + assert(UseCRC32Intrinsics, "or should not be here"); + LIR_Opr result = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_updateCRC32: { + LIRItem crc(x->argument_at(0), this); + LIRItem val(x->argument_at(1), this); + // Registers destroyed by update_crc32. 
+ crc.set_destroys_register(); + val.set_destroys_register(); + crc.load_item(); + val.load_item(); + __ update_crc32(crc.result(), val.result(), result); + break; + } + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: { + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem len(x->argument_at(3), this); + buf.load_item(); + off.load_nonconstant(); + + LIR_Opr index = off.result(); + int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + if (off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + } + + LIR_Address* a = new LIR_Address(base_op, index, offset, T_BYTE); + + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for (x->type()); + + LIR_Opr arg1 = cc->at(0); + LIR_Opr arg2 = cc->at(1); + LIR_Opr arg3 = cc->at(2); + + // CCallingConventionRequiresIntsAsLongs + crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits. 
+ __ leal(LIR_OprFact::address(a), arg2); + load_int_as_long(gen()->lir(), len, arg3); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args()); + __ move(result_reg, result); + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + Unimplemented(); +} + +void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { + fatal("FMA intrinsic is not implemented on this platform"); +} + +void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { + fatal("vectorizedMismatch intrinsic is not implemented on this platform"); +} + diff --git a/hotspot/src/cpu/s390/vm/c1_LIR_s390.cpp b/hotspot/src/cpu/s390/vm/c1_LIR_s390.cpp new file mode 100644 index 00000000000..9507ca08561 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c1_LIR_s390.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/register.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" + + +FloatRegister LIR_OprDesc::as_float_reg() const { + return FrameMap::nr2floatreg(fpu_regnr()); +} + +FloatRegister LIR_OprDesc::as_double_reg() const { + return FrameMap::nr2floatreg(fpu_regnrHi()); +} + +// Reg2 unused. +LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(!as_FloatRegister(reg2)->is_valid(), "Not used on this platform"); + return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | + (reg1 << LIR_OprDesc::reg2_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::double_size); +} + +#ifndef PRODUCT +void LIR_Address::verify() const { + assert(base()->is_cpu_register(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); + assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, + "wrong type for addresses"); +} +#endif // PRODUCT + diff --git a/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.cpp b/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.cpp new file mode 100644 index 00000000000..f48496f34d3 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_LinearScan.hpp" +#include "utilities/debug.hpp" + +void LinearScan::allocate_fpu_stack() { + // No FPU stack on ZARCH_64. + ShouldNotCallThis(); +} diff --git a/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.hpp b/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.hpp new file mode 100644 index 00000000000..82f341360f9 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c1_LinearScan_s390.hpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_S390_VM_C1_LINEARSCAN_S390_HPP +#define CPU_S390_VM_C1_LINEARSCAN_S390_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) { + // unallocated: Z_thread, Z_fp, Z_SP, Z_R0_scratch, Z_R1_scratch, Z_R14 + assert(FrameMap::Z_R14_opr->cpu_regnr() == 10, "wrong assumption below"); + assert(FrameMap::Z_R0_opr->cpu_regnr() == 11, "wrong assumption below"); + assert(FrameMap::Z_R1_opr->cpu_regnr() == 12, "wrong assumption below"); + assert(FrameMap::Z_R8_opr->cpu_regnr() == 13, "wrong assumption below"); + assert(FrameMap::Z_R9_opr->cpu_regnr() == 14, "wrong assumption below"); + assert(FrameMap::Z_R15_opr->cpu_regnr() == 15, "wrong assumption below"); + assert(reg_num >= 0, "invalid reg_num"); + return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; +} + +inline int LinearScan::num_physical_regs(BasicType type) { + // IBM Z requires one cpu register for long, + // and one fpu register for double. + return 1; +} + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); + return true; // No callee-saved registers on IBM Z. +} + +inline void LinearScan::pd_add_temps(LIR_Op* op) { + // No special case behaviours. +} + +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { + return false; // No special case behaviours. +} + +#endif // CPU_S390_VM_C1_LINEARSCAN_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.cpp b/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.cpp new file mode 100644 index 00000000000..f7f8c29466a --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.cpp @@ -0,0 +1,380 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "interpreter/interpreter.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+
+// Emits the inline cache check: compares iCache with the klass field of
+// receiver. On a match execution continues at the (CodeEntryAlignment
+// aligned) code following this sequence; otherwise control is transferred
+// to SharedRuntime::get_ic_miss_stub().
+void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
+  Label ic_miss, ic_hit;
+  verify_oop(receiver);
+  int klass_offset = oopDesc::klass_offset_in_bytes();
+
+  // A null receiver is only tested explicitly when the klass load below
+  // cannot serve as an implicit null check.
+  if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
+    if (VM_Version::has_CompareBranch()) {
+      z_cgij(receiver, 0, Assembler::bcondEqual, ic_miss);
+    } else {
+      z_ltgr(receiver, receiver);
+      z_bre(ic_miss);
+    }
+  }
+
+  compare_klass_ptr(iCache, klass_offset, receiver, false);
+  z_bre(ic_hit);
+
+  // If icache check fails, then jump to runtime routine.
+  // Note: RECEIVER must still contain the receiver!
+  // ic_miss must be bound here: the explicit null check above branches to it,
+  // and a label that is branched to but never bound has no target.
+  bind(ic_miss);
+  load_const_optimized(Z_R1_scratch, AddressLiteral(SharedRuntime::get_ic_miss_stub()));
+  z_br(Z_R1_scratch);
+  align(CodeEntryAlignment);
+  bind(ic_hit);
+}
+
+// Not used on this platform.
+void C1_MacroAssembler::explicit_null_check(Register base) {
+  ShouldNotCallThis(); // unused
+}
+
+// Emits the method prologue: stack overflow check for bang_size_in_bytes,
+// save of the return pc, and push of a frame of frame_size_in_bytes.
+void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) {
+  assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
+  generate_stack_overflow_check(bang_size_in_bytes);
+  save_return_pc();
+  push_frame(frame_size_in_bytes); // TODO: Must we add z_abi_160?
+}
+
+// Not used on this platform.
+void C1_MacroAssembler::unverified_entry(Register receiver, Register ic_klass) {
+  ShouldNotCallThis(); // unused
+}
+
+// Emits an illegal-instruction trap at the verified entry when C1Breakpoint is set.
+void C1_MacroAssembler::verified_entry() {
+  if (C1Breakpoint) z_illtrap(0xC1);
+}
+
+// Emits the fast path for monitor enter.
+//   hdr      : scratch, receives the mark word; contents destroyed.
+//   obj      : object to lock.
+//   disp_hdr : address of the BasicObjectLock (displaced header at offset 0).
+//   slow_case: branched to when the fast path cannot take the lock.
+void C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert_different_registers(hdr, obj, disp_hdr);
+  NearLabel done;
+
+  verify_oop(obj);
+
+  // Load object header.
+  z_lg(hdr, Address(obj, hdr_offset));
+
+  // Save object being locked into the BasicObjectLock...
+  z_stg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+
+  if (UseBiasedLocking) {
+    biased_locking_enter(obj, hdr, Z_R1_scratch, Z_R0_scratch, done, &slow_case);
+  }
+
+  // and mark it as unlocked.
+  z_oill(hdr, markOopDesc::unlocked_value);
+  // Save unlocked object header into the displaced header location on the stack.
+  z_stg(hdr, Address(disp_hdr, (intptr_t)0));
+  // Test if object header is still the same (i.e. unlocked), and if so, store the
+  // displaced header address in the object header. If it is not the same, get the
+  // object header instead.
+  z_csg(hdr, disp_hdr, hdr_offset, obj);
+  // If the object header was the same, we're done.
+  if (PrintBiasedLockingStatistics) {
+    Unimplemented();
+#if 0
+    cond_inc32(Assembler::equal,
+               ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr()));
+#endif
+  }
+  branch_optimized(Assembler::bcondEqual, done);
+  // If the object header was not the same, it is now in the hdr register.
+  // => Test if it is a stack pointer into the same stack (recursive locking), i.e.:
+  //
+  // 1) (hdr & markOopDesc::lock_mask_in_place) == 0
+  // 2) Z_SP <= hdr
+  // 3) hdr <= Z_SP + page_size
+  //
+  // These 3 tests can be done by evaluating the following expression:
+  //
+  // (hdr - Z_SP) & (~(page_size-1) | markOopDesc::lock_mask_in_place)
+  //
+  // assuming both the stack pointer and page_size have their least
+  // significant 2 bits cleared and page_size is a power of 2
+  z_sgr(hdr, Z_SP);
+
+  load_const_optimized(Z_R0_scratch, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
+  z_ngr(hdr, Z_R0_scratch); // AND sets CC (result eq/ne 0).
+  // For recursive locking, the result is zero. => Save it in the displaced header
+  // location (NULL in the displaced hdr location indicates recursive locking).
+  z_stg(hdr, Address(disp_hdr, (intptr_t)0));
+  // Otherwise we don't care about the result and handle locking via runtime call.
+  branch_optimized(Assembler::bcondNotZero, slow_case);
+  // done
+  bind(done);
+}
+
+// Emits the fast path for monitor exit.
+//   hdr      : scratch, receives the displaced header; contents destroyed.
+//   obj      : loaded here from the BasicObjectLock at disp_hdr.
+//   disp_hdr : address of the BasicObjectLock; overwritten by the
+//              compare-and-swap when it fails.
+//   slow_case: branched to when the object header does not point to disp_hdr.
+void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
+  const int hdr_offset = oopDesc::mark_offset_in_bytes();
+  assert_different_registers(hdr, obj, disp_hdr);
+  NearLabel done;
+
+  if (UseBiasedLocking) {
+    // Load object.
+    z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+    biased_locking_exit(obj, hdr, done);
+  }
+
+  // Load displaced header.
+  z_ltg(hdr, Address(disp_hdr, (intptr_t)0));
+  // If the loaded hdr is NULL we had recursive locking, and we are done.
+  z_bre(done);
+  if (!UseBiasedLocking) {
+    // Load object.
+    z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
+  }
+  verify_oop(obj);
+  // Test if object header is pointing to the displaced header, and if so, restore
+  // the displaced header in the object. If the object header is not pointing to
+  // the displaced header, get the object header instead.
+  z_csg(disp_hdr, hdr, hdr_offset, obj);
+  // If the object header was not pointing to the displaced header,
+  // we do unlocking via runtime call.
+  branch_optimized(Assembler::bcondNotEqual, slow_case);
+  // done
+  bind(done);
+}
+
+// Allocates from the TLAB when UseTLAB is set; otherwise always takes slow_case.
+void C1_MacroAssembler::try_allocate(
+  Register obj,                        // result: Pointer to object after successful allocation.
+  Register var_size_in_bytes,          // Object size in bytes if unknown at compile time; invalid otherwise.
+  int      con_size_in_bytes,          // Object size in bytes if known at compile time.
+  Register t1,                         // Temp register: Must be global register for incr_allocated_bytes.
+  Label&   slow_case                   // Continuation point if fast allocation fails.
+) {
+  if (UseTLAB) {
+    tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
+  } else {
+    // Allocation in shared Eden not implemented, because sapjvm allocation trace does not allow it.
+    z_brul(slow_case);
+  }
+}
+
+// Stores the mark word, the array length (when len is valid) or the zeroed
+// klass gap (when compressed class pointers are in use), and the klass.
+//   Rzero: must hold zero when len is invalid and UseCompressedClassPointers is set.
+//   t1   : temp register.
+void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register Rzero, Register t1) {
+  assert_different_registers(obj, klass, len, t1, Rzero);
+  if (UseBiasedLocking && !len->is_valid()) {
+    z_lg(t1, Address(klass, Klass::prototype_header_offset()));
+  } else {
+    // This assumes that all prototype bits fit in an int32_t.
+    load_const_optimized(t1, (intx)markOopDesc::prototype());
+  }
+  z_stg(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
+
+  if (len->is_valid()) {
+    // Length will be in the klass gap, if one exists.
+    z_st(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
+  } else if (UseCompressedClassPointers) {
+    store_klass_gap(Rzero, obj);  // Zero klass gap for compressed oops.
+  }
+  store_klass(klass, obj, t1);
+}
+
+// Zeroes len_in_bytes bytes starting at objectFields using MVCLE.
+// objectFields/len_in_bytes must form an even/odd register pair and Rzero
+// must be the odd register of another pair (see the asserts).
+void C1_MacroAssembler::initialize_body(Register objectFields, Register len_in_bytes, Register Rzero) {
+  assert_different_registers(objectFields, len_in_bytes, Rzero);
+
+  // Initialize object fields.
+  // See documentation for MVCLE instruction!!!
+  assert(objectFields->encoding()%2==0, "objectFields must be an even register");
+  assert(len_in_bytes->encoding() == (objectFields->encoding()+1), "objectFields and len_in_bytes must be a register pair");
+  assert(Rzero->encoding()%2==1, "Rzero must be an odd register");
+
+  // Use Rzero as src length, then mvcle will copy nothing
+  // and fill the object with the padding value 0.
+  move_long_ext(objectFields, as_Register(Rzero->encoding()-1), 0);
+}
+
+// Allocates and initializes a fixed-size object; branches to slow_case
+// when fast allocation fails.
+void C1_MacroAssembler::allocate_object(
+  Register obj,                        // Result: pointer to object after successful allocation.
+  Register t1,                         // temp register
+  Register t2,                         // temp register: Must be a global register for try_allocate.
+  int      hdr_size,                   // object header size in words
+  int      obj_size,                   // object size in words
+  Register klass,                      // object klass
+  Label&   slow_case                   // Continuation point if fast allocation fails.
+) {
+  assert_different_registers(obj, t1, t2, klass);
+
+  // Allocate space and initialize header.
+  try_allocate(obj, noreg, obj_size * wordSize, t1, slow_case);
+
+  initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2);
+}
+
+// Writes the header of a freshly allocated instance and zeroes its fields.
+// Only compile-time constant sizes are supported (var_size_in_bytes must be noreg).
+void C1_MacroAssembler::initialize_object(
+  Register obj,                        // result: Pointer to object after successful allocation.
+  Register klass,                      // object klass
+  Register var_size_in_bytes,          // Object size in bytes if unknown at compile time; invalid otherwise.
+  int      con_size_in_bytes,          // Object size in bytes if known at compile time.
+  Register t1,                         // temp register
+  Register t2                          // temp register
+ ) {
+  assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
+         "con_size_in_bytes is not multiple of alignment");
+  assert(var_size_in_bytes == noreg, "not implemented");
+  const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
+
+  const Register Rzero = t2;
+
+  z_xgr(Rzero, Rzero);
+  initialize_header(obj, klass, noreg, Rzero, t1);
+
+  // Clear rest of allocated space.
+  const int threshold = 4 * BytesPerWord;
+  if (con_size_in_bytes <= threshold) {
+    // Use explicit null stores.
+    // code size = 6*n bytes (n = number of fields to clear)
+    for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord)
+      z_stg(Rzero, Address(obj, i));
+  } else {
+    // Code size generated by initialize_body() is 16.
+    Register object_fields = Z_R0_scratch;
+    Register len_in_bytes  = Z_R1_scratch;
+    z_la(object_fields, hdr_size_in_bytes, obj);
+    load_const_optimized(len_in_bytes, con_size_in_bytes - hdr_size_in_bytes);
+    initialize_body(object_fields, len_in_bytes, Rzero);
+  }
+
+  // Dtrace support is unimplemented.
+  //  if (CURRENT_ENV->dtrace_alloc_probes()) {
+  //    assert(obj == rax, "must be");
+  //    call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id)));
+  //  }
+
+  verify_oop(obj);
+}
+
+// Allocates and initializes an array; branches to slow_case when len exceeds
+// max_array_allocation_length (unsigned compare) or fast allocation fails.
+void C1_MacroAssembler::allocate_array(
+  Register obj,                        // result: Pointer to array after successful allocation.
+  Register len,                        // array length
+  Register t1,                         // temp register
+  Register t2,                         // temp register
+  int      hdr_size,                   // object header size in words
+  int      elt_size,                   // element size in bytes
+  Register klass,                      // object klass
+  Label&   slow_case                   // Continuation point if fast allocation fails.
+) {
+  assert_different_registers(obj, len, t1, t2, klass);
+
+  // Determine alignment mask.
+  assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work");
+
+  // Check for negative or excessive length.
+  compareU64_and_branch(len, (int32_t)max_array_allocation_length, bcondHigh, slow_case);
+
+  // Compute array size.
+  // Note: If 0 <= len <= max_length, len*elt_size + header + alignment is
+  // smaller or equal to the largest integer. Also, since top is always
+  // aligned, we can do the alignment here instead of at the end address
+  // computation.
+  const Register arr_size = t2;
+  switch (elt_size) {
+    case  1: lgr_if_needed(arr_size, len); break;
+    case  2: z_sllg(arr_size, len, 1); break;
+    case  4: z_sllg(arr_size, len, 2); break;
+    case  8: z_sllg(arr_size, len, 3); break;
+    default: ShouldNotReachHere();
+  }
+  add2reg(arr_size, hdr_size * wordSize + MinObjAlignmentInBytesMask); // Add space for header & alignment.
+  z_nill(arr_size, (~MinObjAlignmentInBytesMask) & 0xffff);            // Align array size.
+
+  try_allocate(obj, arr_size, 0, t1, slow_case);
+
+  initialize_header(obj, klass, len, noreg, t1);
+
+  // Clear rest of allocated space.
+  Label done;
+  Register object_fields = t1;
+  Register Rzero = Z_R1_scratch;
+  z_aghi(arr_size, -(hdr_size * BytesPerWord));
+  z_bre(done); // Jump if size of fields is zero.
+  z_la(object_fields, hdr_size * BytesPerWord, obj);
+  z_xgr(Rzero, Rzero);
+  initialize_body(object_fields, arr_size, Rzero);
+  bind(done);
+
+  // Dtrace support is unimplemented.
+  // if (CURRENT_ENV->dtrace_alloc_probes()) {
+  //   assert(obj == rax, "must be");
+  //   call(RuntimeAddress(Runtime1::entry_for (Runtime1::dtrace_object_alloc_id)));
+  // }
+
+  verify_oop(obj);
+}
+
+
+#ifndef PRODUCT
+
+// Not implemented on this platform.
+void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
+  Unimplemented();
+  // if (!VerifyOops) return;
+  // verify_oop_addr(Address(SP, stack_offset + STACK_BIAS));
+}
+
+// With VerifyOops set, stops the VM if r is zero and verifies r as an oop otherwise.
+void C1_MacroAssembler::verify_not_null_oop(Register r) {
+  if (!VerifyOops) return;
+  NearLabel not_null;
+  compareU64_and_branch(r, (intptr_t)0, bcondNotEqual, not_null);
+  stop("non-null oop required");
+  bind(not_null);
+  verify_oop(r);
+}
+
+// Overwrites every cpu register except the preserved ones, Z_SP and Z_thread
+// with 0xc1dead. The constant is materialized once and then copied.
+void C1_MacroAssembler::invalidate_registers(Register preserve1,
+                                             Register preserve2,
+                                             Register preserve3) {
+  Register dead_value = noreg;
+  for (int i = 0; i < FrameMap::nof_cpu_regs; i++) {
+    Register r = as_Register(i);
+    if (r != preserve1 && r != preserve2 && r != preserve3 && r != Z_SP && r != Z_thread) {
+      if (dead_value == noreg) {
+        load_const_optimized(r, 0xc1dead);
+        dead_value = r;
+      } else {
+        z_lgr(r, dead_value);
+      }
+    }
+  }
+}
+
+#endif // !PRODUCT
diff --git a/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.hpp b/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.hpp
new file mode 100644
index 00000000000..e4e03af35d3
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/c1_MacroAssembler_s390.hpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_C1_MACROASSEMBLER_S390_HPP
+#define CPU_S390_VM_C1_MACROASSEMBLER_S390_HPP
+
+  // Platform-dependent part of the C1_MacroAssembler declaration.
+
+  void pd_init() { /* nothing to do */ }
+
+ public:
+  // Allocates from the TLAB if UseTLAB is set; otherwise branches to slow_case.
+  void try_allocate(
+    Register obj,                      // result: Pointer to object after successful allocation.
+    Register var_size_in_bytes,        // Object size in bytes if unknown at compile time; invalid otherwise.
+    int      con_size_in_bytes,        // Object size in bytes if known at compile time.
+    Register t1,                       // temp register
+    Label&   slow_case                 // Continuation point if fast allocation fails.
+  );
+
+  // Stores mark word, array length or klass gap, and klass into obj.
+  void initialize_header(Register obj, Register klass, Register len, Register Rzero, Register t1);
+  // Zeroes len_in_bytes bytes at objectFields; the definition asserts that
+  // objectFields/len_in_bytes are an even/odd register pair and Rzero is odd.
+  void initialize_body(Register objectFields, Register len_in_bytes, Register Rzero);
+
+  // locking
+  // hdr     : Used to hold locked markOop to be CASed into obj, contents destroyed.
+  // obj     : Must point to the object to lock, contents preserved.
+  // disp_hdr: Must point to the displaced header location, contents preserved.
+  // Returns nothing; branches to slow_case when the fast path cannot take the lock.
+  void lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case);
+
+  // unlocking
+  // hdr     : Used to hold original markOop to be CASed back into obj, contents destroyed.
+  // obj     : Loaded by this method from the BasicObjectLock at 'lock'.
+  // lock    : Must point to the displaced header location (named disp_hdr in
+  //           the definition), contents destroyed.
+  void unlock_object(Register hdr, Register obj, Register lock, Label& slow_case);
+
+  // Writes the header of a freshly allocated instance and clears its fields.
+  void initialize_object(
+    Register obj,                      // result: Pointer to object after successful allocation.
+    Register klass,                    // object klass
+    Register var_size_in_bytes,        // Object size in bytes if unknown at compile time; invalid otherwise.
+    int      con_size_in_bytes,        // Object size in bytes if known at compile time.
+    Register t1,                       // temp register
+    Register t2                        // temp register
+  );
+
+  // Allocation of fixed-size objects.
+  // This can also be used to allocate fixed-size arrays, by setting
+  // hdr_size correctly and storing the array length afterwards.
+  void allocate_object(
+    Register obj,                      // result: Pointer to object after successful allocation.
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    int      hdr_size,                 // object header size in words
+    int      obj_size,                 // object size in words
+    Register klass,                    // object klass
+    Label&   slow_case                 // Continuation point if fast allocation fails.
+  );
+
+  // Largest array length handled by the fast path; allocate_array branches
+  // to slow_case for anything larger.
+  enum {
+    max_array_allocation_length = 0x01000000 // Sparc friendly value, requires sethi only. NOTE(review): presumably inherited from the SPARC port - confirm.
+  };
+
+  // Allocation of arrays.
+  void allocate_array(
+    Register obj,                      // result: Pointer to array after successful allocation.
+    Register len,                      // array length
+    Register t1,                       // temp register
+    Register t2,                       // temp register
+    int      hdr_size,                 // object header size in words
+    int      elt_size,                 // element size in bytes
+    Register klass,                    // object klass
+    Label&   slow_case                 // Continuation point if fast allocation fails.
+  );
+
+  // Overwrites all cpu registers except the preserved ones, Z_SP and Z_thread
+  // with 0xc1dead (non-PRODUCT builds only).
+  void invalidate_registers(Register preserve1 = noreg, Register preserve2 = noreg,
+                            Register preserve3 = noreg) PRODUCT_RETURN;
+
+  void nop() { z_nop(); }
+
+  // This platform only uses signal-based null checks. The Label is not needed.
+  void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); }
+
+#endif // CPU_S390_VM_C1_MACROASSEMBLER_S390_HPP
diff --git a/hotspot/src/cpu/s390/vm/c1_Runtime1_s390.cpp b/hotspot/src/cpu/s390/vm/c1_Runtime1_s390.cpp
new file mode 100644
index 00000000000..c0fbee53da7
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/c1_Runtime1_s390.cpp
@@ -0,0 +1,1065 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Defs.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "interpreter/interpreter.hpp"
+#include "nativeInst_s390.hpp"
+#include "oops/compiledICHolder.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "register_s390.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_s390.inline.hpp"
+#include "registerSaver_s390.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif
+
+// Implementation of StubAssembler
+
+// Emits a call to the runtime routine entry_point with Z_ARG1 = Z_thread,
+// forwards a pending exception if one is set after the call, and fetches
+// the vm results into oop_result1 / metadata_result when those are valid.
+// Returns the code offset of the pc recorded in the frame anchor, for use
+// as the oop map offset.
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry_point, int number_of_arguments) {
+  set_num_rt_args(0); // Nothing on stack.
+  assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different");
+
+  // We cannot trust that code generated by the C++ compiler saves R14
+  // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at
+  // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()).
+  // Therefore we load the PC into Z_R1_scratch and let set_last_Java_frame() save
+  // it into the frame anchor.
+  address pc = get_PC(Z_R1_scratch);
+  int call_offset = (int)(pc - addr_at(0));
+  set_last_Java_frame(Z_SP, Z_R1_scratch);
+
+  // ARG1 must hold thread address.
+  z_lgr(Z_ARG1, Z_thread);
+
+  address return_pc = NULL;
+  align_call_far_patchable(this->pc());
+  return_pc = call_c_opt(entry_point);
+  assert(return_pc != NULL, "const section overflow");
+
+  reset_last_Java_frame();
+
+  // Check for pending exceptions.
+  {
+    load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset()));
+
+    // This used to conditionally jump to forward_exception however it is
+    // possible if we relocate that the branch will not reach. So we must jump
+    // around so we can always reach.
+
+    Label ok;
+    z_bre(ok); // Bcondequal is the same as bcondZero.
+
+    // exception pending => forward to exception handler
+
+    // Make sure that the vm_results are cleared.
+    if (oop_result1->is_valid()) {
+      clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
+    }
+    if (metadata_result->is_valid()) {
+      clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(jlong));
+    }
+    if (frame_size() == no_frame_size) {
+      // Pop the stub frame.
+      pop_frame();
+      restore_return_pc();
+      load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
+      z_br(Z_R1);
+    } else if (_stub_id == Runtime1::forward_exception_id) {
+      should_not_reach_here();
+    } else {
+      load_const_optimized(Z_R1, Runtime1::entry_for (Runtime1::forward_exception_id));
+      z_br(Z_R1);
+    }
+
+    bind(ok);
+  }
+
+  // Get oop results if there are any and reset the values in the thread.
+  if (oop_result1->is_valid()) {
+    get_vm_result(oop_result1);
+  }
+  if (metadata_result->is_valid()) {
+    get_vm_result_2(metadata_result);
+  }
+
+  return call_offset;
+}
+
+
+// One-argument variant: arg1 is passed in Z_ARG2.
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg1);
+  return call_RT(oop_result1, metadata_result, entry, 1);
+}
+
+
+// Two-argument variant: arg1/arg2 are passed in Z_ARG2/Z_ARG3.
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg1);
+  assert(arg2 != Z_ARG2, "smashed argument");
+  lgr_if_needed(Z_ARG3, arg2);
+  return call_RT(oop_result1, metadata_result, entry, 2);
+}
+
+
+// Three-argument variant: arg1/arg2/arg3 are passed in Z_ARG2/Z_ARG3/Z_ARG4.
+int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) {
+  // Z_ARG1 is reserved for the thread.
+  lgr_if_needed(Z_ARG2, arg1);
+  assert(arg2 != Z_ARG2, "smashed argument");
+  lgr_if_needed(Z_ARG3, arg2);
+  assert(arg3 != Z_ARG3, "smashed argument");
+  lgr_if_needed(Z_ARG4, arg3);
+  return call_RT(oop_result1, metadata_result, entry, 3);
+}
+
+
+// Implementation of Runtime1
+
+#define __ sasm->
+
+#ifndef PRODUCT
+#undef __
+#define __ (Verbose ? (sasm->block_comment(FILE_AND_LINE),sasm):sasm)->
+#endif // !PRODUCT
+
+#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str)
+#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
+
+// Records the frame size for RegisterSaver::all_registers on sasm and
+// returns the matching oop map without emitting any save code here.
+static OopMap* generate_oop_map(StubAssembler* sasm) {
+  RegisterSaver::RegisterSet reg_set = RegisterSaver::all_registers;
+  int frame_size_in_slots =
+    RegisterSaver::live_reg_frame_size(reg_set) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
+  return RegisterSaver::generate_oop_map(sasm, reg_set);
+}
+
+// Saves all registers (or only the integer registers when save_fpu_registers
+// is false), records the frame size on sasm and returns the oop map.
+static OopMap* save_live_registers(StubAssembler* sasm, bool save_fpu_registers = true, Register return_pc = Z_R14) {
+  __ block_comment("save_live_registers");
+  RegisterSaver::RegisterSet reg_set =
+    save_fpu_registers ? RegisterSaver::all_registers : RegisterSaver::all_integer_registers;
+  int frame_size_in_slots =
+    RegisterSaver::live_reg_frame_size(reg_set) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
+  return RegisterSaver::save_live_registers(sasm, reg_set, return_pc);
+}
+
+// Like save_live_registers, but uses the all_registers_except_r2 set.
+// Saving without fpu registers is not implemented for this variant.
+static OopMap* save_live_registers_except_r2(StubAssembler* sasm, bool save_fpu_registers = true) {
+  if (!save_fpu_registers) {
+    __ unimplemented(FILE_AND_LINE);
+  }
+  // Label the code with this helper's own name, matching
+  // restore_live_registers_except_r2.
+  __ block_comment("save_live_registers_except_r2");
+  RegisterSaver::RegisterSet reg_set = RegisterSaver::all_registers_except_r2;
+  int frame_size_in_slots =
+    RegisterSaver::live_reg_frame_size(reg_set) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
+  return RegisterSaver::save_live_registers(sasm, reg_set);
+}
+
+// Saves the all_volatile_registers set, records the frame size on sasm and
+// returns the oop map.
+static OopMap* save_volatile_registers(StubAssembler* sasm, Register return_pc = Z_R14) {
+  __ block_comment("save_volatile_registers");
+  RegisterSaver::RegisterSet reg_set = RegisterSaver::all_volatile_registers;
+  int frame_size_in_slots =
+    RegisterSaver::live_reg_frame_size(reg_set) / VMRegImpl::stack_slot_size;
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
+  return RegisterSaver::save_live_registers(sasm, reg_set, return_pc);
+}
+
+// Counterpart of save_live_registers.
+static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  __ block_comment("restore_live_registers");
+  RegisterSaver::RegisterSet reg_set =
+    restore_fpu_registers ? RegisterSaver::all_registers : RegisterSaver::all_integer_registers;
+  RegisterSaver::restore_live_registers(sasm, reg_set);
+}
+
+// Counterpart of save_live_registers_except_r2.
+static void restore_live_registers_except_r2(StubAssembler* sasm, bool restore_fpu_registers = true) {
+  if (!restore_fpu_registers) {
+    __ unimplemented(FILE_AND_LINE);
+  }
+  __ block_comment("restore_live_registers_except_r2");
+  RegisterSaver::restore_live_registers(sasm, RegisterSaver::all_registers_except_r2);
+}
+
+// Counterpart of save_volatile_registers.
+static void restore_volatile_registers(StubAssembler* sasm) {
+  __ block_comment("restore_volatile_registers");
+  RegisterSaver::RegisterSet reg_set = RegisterSaver::all_volatile_registers;
+  RegisterSaver::restore_live_registers(sasm, reg_set);
+}
+
+void Runtime1::initialize_pd() {
+  // Nothing to do.
+}
+
+// Generates a stub that saves the live registers and calls the throwing
+// runtime routine 'target'. With has_argument set, Z_R1_scratch and
+// Z_R0_scratch are passed as arguments. The call is not expected to return.
+OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) {
+  // Make a frame and preserve the caller's caller-save registers.
+  OopMap* oop_map = save_live_registers(sasm);
+  int call_offset;
+  if (!has_argument) {
+    call_offset = __ call_RT(noreg, noreg, target);
+  } else {
+    call_offset = __ call_RT(noreg, noreg, target, Z_R1_scratch, Z_R0_scratch);
+  }
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  __ should_not_reach_here();
+  return oop_maps;
+}
+
+// Generates the unwind stub: looks up the caller's exception handler for the
+// return address in Z_EXC_PC and jumps to it with Z_EXC_OOP / Z_EXC_PC intact.
+void Runtime1::generate_unwind_exception(StubAssembler *sasm) {
+  // Incoming parameters: Z_EXC_OOP and Z_EXC_PC.
+  // Keep copies in callee-saved registers during runtime call.
+  const Register exception_oop_callee_saved = Z_R11;
+  const Register exception_pc_callee_saved = Z_R12;
+  // Other registers used in this stub.
+  const Register handler_addr = Z_R4;
+
+  // Verify that only Z_EXC_OOP and Z_EXC_PC are valid at this time.
+  __ invalidate_registers(Z_EXC_OOP, Z_EXC_PC);
+
+  // Check that fields in JavaThread for exception oop and issuing pc are set.
+  __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_oop_offset()), Z_thread, "exception oop already set : " FILE_AND_LINE, 0);
+  __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_pc_offset()), Z_thread, "exception pc already set : " FILE_AND_LINE, 0);
+
+  // Save exception_oop and pc in callee-saved register to preserve it
+  // during runtime calls.
+  __ verify_not_null_oop(Z_EXC_OOP);
+  __ lgr_if_needed(exception_oop_callee_saved, Z_EXC_OOP);
+  __ lgr_if_needed(exception_pc_callee_saved, Z_EXC_PC);
+
+  __ push_frame_abi160(0); // Runtime code needs the z_abi_160.
+
+  // Search the exception handler address of the caller (using the return address).
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), Z_thread, Z_EXC_PC);
+  // Z_RET(Z_R2): exception handler address of the caller.
+
+  __ pop_frame();
+
+  __ invalidate_registers(exception_oop_callee_saved, exception_pc_callee_saved, Z_RET);
+
+  // Move result of call into correct register.
+  __ lgr_if_needed(handler_addr, Z_RET);
+
+  // Restore exception oop and pc to Z_EXC_OOP and Z_EXC_PC (required convention of exception handler).
+  __ lgr_if_needed(Z_EXC_OOP, exception_oop_callee_saved);
+  __ lgr_if_needed(Z_EXC_PC, exception_pc_callee_saved);
+
+  // Verify that there is really a valid exception in Z_EXC_OOP.
+  __ verify_not_null_oop(Z_EXC_OOP);
+
+  __ z_br(handler_addr); // Jump to exception handler.
+}
+
+// Generates a patching stub: calls the runtime patching routine 'target' and
+// then either returns to the patched code (result zero) or continues in the
+// deoptimization blob's unpack_with_reexecution entry (result non-zero).
+OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
+  // Make a frame and preserve the caller's caller-save registers.
+  OopMap* oop_map = save_live_registers(sasm);
+
+  // Call the runtime patching routine, returns non-zero if nmethod got deopted.
+  int call_offset = __ call_RT(noreg, noreg, target);
+  OopMapSet* oop_maps = new OopMapSet();
+  oop_maps->add_gc_map(call_offset, oop_map);
+
+  // Re-execute the patched instruction or, if the nmethod was
+  // deoptimized, return to the deoptimization handler entry that will
+  // cause re-execution of the current bytecode.
+  DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob();
+  assert(deopt_blob != NULL, "deoptimization blob must have been created");
+
+  // The condition code set here is consumed by the z_bcr below, i.e. after
+  // the registers have been restored.
+  __ z_ltr(Z_RET, Z_RET); // return value == 0
+
+  restore_live_registers(sasm);
+
+  __ z_bcr(Assembler::bcondZero, Z_R14);
+
+  // Return to the deoptimization handler entry for unpacking and
+  // re-execution. If we simply returned, we'd deopt as if any call we
+  // patched had just returned.
+  AddressLiteral dest(deopt_blob->unpack_with_reexecution());
+  __ load_const_optimized(Z_R1_scratch, dest);
+  __ z_br(Z_R1_scratch);
+
+  return oop_maps;
+}
+
+OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
+
+  // for better readability
+  const bool must_gc_arguments = true;
+  const bool dont_gc_arguments = false;
+
+  // Default value; overwritten for some optimized stubs that are
+  // called from methods that do not use the fpu.
+  bool save_fpu_registers = true;
+
+  // Stub code and info for the different stubs.
+ OopMapSet* oop_maps = NULL; + switch (id) { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + // will not return + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register klass = Z_R11; // Incoming + Register obj = Z_R2; // Result + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { + assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + UseTLAB && FastTLABRefill) { + // Sapjvm: must call RT to generate allocation events. + } + + OopMap* map = save_live_registers_except_r2(sasm); + int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r2(sasm); + + __ verify_oop(obj); + __ z_br(Z_R14); + } + break; + + case counter_overflow_id: + { + // Arguments : + // bci : stack param 0 + // method : stack param 1 + // + Register bci = Z_ARG2, method = Z_ARG3; + // frame size in bytes + OopMap* map = save_live_registers(sasm); + const int frame_size = sasm->frame_size() * VMRegImpl::slots_per_word * VMRegImpl::stack_slot_size; + __ z_lg(bci, 0*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP); + __ z_lg(method, 1*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ z_br(Z_R14); + } + break; + case new_type_array_id: + case new_object_array_id: + { + Register length = Z_R13; // Incoming + Register 
klass = Z_R11; // Incoming + Register obj = Z_R2; // Result + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // Assert object type is really an array of the proper kind. + { + NearLabel ok; + Register t0 = obj; + __ mem2reg_opt(t0, Address(klass, Klass::layout_helper_offset()), false); + __ z_sra(t0, Klass::_lh_array_tag_shift); + int tag = ((id == new_type_array_id) + ? Klass::_lh_array_tag_type_value + : Klass::_lh_array_tag_obj_value); + __ compare32_and_branch(t0, tag, Assembler::bcondEqual, ok); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + if (UseTLAB && FastTLABRefill) { + // sapjvm: must call RT to generate allocation events. + } + + OopMap* map = save_live_registers_except_r2(sasm); + int call_offset; + if (id == new_type_array_id) { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r2(sasm); + + __ verify_oop(obj); + __ z_br(Z_R14); + } + break; + + case new_multi_array_id: + { __ set_info("new_multi_array", dont_gc_arguments); + // Z_R3,: klass + // Z_R4,: rank + // Z_R5: address of 1st dimension + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(Z_R2, noreg, CAST_FROM_FN_PTR(address, new_multi_array), Z_R3, Z_R4, Z_R5); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r2(sasm); + + // Z_R2,: new multi array + __ verify_oop(Z_R2); + __ z_br(Z_R14); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // Load the klass and check the has finalizer flag. 
+ Register klass = Z_ARG2; + __ load_klass(klass, Z_ARG1); + __ testbit(Address(klass, Klass::access_flags_offset()), exact_log2(JVM_ACC_HAS_FINALIZER)); + __ z_bcr(Assembler::bcondAllZero, Z_R14); // Return if bit is not set. + + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, + CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), Z_ARG1); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + // Now restore all the live registers. + restore_live_registers(sasm); + + __ z_br(Z_R14); + } + break; + + case throw_range_check_failed_id: + { __ set_info("range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case throw_index_exception_id: + { __ set_info("index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + case throw_div0_exception_id: + { __ set_info("throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + case throw_null_pointer_exception_id: + { __ set_info("throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + case handle_exception_nofpu_id: + case handle_exception_id: + { __ set_info("handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + case handle_exception_from_callee_id: + { __ set_info("handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + case unwind_exception_id: + { __ set_info("unwind_exception", dont_gc_arguments); + // Note: no stubframe since we are about to leave the current + // activation and we are calling a leaf VM function 
only. + generate_unwind_exception(sasm); + } + break; + case throw_array_store_exception_id: + { __ set_info("throw_array_store_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + case throw_class_cast_exception_id: + { // Z_R1_scratch: object + __ set_info("throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + case throw_incompatible_class_change_error_id: + { __ set_info("throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + case slow_subtype_check_id: + { + // Arguments : + // sub : stack param 0 + // super: stack param 1 + // raddr: Z_R14, blown by call + // + // Result : condition code 0 for match (bcondEqual will be true), + // condition code 2 for miss (bcondNotEqual will be true) + NearLabel miss; + const Register Rsubklass = Z_ARG2; // sub + const Register Rsuperklass = Z_ARG3; // super + + // No args, but tmp registers that are killed. + const Register Rlength = Z_ARG4; // cache array length + const Register Rarray_ptr = Z_ARG5; // Current value from cache array. + + if (UseCompressedOops) { + assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub"); + } + + const int frame_size = 4*BytesPerWord + frame::z_abi_160_size; + // Save return pc. This is not necessary, but could be helpful + // in the case of crashes. + __ save_return_pc(); + __ push_frame(frame_size); + // Save registers before changing them. 
+ int i = 0; + __ z_stg(Rsubklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + __ z_stg(Rsuperklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + __ z_stg(Rlength, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + __ z_stg(Rarray_ptr, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + assert(i*BytesPerWord + frame::z_abi_160_size == frame_size, "check"); + + // Get sub and super from stack. + __ z_lg(Rsubklass, 0*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP); + __ z_lg(Rsuperklass, 1*BytesPerWord + FrameMap::first_available_sp_in_frame + frame_size, Z_SP); + + __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass, Rarray_ptr, Rlength, NULL, &miss); + + // Match falls through here. + i = 0; + __ z_lg(Rsubklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + __ z_lg(Rsuperklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + __ z_lg(Rlength, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + __ z_lg(Rarray_ptr, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + assert(i*BytesPerWord + frame::z_abi_160_size == frame_size, "check"); + __ pop_frame(); + // Return pc is still in R_14. + __ clear_reg(Z_R0_scratch); // Zero indicates a match. Set CC 0 (bcondEqual will be true) + __ z_br(Z_R14); + + __ BIND(miss); + i = 0; + __ z_lg(Rsubklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + __ z_lg(Rsuperklass, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + __ z_lg(Rlength, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + __ z_lg(Rarray_ptr, (i++)*BytesPerWord + frame::z_abi_160_size, Z_SP); + assert(i*BytesPerWord + frame::z_abi_160_size == frame_size, "check"); + __ pop_frame(); + // return pc is still in R_14 + __ load_const_optimized(Z_R0_scratch, 1); // One indicates a miss. + __ z_ltgr(Z_R0_scratch, Z_R0_scratch); // Set CC 2 (bcondNotEqual will be true). 
+ __ z_br(Z_R14); + } + break; + case monitorenter_nofpu_id: + case monitorenter_id: + { // Z_R1_scratch : object + // Z_R13 : lock address (see LIRGenerator::syncTempOpr()) + __ set_info("monitorenter", dont_gc_arguments); + + int save_fpu_registers = (id == monitorenter_id); + // Make a frame and preserve the caller's caller-save registers. + OopMap* oop_map = save_live_registers(sasm, save_fpu_registers); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), Z_R1_scratch, Z_R13); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm, save_fpu_registers); + + __ z_br(Z_R14); + } + break; + + case monitorexit_nofpu_id: + case monitorexit_id: + { // Z_R1_scratch : lock address + // Note: really a leaf routine but must setup last java sp + // => Use call_RT for now (speed can be improved by + // doing last java sp setup manually). + __ set_info("monitorexit", dont_gc_arguments); + + int save_fpu_registers = (id == monitorexit_id); + // Make a frame and preserve the caller's caller-save registers. 
+ OopMap* oop_map = save_live_registers(sasm, save_fpu_registers); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), Z_R1_scratch); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm, save_fpu_registers); + + __ z_br(Z_R14); + } + break; + + case deoptimize_id: + { // Args: Z_R1_scratch: trap request + __ set_info("deoptimize", dont_gc_arguments); + Register trap_request = Z_R1_scratch; + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), trap_request); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + AddressLiteral dest(deopt_blob->unpack_with_reexecution()); + __ load_const_optimized(Z_R1_scratch, dest); + __ z_br(Z_R1_scratch); + } + break; + + case access_field_patching_id: + { __ set_info("access_field_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { __ set_info("load_klass_patching", dont_gc_arguments); + // We should set up register map. 
+ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { __ set_info("load_mirror_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { __ set_info("load_appendix_patching", dont_gc_arguments); + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; +#if 0 + case dtrace_object_alloc_id: + { // rax,: object + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + // We can't gc here so skip the oopmap but make sure that all + // the live registers get saved. + save_live_registers(sasm, 1); + + __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax)); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc))); + NOT_LP64(__ pop(rax)); + + restore_live_registers(sasm); + } + break; + + case fpu2long_stub_id: + { + // rax, and rdx are destroyed, but should be free since the result is returned there + // preserve rsi,ecx + __ push(rsi); + __ push(rcx); + LP64_ONLY(__ push(rdx);) + + // check for NaN + Label return0, do_return, return_min_jlong, do_convert; + + Address value_high_word(rsp, wordSize + 4); + Address value_low_word(rsp, wordSize); + Address result_high_word(rsp, 3*wordSize + 4); + Address result_low_word(rsp, 3*wordSize); + + __ subptr(rsp, 32); // more than enough on 32bit + __ fst_d(value_low_word); + __ movl(rax, value_high_word); + __ andl(rax, 0x7ff00000); + __ cmpl(rax, 0x7ff00000); + __ jcc(Assembler::notEqual, do_convert); + __ movl(rax, value_high_word); + __ andl(rax, 0xfffff); + __ orl(rax, value_low_word); + __ jcc(Assembler::notZero, return0); + + __ bind(do_convert); + __ fnstcw(Address(rsp, 0)); + __ movzwl(rax, Address(rsp, 0)); + __ orl(rax, 0xc00); + __ movw(Address(rsp, 2), rax); + __ fldcw(Address(rsp, 2)); + __ fwait(); + __ fistp_d(result_low_word); + __ 
fldcw(Address(rsp, 0)); + __ fwait(); + // This gets the entire long in rax on 64bit + __ movptr(rax, result_low_word); + // testing of high bits + __ movl(rdx, result_high_word); + __ mov(rcx, rax); + // What the heck is the point of the next instruction??? + __ xorl(rcx, 0x0); + __ movl(rsi, 0x80000000); + __ xorl(rsi, rdx); + __ orl(rcx, rsi); + __ jcc(Assembler::notEqual, do_return); + __ fldz(); + __ fcomp_d(value_low_word); + __ fnstsw_ax(); + __ testl(rax, 0x4100); // ZF & CF == 0 + __ jcc(Assembler::equal, return_min_jlong); + // return max_jlong + __ mov64(rax, CONST64(0x7fffffffffffffff)); + __ jmp(do_return); + + __ bind(return_min_jlong); + __ mov64(rax, UCONST64(0x8000000000000000)); + __ jmp(do_return); + + __ bind(return0); + __ fpop(); + __ xorptr(rax, rax); + + __ bind(do_return); + __ addptr(rsp, 32); + LP64_ONLY(__ pop(rdx);) + __ pop(rcx); + __ pop(rsi); + __ ret(0); + } + break; +#endif // TODO + +#if INCLUDE_ALL_GCS + case g1_pre_barrier_slow_id: + { // Z_R1_scratch: previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ should_not_reach_here(FILE_AND_LINE); + break; + } + + __ set_info("g1_pre_barrier_slow_id", dont_gc_arguments); + + Register pre_val = Z_R1_scratch; + Register tmp = Z_R6; // Must be non-volatile because it is used to save pre_val. + Register tmp2 = Z_R7; + + Label refill, restart; + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_buf()); + + // Save tmp registers (see assertion in G1PreBarrierStub::emit_code()). + __ z_stg(tmp, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP); + __ z_stg(tmp2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP); + + __ bind(restart); + // Load the index into the SATB buffer. 
SATBMarkQueue::_index is a + // size_t so ld_ptr is appropriate. + __ z_ltg(tmp, satb_q_index_byte_offset, Z_R0, Z_thread); + + // index == 0? + __ z_brz(refill); + + __ z_lg(tmp2, satb_q_buf_byte_offset, Z_thread); + __ add2reg(tmp, -oopSize); + + __ z_stg(pre_val, 0, tmp, tmp2); // [_buf + index] := + __ z_stg(tmp, satb_q_index_byte_offset, Z_thread); + + // Restore tmp registers (see assertion in G1PreBarrierStub::emit_code()). + __ z_lg(tmp, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP); + __ z_lg(tmp2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP); + __ z_br(Z_R14); + + __ bind(refill); + save_volatile_registers(sasm); + __ z_lgr(tmp, pre_val); // save pre_val + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SATBMarkQueueSet::handle_zero_index_for_thread), + Z_thread); + __ z_lgr(pre_val, tmp); // restore pre_val + restore_volatile_registers(sasm); + __ z_bru(restart); + } + break; + + case g1_post_barrier_slow_id: + { // Z_R1_scratch: oop address, address of updated memory slot + BarrierSet* bs = Universe::heap()->barrier_set(); + if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ should_not_reach_here(FILE_AND_LINE); + break; + } + + __ set_info("g1_post_barrier_slow_id", dont_gc_arguments); + + Register addr_oop = Z_R1_scratch; + Register addr_card = Z_R1_scratch; + Register r1 = Z_R6; // Must be saved/restored. + Register r2 = Z_R7; // Must be saved/restored. + Register cardtable = r1; // Must be non-volatile, because it is used to save addr_card. + jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base; + + // Save registers used below (see assertion in G1PreBarrierStub::emit_code()). + __ z_stg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP); + + Label not_already_dirty, restart, refill, young_card; + + // Calculate address of card corresponding to the updated oop slot. 
+ AddressLiteral rs(byte_map_base); + __ z_srlg(addr_card, addr_oop, CardTableModRefBS::card_shift); + addr_oop = noreg; // dead now + __ load_const_optimized(cardtable, rs); // cardtable := + __ z_agr(addr_card, cardtable); // addr_card := addr_oop>>card_shift + cardtable + + __ z_cli(0, addr_card, (int)G1SATBCardTableModRefBS::g1_young_card_val()); + __ z_bre(young_card); + + __ z_sync(); // Required to support concurrent cleaning. + + __ z_cli(0, addr_card, (int)CardTableModRefBS::dirty_card_val()); + __ z_brne(not_already_dirty); + + __ bind(young_card); + // We didn't take the branch, so we're already dirty: restore + // used registers and return. + __ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP); + __ z_br(Z_R14); + + // Not dirty. + __ bind(not_already_dirty); + + // First, dirty it: [addr_card] := 0 + __ z_mvi(0, addr_card, CardTableModRefBS::dirty_card_val()); + + Register idx = cardtable; // Must be non-volatile, because it is used to save addr_card. + Register buf = r2; + cardtable = noreg; // now dead + + // Save registers used below (see assertion in G1PreBarrierStub::emit_code()). + __ z_stg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP); + + ByteSize dirty_card_q_index_byte_offset = + JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_index(); + ByteSize dirty_card_q_buf_byte_offset = + JavaThread::dirty_card_queue_offset() + DirtyCardQueue::byte_offset_of_buf(); + + __ bind(restart); + + // Get the index into the update buffer. DirtyCardQueue::_index is + // a size_t so z_ltg is appropriate here. + __ z_ltg(idx, Address(Z_thread, dirty_card_q_index_byte_offset)); + + // index == 0? + __ z_brz(refill); + + __ z_lg(buf, Address(Z_thread, dirty_card_q_buf_byte_offset)); + __ add2reg(idx, -oopSize); + + __ z_stg(addr_card, 0, idx, buf); // [_buf + index] := + __ z_stg(idx, Address(Z_thread, dirty_card_q_index_byte_offset)); + // Restore killed registers and return. 
+ __ z_lg(r1, 0*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP); + __ z_lg(r2, 1*BytesPerWord + FrameMap::first_available_sp_in_frame, Z_SP); + __ z_br(Z_R14); + + __ bind(refill); + save_volatile_registers(sasm); + __ z_lgr(idx, addr_card); // Save addr_card, tmp3 must be non-volatile. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, DirtyCardQueueSet::handle_zero_index_for_thread), + Z_thread); + __ z_lgr(addr_card, idx); + restore_volatile_registers(sasm); // Restore addr_card. + __ z_bru(restart); + } + break; +#endif // INCLUDE_ALL_GCS + case predicate_failed_trap_id: + { + __ set_info("predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ load_const_optimized(Z_R1_scratch, deopt_blob->unpack_with_reexecution()); + __ z_br(Z_R1_scratch); + } + break; + + default: + { + __ should_not_reach_here(FILE_AND_LINE, id); + } + break; + } + return oop_maps; +} + +// Emits the code of an exception-handling stub for the given stub id. +// Incoming registers: Z_EXC_OOP (exception oop) and Z_EXC_PC (issuing pc); for +// forward_exception_id both are loaded here, from the thread's pending +// exception and from the return pc slot of the stub frame. +// The stub stores oop and pc into the JavaThread, calls exception_handler_for_pc +// through call_RT and branches to the handler address returned in Z_R2. +// Accepted ids: forward_exception_id, handle_exception_nofpu_id, +// handle_exception_id, handle_exception_from_callee_id; any other id hits +// ShouldNotReachHere(). +// Returns a newly allocated OopMapSet holding the GC map registered for the +// runtime call. +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + __ block_comment("generate_handle_exception"); + + // incoming parameters: Z_EXC_OOP, Z_EXC_PC + + // Save registers if required. + OopMapSet* oop_maps = new OopMapSet(); + OopMap* oop_map = NULL; + Register reg_fp = Z_R1_scratch; + + switch (id) { + case forward_exception_id: { + // We're handling an exception in the context of a compiled frame. + // The registers have been saved in the standard places. Perform + // an exception lookup in the caller and dispatch to the handler + // if found. Otherwise unwind and dispatch to the caller's + // exception handler. 
+ oop_map = generate_oop_map(sasm); + + // Load the pending exception oop into Z_EXC_OOP, then clear it in the thread. + __ z_lg(Z_EXC_OOP, Address(Z_thread, Thread::pending_exception_offset())); + __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), 8); + + // Different stubs forward their exceptions; they should all have similar frame layouts + // (a) to find their return address (b) for a correct oop_map generated above. + assert(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers) == + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers_except_r2), "requirement"); + + // Load issuing PC (the return address for this stub). + const int frame_size_in_bytes = sasm->frame_size() * VMRegImpl::slots_per_word * VMRegImpl::stack_slot_size; + __ z_lg(Z_EXC_PC, Address(Z_SP, frame_size_in_bytes + _z_abi16(return_pc))); + DEBUG_ONLY(__ z_lay(reg_fp, Address(Z_SP, frame_size_in_bytes));) + + // Make sure that the vm_results are cleared (may be unnecessary). + __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(oop)); + __ clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(Metadata*)); + break; + } + case handle_exception_nofpu_id: + case handle_exception_id: + // At this point all registers MAY be live. + DEBUG_ONLY(__ z_lgr(reg_fp, Z_SP);) + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id, Z_EXC_PC); + break; + case handle_exception_from_callee_id: { + // At this point all registers except Z_EXC_OOP and Z_EXC_PC are dead. + DEBUG_ONLY(__ z_lgr(reg_fp, Z_SP);) + __ save_return_pc(Z_EXC_PC); + const int frame_size_in_bytes = __ push_frame_abi160(0); + oop_map = new OopMap(frame_size_in_bytes / VMRegImpl::stack_slot_size, 0); + sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); + break; + } + default: ShouldNotReachHere(); + } + + // Verify that only Z_EXC_OOP and Z_EXC_PC are valid at this time. 
+ __ invalidate_registers(Z_EXC_OOP, Z_EXC_PC, reg_fp); + // Verify that Z_EXC_OOP contains a valid exception. + __ verify_not_null_oop(Z_EXC_OOP); + + // Check that fields in JavaThread for exception oop and issuing pc + // are empty before writing to them. + __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_oop_offset()), Z_thread, "exception oop already set : " FILE_AND_LINE, 0); + __ asm_assert_mem8_is_zero(in_bytes(JavaThread::exception_pc_offset()), Z_thread, "exception pc already set : " FILE_AND_LINE, 0); + + // Save exception oop and issuing pc into JavaThread. + // (Exception handler will load it from here.) + __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset())); + __ z_stg(Z_EXC_PC, Address(Z_thread, JavaThread::exception_pc_offset())); + +#ifdef ASSERT + { NearLabel ok; + __ z_cg(Z_EXC_PC, Address(reg_fp, _z_abi16(return_pc))); + __ branch_optimized(Assembler::bcondEqual, ok); + __ stop("use throwing pc as return address (has bci & oop map)"); + __ bind(ok); + } +#endif + + // Compute the exception handler. + // The exception oop and the throwing pc are read from the fields in JavaThread. + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + oop_maps->add_gc_map(call_offset, oop_map); + + // Z_RET(Z_R2): handler address + // will be the deopt blob if nmethod was deoptimized while we looked up + // handler regardless of whether handler existed in the nmethod. + + // Only Z_R2 is valid at this time; all other registers have been destroyed by the runtime call. + __ invalidate_registers(Z_R2); + + switch(id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + // Restore the registers that were saved at the beginning. + __ z_lgr(Z_R1_scratch, Z_R2); // Restoring live registers kills Z_R2. + restore_live_registers(sasm, id != handle_exception_nofpu_id); // Also pops the frame. 
+ __ z_br(Z_R1_scratch); + break; + case handle_exception_from_callee_id: { + __ pop_frame(); + __ z_br(Z_R2); // Jump to exception handler. + } + break; + default: ShouldNotReachHere(); + } + + return oop_maps; +} + + +#undef __ + +// Always returns the empty string; no address-to-name mapping is implemented here. +const char *Runtime1::pd_name_for_address(address entry) { + return ""; +} diff --git a/hotspot/src/cpu/s390/vm/c1_globals_s390.hpp b/hotspot/src/cpu/s390/vm/c1_globals_s390.hpp new file mode 100644 index 00000000000..fac7d539f33 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c1_globals_s390.hpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_C1_GLOBALS_S390_HPP +#define CPU_S390_VM_C1_GLOBALS_S390_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the client compiler. +// (see c1_globals.hpp) +// Flags sorted according to sparc. 
+ +#ifndef TIERED +// The defaults in this section apply only when TIERED is not defined. +define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1000); + +define_pd_global(intx, OnStackReplacePercentage, 1400); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(bool, ResizeTLAB, true); +// Code cache sizes are unsigned (uintx), as in c2_globals_s390.hpp. +// ReservedCodeCacheSize equals the sum of the three code heap sizes below. +define_pd_global(uintx, ReservedCodeCacheSize, 32*M); +define_pd_global(uintx, NonProfiledCodeHeapSize, 13*M); +define_pd_global(uintx, ProfiledCodeHeapSize, 14*M); +define_pd_global(uintx, NonNMethodCodeHeapSize, 5*M); +define_pd_global(uintx, CodeCacheExpansionSize, 32*K); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(size_t, MetaspaceSize, 12*M); +define_pd_global(bool, NeverActAsServerClassMachine, true); +define_pd_global(size_t, NewSizeThreadIncrease, 16*K); +define_pd_global(uint64_t, MaxRAM, 1ULL*G); +define_pd_global(uintx, InitialCodeCacheSize, 160*K); +#endif // !TIERED + +define_pd_global(bool, UseTypeProfile, false); +define_pd_global(bool, RoundFPResults, false); + +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, false); +define_pd_global(bool, CSEArrayLength, true); +define_pd_global(bool, TwoOperandLIRForm, true); + +#endif // CPU_S390_VM_C1_GLOBALS_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/c2_globals_s390.hpp b/hotspot/src/cpu/s390/vm/c2_globals_s390.hpp new file mode 100644 index 00000000000..2c00ec86521 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c2_globals_s390.hpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_C2_GLOBALS_S390_HPP +#define CPU_S390_VM_C2_GLOBALS_S390_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). +// Sorted according to sparc. 
+ +define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +define_pd_global(bool, ProfileInterpreter, true); +define_pd_global(bool, TieredCompilation, trueInTiered); +define_pd_global(intx, CompileThreshold, 10000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 4); +define_pd_global(intx, FLOATPRESSURE, 15); +define_pd_global(intx, FreqInlineSize, 175); +// 10 prevents spill-split-recycle sanity check in JVM2008.xml.transform. +define_pd_global(intx, INTPRESSURE, 10); // Medium size register set, 6 special purpose regs, 3 SOE regs. +define_pd_global(intx, InteriorEntryAlignment, 2); +define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, RegisterCostAreaRatio, 12000); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(intx, LoopUnrollLimit, 60); +define_pd_global(intx, LoopPercentProfileLimit, 10); +// PostLoopMultiversioning is an on/off switch, so its default is declared bool. +define_pd_global(bool, PostLoopMultiversioning, false); +define_pd_global(intx, MinJumpTableSize, 18); + +// Peephole and CISC spilling both break the graph, and so make the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, true); +define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); +// On s390x, we can clear the array with a single instruction, +// so don't idealize it. +define_pd_global(bool, IdealizeClearArrayNode, false); + +// InitialCodeCacheSize derived from specjbb2000 run. 
+define_pd_global(uintx, InitialCodeCacheSize, 2048*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(uintx, ReservedCodeCacheSize, 48*M); +define_pd_global(uintx, NonProfiledCodeHeapSize, 21*M); +define_pd_global(uintx, ProfiledCodeHeapSize, 22*M); +define_pd_global(uintx, NonNMethodCodeHeapSize, 5*M); +define_pd_global(uintx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t, MaxRAM, 128ULL*G); +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on z/Architecture. + +// Heap related flags +define_pd_global(size_t, MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_S390_VM_C2_GLOBALS_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/c2_init_s390.cpp b/hotspot/src/cpu/s390/vm/c2_init_s390.cpp new file mode 100644 index 00000000000..d2fa9f07f78 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/c2_init_s390.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// Processor dependent initialization for z/Architecture. +// Only checks (via guarantee) that CodeEntryAlignment is not smaller than +// InteriorEntryAlignment; nothing else is initialized here. + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, ""); +} diff --git a/hotspot/src/cpu/s390/vm/codeBuffer_s390.hpp b/hotspot/src/cpu/s390/vm/codeBuffer_s390.hpp new file mode 100644 index 00000000000..1a32f94f4f1 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/codeBuffer_s390.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_CODEBUFFER_S390_HPP +#define CPU_S390_VM_CODEBUFFER_S390_HPP + + private: + void pd_initialize() {} + + public: + void flush_bundle(bool start_new_bundle) {} + + void getCpuData(const CodeBuffer * const cb) {} + +#endif // CPU_S390_VM_CODEBUFFER_S390_HPP + + diff --git a/hotspot/src/cpu/s390/vm/compiledIC_s390.cpp b/hotspot/src/cpu/s390/vm/compiledIC_s390.cpp new file mode 100644 index 00000000000..33e383b30e6 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/compiledIC_s390.cpp @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" +#ifdef COMPILER2 +#include "opto/matcher.hpp" +#endif + +// ---------------------------------------------------------------------------- + +#undef __ +#define __ _masm. + +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/* = NULL*/) { +#ifdef COMPILER2 + // Stub is fixed up when the corresponding call is converted from calling + // compiled code to calling interpreted code. + if (mark == NULL) { + // Get the mark within main instrs section which is set to the address of the call. + mark = cbuf.insts_mark(); + } + assert(mark != NULL, "mark must not be NULL"); + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + address stub = __ start_a_stub(Compile::MAX_stubs_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed. + } + __ relocate(static_stub_Relocation::spec(mark)); + + AddressLiteral meta = __ allocate_metadata_address(NULL); + bool success = __ load_const_from_toc(as_Register(Matcher::inline_cache_reg_encode()), meta); + + __ set_inst_mark(); + AddressLiteral a((address)-1); + success = success && __ load_const_from_toc(Z_R1, a); + if (!success) { + return NULL; // CodeCache is full. + } + + __ z_br(Z_R1); + __ end_a_stub(); // Update current stubs pointer and restore insts_end. + return stub; +#else + ShouldNotReachHere(); +#endif +} + +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + return 2 * MacroAssembler::load_const_from_toc_size() + + 2; // branch +} + +// Relocation entries for call stub, compiled java to interpreter. 
+int CompiledStaticCall::reloc_to_interp_stub() { + return 5; // 4 in emit_java_to_interp + 1 in Java_Static_Call +} + +void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + get_IC_pos_in_java_to_interp_stub()); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // A generated lambda form might be deleted from the Lambdaform + // cache in MethodTypeForm. If a jit compiled lambdaform method + // becomes not entrant and the cache access returns null, the new + // resolve will lead to a new generated LambdaForm. + + assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee() || callee->is_compiled_lambda_form(), + "a) MT-unsafe modification of inline cache"); + assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. 
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + get_IC_pos_in_java_to_interp_stub()); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + method_holder->set_data(0); + jump->set_jump_destination((address)-1); +} + +//----------------------------------------------------------------------------- + +#ifndef PRODUCT + +void CompiledStaticCall::verify() { + // Verify call. + NativeCall::verify(); + if (os::is_MP()) { + verify_alignment(); + } + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + get_IC_pos_in_java_to_interp_stub()); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT diff --git a/hotspot/src/cpu/s390/vm/copy_s390.hpp b/hotspot/src/cpu/s390/vm/copy_s390.hpp new file mode 100644 index 00000000000..d2431d982b3 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/copy_s390.hpp @@ -0,0 +1,1134 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Major contributions by LS + +#ifndef CPU_S390_VM_COPY_S390_HPP +#define CPU_S390_VM_COPY_S390_HPP + +// Inline functions for memory copy and fill. + +// HeapWordSize (the size of class HeapWord) is 8 Bytes (the size of a +// pointer variable), since we always run the _LP64 model. As a consequence, +// HeapWord* memory ranges are always assumed to be doubleword-aligned, +// having a size which is an integer multiple of HeapWordSize. +// +// Dealing only with doubleword-aligned doubleword units has important +// positive performance and data access consequences. Many of the move +// instructions perform particularly well under these circumstances. +// Data access is "doubleword-concurrent", except for MVC and XC. +// Furthermore, data access can be forced to be sequential (MVCL and MVCLE) +// by use of the special padding byte 0xb1, where required. For copying, +// we use padding byte 0xb0 to prevent the D-cache from being polluted. +// +// On z/Architecture, gcc optimizes memcpy into a series of MVC instructions. +// This is optimal, even if just one HeapWord is copied. However, MVC +// copying is not atomic, i.e. not "doubleword concurrent" by definition. +// +// If the -mmvcle compiler option is specified, memcpy translates into +// code such that the entire memory range is copied or preset with just +// one MVCLE instruction. +// +// *to = *from is transformed into a MVC instruction already with -O1. +// Thus, for atomic copy operations, (inline) assembler code is required +// to guarantee atomic data accesses. 
+// +// For large (len >= MVCLEThreshold) chunks of memory, we exploit +// special H/W support of z/Architecture: +// 1) copy short piece of memory to page-align address(es) +// 2) copy largest part (all contained full pages) of memory using mvcle instruction. +// z/Architecture processors have special H/W support for page-aligned storage +// where len is an int multiple of page size. In that case, up to 4 cache lines are +// processed in parallel and L1 cache is not polluted. +// 3) copy the remaining piece of memory. +// +// Measurement classifications: +// very rare - <= 10.000 calls AND <= 1.000 usec elapsed +// rare - <= 100.000 calls AND <= 10.000 usec elapsed +// some - <= 1.000.000 calls AND <= 100.000 usec elapsed +// freq - <= 10.000.000 calls AND <= 1.000.000 usec elapsed +// very freq - > 10.000.000 calls OR > 1.000.000 usec elapsed + +#undef USE_INLINE_ASM + +static void copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + if (from > to) { + while (count-- > 0) { + // Copy forwards + *to++ = *from++; + } + } else { + from += count - 1; + to += count - 1; + while (count-- > 0) { + // Copy backwards + *to-- = *from--; + } + } +} + +static void copy_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + if (from > to) { + while (count-- > 0) { + // Copy forwards + *to++ = *from++; + } + } else { + from += count - 1; + to += count - 1; + while (count-- > 0) { + // Copy backwards + *to-- = *from--; + } + } +} + +static bool has_destructive_overlap(char* from, char* to, size_t byte_count) { + return (from < to) && ((to-from) < (ptrdiff_t)byte_count); +} + +#ifdef USE_INLINE_ASM + + //-------------------------------------------------------------- + // Atomic copying. Atomicity is given by the minimum of source + // and target alignment. Refer to mail comm with Tim Slegel/IBM. + // Only usable for disjoint source and target. 
+ //-------------------------------------------------------------- + #define MOVE8_ATOMIC_4(_to,_from) { \ + unsigned long toaddr; \ + unsigned long fromaddr; \ + asm( \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LG %[fromaddr],%[from] \n\t" /* address of from area */ \ + "MVC 0(32,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) \ + , [toaddr] "=a" (toaddr) \ + , [fromaddr] "=a" (fromaddr) \ + : \ + : "cc" /* clobbered */ \ + ); \ + } + #define MOVE8_ATOMIC_3(_to,_from) { \ + unsigned long toaddr; \ + unsigned long fromaddr; \ + asm( \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LG %[fromaddr],%[from] \n\t" /* address of from area */ \ + "MVC 0(24,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) \ + , [toaddr] "=a" (toaddr) \ + , [fromaddr] "=a" (fromaddr) \ + : \ + : "cc" /* clobbered */ \ + ); \ + } + #define MOVE8_ATOMIC_2(_to,_from) { \ + unsigned long toaddr; \ + unsigned long fromaddr; \ + asm( \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LG %[fromaddr],%[from] \n\t" /* address of from area */ \ + "MVC 0(16,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) \ + , [toaddr] "=a" (toaddr) \ + , [fromaddr] "=a" (fromaddr) \ + : \ + : "cc" /* clobbered */ \ + ); \ + } + #define MOVE8_ATOMIC_1(_to,_from) { \ + unsigned long toaddr; \ + unsigned long fromaddr; \ + asm( \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LG %[fromaddr],%[from] \n\t" /* address of from area */ \ + "MVC 0(8,%[toaddr]),0(%[fromaddr]) \n\t" /* move data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) \ + , [toaddr] "=a" (toaddr) \ + , [fromaddr] "=a" (fromaddr) \ + : \ + : "cc" /* clobbered */ \ + ); \ + } + + //-------------------------------------------------------------- + // Atomic copying of 8-byte entities. 
+ // Conjoint/disjoint property does not matter. Entities are first + // loaded and then stored. + // _to and _from must be 8-byte aligned. + //-------------------------------------------------------------- + #define COPY8_ATOMIC_4(_to,_from) { \ + unsigned long toaddr; \ + asm( \ + "LG 3,%[from] \n\t" /* address of from area */ \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LMG 0,3,0(3) \n\t" /* load data */ \ + "STMG 0,3,0(%[toaddr]) \n\t" /* store data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + , [toaddr] "=a" (toaddr) /* output: scratch for to address */ \ + : \ + : "cc", "r0", "r1", "r2", "r3" /* clobbered */ \ + ); \ + } + #define COPY8_ATOMIC_3(_to,_from) { \ + unsigned long toaddr; \ + asm( \ + "LG 2,%[from] \n\t" /* address of from area */ \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LMG 0,2,0(2) \n\t" /* load data */ \ + "STMG 0,2,0(%[toaddr]) \n\t" /* store data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + , [toaddr] "=a" (toaddr) /* output: scratch for to address */ \ + : \ + : "cc", "r0", "r1", "r2" /* clobbered */ \ + ); \ + } + #define COPY8_ATOMIC_2(_to,_from) { \ + unsigned long toaddr; \ + asm( \ + "LG 1,%[from] \n\t" /* address of from area */ \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LMG 0,1,0(1) \n\t" /* load data */ \ + "STMG 0,1,0(%[toaddr]) \n\t" /* store data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + , [toaddr] "=a" (toaddr) /* output: scratch for to address */ \ + : \ + : "cc", "r0", "r1" /* clobbered */ \ + ); \ + } + #define COPY8_ATOMIC_1(_to,_from) { \ + unsigned long addr; \ + asm( \ + "LG %[addr],%[from] \n\t" /* address of from area */ \ + "LG 0,0(0,%[addr]) \n\t" /* load data */ \ + "LG %[addr],%[to] \n\t" /* address of to area */ \ + "STG 0,0(0,%[addr]) \n\t" /* store data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + , [addr] "=a" (addr) /* output: scratch for from/to address */ \ + : \ + : "cc", "r0" /* clobbered */ \ + ); \ + }
+ + //-------------------------------------------------------------- + // Atomic copying of 4-byte entities. + // COPY4_ATOMIC_<n> copies exactly n (1..4) entities. + // Conjoint/disjoint property does not matter. Entities are first + // loaded and then stored. + // _to and _from must be 4-byte aligned. + //-------------------------------------------------------------- + #define COPY4_ATOMIC_4(_to,_from) { \ + unsigned long toaddr; \ + asm( \ + "LG 3,%[from] \n\t" /* address of from area */ \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LM 0,3,0(3) \n\t" /* load data */ \ + "STM 0,3,0(%[toaddr]) \n\t" /* store data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + , [toaddr] "=a" (toaddr) /* output: scratch for to address */ \ + : \ + : "cc", "r0", "r1", "r2", "r3" /* clobbered */ \ + ); \ + } + #define COPY4_ATOMIC_3(_to,_from) { \ + unsigned long toaddr; \ + asm( \ + "LG 2,%[from] \n\t" /* address of from area */ \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LM 0,2,0(2) \n\t" /* load data */ \ + "STM 0,2,0(%[toaddr]) \n\t" /* store data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + , [toaddr] "=a" (toaddr) /* output: scratch for to address */ \ + : \ + : "cc", "r0", "r1", "r2" /* clobbered */ \ + ); \ + } + #define COPY4_ATOMIC_2(_to,_from) { \ + unsigned long toaddr; \ + asm( \ + "LG 1,%[from] \n\t" /* address of from area */ \ + "LG %[toaddr],%[to] \n\t" /* address of to area */ \ + "LM 0,1,0(1) \n\t" /* load data */ \ + "STM 0,1,0(%[toaddr]) \n\t" /* store data */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + , [toaddr] "=a" (toaddr) /* output: scratch for to address */ \ + : \ + : "cc", "r0", "r1" /* clobbered */ \ + ); \ + } + #define COPY4_ATOMIC_1(_to,_from) { \ + unsigned long addr; \ + asm( \ + "LG %[addr],%[from] \n\t" /* address of from area */ \ + "L 0,0(0,%[addr]) \n\t" /* load data */ \ + "LG %[addr],%[to] \n\t" /* address of to area */ \ + "ST 0,0(0,%[addr]) \n\t" /* store data */ \ + : [to] "+Q" (_to)
/* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + , [addr] "=a" (addr) /* output: scratch for from/to address */ \ + : \ + : "cc", "r0" /* clobbered */ \ + ); \ + } + +#if 0 // Waiting for gcc to support EXRL. + #define MVC_MEMCOPY(_to,_from,_len) \ + if (VM_Version::has_ExecuteExtensions()) { \ + asm("\t" \ + " LAY 1,-1(0,%[len]) \n\t" /* decr for MVC */ \ + " EXRL 1,1f \n\t" /* execute MVC instr */ \ + " BRC 15,2f \n\t" /* skip template */ \ + "1: MVC 0(%[len],%[to]),0(%[from]) \n\t" \ + "2: BCR 0,0 \n\t" \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + : [len] "r" (_len) /* inputs */ \ + : "cc", "r1" /* clobbered */ \ + ); \ + } else { \ + asm("\t" \ + " LARL 2,3f \n\t" \ + " LAY 1,-1(0,%[len]) \n\t" /* decr for MVC */ \ + " EX 1,0(2) \n\t" /* execute MVC instr */ \ + " BRC 15,4f \n\t" /* skip template */ \ + "3: MVC 0(%[len],%[to]),0(%[from]) \n\t" \ + "4: BCR 0,0 \n\t" \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + : [len] "r" (_len) /* inputs */ \ + : "cc", "r1", "r2" /* clobbered */ \ + ); \ + } +#else + #define MVC_MEMCOPY(_to,_from,_len) \ + { unsigned long toaddr; unsigned long tolen; \ + unsigned long fromaddr; unsigned long target; \ + asm("\t" \ + " LTGR %[tolen],%[len] \n\t" /* load and test len */ \ + " BRC 8,2f \n\t" /* do nothing for l=0*/ \ + " AGHI %[tolen],-1 \n\t" /* decr for EX MVC */ \ + " LG %[toaddr],%[to] \n\t" \ + " LG %[fromaddr],%[from] \n\t" \ + " LARL %[target],1f \n\t" /* addr of MVC instr */ \ + " EX %[tolen],0(%[target]) \n\t" /* execute MVC instr */ \ + " BRC 15,2f \n\t" /* skip template */ \ + "1: MVC 0(1,%[toaddr]),0(%[fromaddr]) \n\t" \ + "2: BCR 0,0 \n\t" /* nop as branch target*/\ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) \ + , [tolen] "=a" (tolen) \ + , [toaddr] "=a" (toaddr) \ + , [fromaddr] "=a" (fromaddr) \ + , [target] "=a" (target) \ + : [len] "r" (_len) /* inputs */ \ + : "cc" /* clobbered */ \ + ); \ + } +#endif + + #if 0 // code snippet to be used for debugging + /* ASSERT code BEGIN
*/ \ + " LARL %[len],5f \n\t" \ + " LARL %[mta],4f \n\t" \ + " SLGR %[len],%[mta] \n\t" \ + " CGHI %[len],16 \n\t" \ + " BRC 7,9f \n\t" /* block size != 16 */ \ + \ + " LARL %[len],1f \n\t" \ + " SLGR %[len],%[mta] \n\t" \ + " CGHI %[len],256 \n\t" \ + " BRC 7,9f \n\t" /* list len != 256 */ \ + \ + " LGR 0,0 \n\t" /* artificial SIGILL */ \ + "9: BRC 7,-2 \n\t" \ + " LARL %[mta],1f \n\t" /* restore MVC table begin */ \ + /* ASSERT code END */ + #endif + + // Optimized copying for data less than 4k + // - no destructive overlap + // - 0 <= _n_bytes <= 4096 + // This macro needs to be gcc-compiled with -march=z990. Otherwise, the + // LAY instruction is not available. + #define MVC_MULTI(_to,_from,_n_bytes) \ + { unsigned long toaddr; \ + unsigned long fromaddr; \ + unsigned long movetable; \ + unsigned long len; \ + asm("\t" \ + " LTGFR %[len],%[nby] \n\t" \ + " LG %[ta],%[to] \n\t" /* address of to area */ \ + " BRC 8,1f \n\t" /* nothing to copy */ \ + \ + " NILL %[nby],255 \n\t" /* # bytes mod 256 */ \ + " LG %[fa],%[from] \n\t" /* address of from area */ \ + " BRC 8,3f \n\t" /* no rest, skip copying */ \ + \ + " LARL %[mta],2f \n\t" /* MVC template addr */ \ + " AHI %[nby],-1 \n\t" /* adjust for EX MVC */ \ + \ + " EX %[nby],0(%[mta]) \n\t" /* only rightmost */ \ + /* 8 bits of nby used */ \ + /* Since nby is <= 4096 on entry to this code, we do need */ \ + /* no zero extension before using it in addr calc. */ \ + " LA %[fa],1(%[nby],%[fa]) \n\t"/* adjust from addr */ \ + " LA %[ta],1(%[nby],%[ta]) \n\t"/* adjust to addr */ \ + \ + "3: SRAG %[nby],%[len],8 \n\t" /* # cache lines */ \ + " LARL %[mta],1f \n\t" /* MVC table begin */ \ + " BRC 8,1f \n\t" /* nothing to copy */ \ + \ + /* Insert ASSERT code here if required. 
*/ \ + \ + \ + " LNGFR %[nby],%[nby] \n\t" /* negative offset into */ \ + " SLLG %[nby],%[nby],4 \n\t" /* MVC table 16-byte blocks */ \ + " BC 15,0(%[nby],%[mta]) \n\t" /* branch to block #ncl */ \ + \ + "2: MVC 0(1,%[ta]),0(%[fa]) \n\t" /* MVC template */ \ + \ + "4: MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 4096 == l */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + "5: MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3840 <= l < 4096 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3584 <= l < 3840 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3328 <= l < 3584 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 3072 <= l < 3328 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2816 <= l < 3072 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2560 <= l < 2816 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2304 <= l < 2560 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 2048 <= l < 2304 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1792 <= l < 2048 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1536 <= l < 1792 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1280 <= l < 1536 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 1024 <= l < 1280 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 768 <= l <
1024 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 512 <= l < 768 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + " MVC 0(256,%[ta]),0(%[fa]) \n\t" /* 256 <= l < 512 */ \ + " LAY %[ta],256(0,%[ta]) \n\t" \ + " LA %[fa],256(0,%[fa]) \n\t" \ + "1: BCR 0,0 \n\t" /* nop as branch target */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) \ + , [ta] "=a" (toaddr) \ + , [fa] "=a" (fromaddr) \ + , [mta] "=a" (movetable) \ + , [nby] "+a" (_n_bytes) \ + , [len] "=a" (len) \ + : \ + : "cc" /* clobbered */ \ + ); \ + } + + #define MVCLE_MEMCOPY(_to,_from,_len) \ + asm( \ + " LG 0,%[to] \n\t" /* address of to area */ \ + " LG 2,%[from] \n\t" /* address of from area */ \ + " LGR 1,%[len] \n\t" /* len of to area */ \ + " LGR 3,%[len] \n\t" /* len of from area */ \ + "1: MVCLE 0,2,176 \n\t" /* copy storage, bypass cache (0xb0) */ \ + " BRC 1,1b \n\t" /* retry if interrupted */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [from] "+Q" (_from) /* outputs */ \ + : [len] "r" (_len) /* inputs */ \ + : "cc", "r0", "r1", "r2", "r3" /* clobbered */ \ + ); + + #define MVCLE_MEMINIT(_to,_val,_len) \ + asm( \ + " LG 0,%[to] \n\t" /* address of to area */ \ + " LGR 1,%[len] \n\t" /* len of to area */ \ + " XGR 3,3 \n\t" /* from area len = 0 */ \ + "1: MVCLE 0,2,0(%[val]) \n\t" /* init storage */ \ + " BRC 1,1b \n\t" /* retry if interrupted */ \ + : [to] "+Q" (_to) /* outputs */ \ + : [len] "r" (_len) /* inputs */ \ + , [val] "r" (_val) /* inputs */ \ + : "cc", "r0", "r1", "r3" /* clobbered */ \ + ); + #define MVCLE_MEMZERO(_to,_len) \ + asm( \ + " LG 0,%[to] \n\t" /* address of to area */ \ + " LGR 1,%[len] \n\t" /* len of to area */ \ + " XGR 3,3 \n\t" /* from area len = 0 */ \ + "1: MVCLE 0,2,0 \n\t" /* clear storage */ \ + " BRC 1,1b \n\t" /* retry if interrupted */ \ + : [to] "+Q" (_to) /* outputs */ \ + : [len] "r" (_len) /* inputs */ \ + : "cc", "r0", "r1", "r3" /* clobbered */ \ + 
); + + // Clear a stretch of memory, 0 <= _len <= 256. + // There is no alignment prereq. + // There is no test for len out of range specified above. + #define XC_MEMZERO_256(_to,_len) \ +{ unsigned long toaddr; unsigned long tolen; \ + unsigned long target; \ + asm("\t" \ + " LTGR %[tolen],%[len] \n\t" /* load and test len */ \ + " BRC 8,2f \n\t" /* do nothing for l=0*/ \ + " AGHI %[tolen],-1 \n\t" /* adjust for EX XC */ \ + " LARL %[target],1f \n\t" /* addr of XC instr */ \ + " LG %[toaddr],%[to] \n\t" /* addr of data area */ \ + " EX %[tolen],0(%[target]) \n\t" /* execute XC instr */ \ + " BRC 15,2f \n\t" /* skip template */ \ + "1: XC 0(1,%[toaddr]),0(%[toaddr]) \n\t" \ + "2: BCR 0,0 \n\t" /* nop as branch target*/\ + : [to] "+Q" (_to) /* outputs */ \ + , [tolen] "=a" (tolen) \ + , [toaddr] "=a" (toaddr) \ + , [target] "=a" (target) \ + : [len] "r" (_len) /* inputs */ \ + : "cc" /* clobbered */ \ + ); \ +} + + // Clear a stretch of memory, 256 < _len. + // XC_MEMZERO_256 may be used to clear shorter areas. + // + // The code + // - first zeroes a few bytes to align on a HeapWord. + // This step is currently inactive because all calls seem + // to have their data aligned on HeapWord boundaries. + // - then zeroes a few HeapWords to align on a cache line. + // - then zeroes entire cache lines in a loop. + // - then zeroes the remaining (partial) cache line.
+#if 1 + #define XC_MEMZERO_ANY(_to,_len) \ +{ unsigned long toaddr; unsigned long tolen; \ + unsigned long len8; unsigned long len256; \ + unsigned long target; unsigned long lenx; \ + asm("\t" \ + " LTGR %[tolen],%[len] \n\t" /* load and test len */ \ + " BRC 8,2f \n\t" /* do nothing for l=0*/ \ + " LG %[toaddr],%[to] \n\t" /* addr of data area */ \ + " LARL %[target],1f \n\t" /* addr of XC instr */ \ + " " \ + " LCGR %[len256],%[toaddr] \n\t" /* cache line alignment */\ + " NILL %[len256],0xff \n\t" \ + " BRC 8,4f \n\t" /* already aligned */ \ + " NILH %[len256],0x00 \n\t" /* zero extend */ \ + " LLGFR %[len256],%[len256] \n\t" \ + " LAY %[lenx],-1(,%[len256]) \n\t" \ + " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \ + " LA %[toaddr],0(%[len256],%[toaddr]) \n\t" \ + " SGR %[tolen],%[len256] \n\t" /* adjust len */ \ + " " \ + "4: SRAG %[lenx],%[tolen],8 \n\t" /* # cache lines */ \ + " BRC 8,6f \n\t" /* no full cache lines */ \ + "5: XC 0(256,%[toaddr]),0(%[toaddr]) \n\t" \ + " LA %[toaddr],256(,%[toaddr]) \n\t" \ + " BRCTG %[lenx],5b \n\t" /* iterate */ \ + " " \ + "6: NILL %[tolen],0xff \n\t" /* leftover bytes */ \ + " BRC 8,2f \n\t" /* done if none */ \ + " LAY %[lenx],-1(,%[tolen]) \n\t" \ + " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \ + " BRC 15,2f \n\t" /* skip template */ \ + " " \ + "1: XC 0(1,%[toaddr]),0(%[toaddr]) \n\t" \ + "2: BCR 0,0 \n\t" /* nop as branch target */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [lenx] "=a" (lenx) \ + , [len256] "=a" (len256) \ + , [tolen] "=a" (tolen) \ + , [toaddr] "=a" (toaddr) \ + , [target] "=a" (target) \ + : [len] "r" (_len) /* inputs */ \ + : "cc" /* clobbered */ \ + ); \ +} +#else + #define XC_MEMZERO_ANY(_to,_len) \ +{ unsigned long toaddr; unsigned long tolen; \ + unsigned long len8; unsigned long len256; \ + unsigned long target; unsigned long lenx; \ + asm("\t" \ + " LTGR %[tolen],%[len] \n\t" /* load and test len */ \ + " BRC 8,2f \n\t" /* do nothing for l=0*/ \ + " LG %[toaddr],%[to] \n\t" /* addr of data area */ \ + "
LARL %[target],1f \n\t" /* addr of XC instr */ \ + " " \ + " LCGR %[len8],%[toaddr] \n\t" /* HeapWord alignment */ \ + " NILL %[len8],0x07 \n\t" \ + " BRC 8,3f \n\t" /* already aligned */ \ + " NILH %[len8],0x00 \n\t" /* zero extend */ \ + " LLGFR %[len8],%[len8] \n\t" \ + " LAY %[lenx],-1(,%[len8]) \n\t" \ + " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \ + " LA %[toaddr],0(%[len8],%[toaddr]) \n\t" \ + " SGR %[tolen],%[len8] \n\t" /* adjust len */ \ + " " \ + "3: LCGR %[len256],%[toaddr] \n\t" /* cache line alignment */\ + " NILL %[len256],0xff \n\t" \ + " BRC 8,4f \n\t" /* already aligned */ \ + " NILH %[len256],0x00 \n\t" /* zero extend */ \ + " LLGFR %[len256],%[len256] \n\t" \ + " LAY %[lenx],-1(,%[len256]) \n\t" \ + " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \ + " LA %[toaddr],0(%[len256],%[toaddr]) \n\t" \ + " SGR %[tolen],%[len256] \n\t" /* adjust len */ \ + " " \ + "4: SRAG %[lenx],%[tolen],8 \n\t" /* # cache lines */ \ + " BRC 8,6f \n\t" /* no full cache lines */ \ + "5: XC 0(256,%[toaddr]),0(%[toaddr]) \n\t" \ + " LA %[toaddr],256(,%[toaddr]) \n\t" \ + " BRCTG %[lenx],5b \n\t" /* iterate */ \ + " " \ + "6: NILL %[tolen],0xff \n\t" /* leftover bytes */ \ + " BRC 8,2f \n\t" /* done if none */ \ + " LAY %[lenx],-1(,%[tolen]) \n\t" \ + " EX %[lenx],0(%[target]) \n\t" /* execute XC instr */ \ + " BRC 15,2f \n\t" /* skip template */ \ + " " \ + "1: XC 0(1,%[toaddr]),0(%[toaddr]) \n\t" \ + "2: BCR 0,0 \n\t" /* nop as branch target */ \ + : [to] "+Q" (_to) /* outputs */ \ + , [lenx] "=a" (lenx) \ + , [len8] "=a" (len8) \ + , [len256] "=a" (len256) \ + , [tolen] "=a" (tolen) \ + , [toaddr] "=a" (toaddr) \ + , [target] "=a" (target) \ + : [len] "r" (_len) /* inputs */ \ + : "cc" /* clobbered */ \ + ); \ +} +#endif +#endif // USE_INLINE_ASM + +//*************************************// +// D I S J O I N T C O P Y I N G // +//*************************************// + +static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t
count) { + // JVM2008: very frequent, some tests frequent. + + // Copy HeapWord (=DW) aligned storage. Use MVCLE in inline-asm code. + // MVCLE guarantees DW concurrent (i.e. atomic) accesses if both the addresses of the operands + // are DW aligned and the length is an integer multiple of a DW. Should always be true here. + // + // No special exploit needed. H/W discovers suitable situations itself. + // + // For large chunks of memory, exploit special H/W support of z/Architecture: + // 1) copy short piece of memory to page-align address(es) + // 2) copy largest part (all contained full pages) of memory using mvcle instruction. + // z/Architecture processors have special H/W support for page-aligned storage + // where len is an int multiple of page size. In that case, up to 4 cache lines are + // processed in parallel and L1 cache is not polluted. + // 3) copy the remaining piece of memory. + // +#ifdef USE_INLINE_ASM + jbyte* to_bytes = (jbyte*)to; + jbyte* from_bytes = (jbyte*)from; + size_t len_bytes = count*HeapWordSize; + + // Optimized copying for data less than 4k + switch (count) { + case 0: return; + case 1: MOVE8_ATOMIC_1(to,from) + return; + case 2: MOVE8_ATOMIC_2(to,from) + return; +// case 3: MOVE8_ATOMIC_3(to,from) +// return; +// case 4: MOVE8_ATOMIC_4(to,from) +// return; + default: + if (len_bytes <= 4096) { + MVC_MULTI(to,from,len_bytes) + return; + } + // else + MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes) + return; + } +#else + // Fallback code. + switch (count) { + case 0: + return; + + case 1: + *to = *from; + return; + + case 2: + *to++ = *from++; + *to = *from; + return; + + case 3: + *to++ = *from++; + *to++ = *from++; + *to = *from; + return; + + case 4: + *to++ = *from++; + *to++ = *from++; + *to++ = *from++; + *to = *from; + return; + + default: + while (count-- > 0) + *(to++) = *(from++); + return; + } +#endif +} + +static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { + // JVM2008: < 4k calls. 
+ assert(((((size_t)from) & 0x07L) | (((size_t)to) & 0x07L)) == 0, "No atomic copy w/o aligned data"); + pd_aligned_disjoint_words(from, to, count); // Rare calls -> just delegate. +} + +static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + // JVM2008: very rare. + pd_aligned_disjoint_words(from, to, count); // Rare calls -> just delegate. +} + + +//*************************************// +// C O N J O I N T C O P Y I N G // +//*************************************// + +static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + // JVM2008: between some and lower end of frequent. + +#ifdef USE_INLINE_ASM + size_t count_in = count; + if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) { + switch (count_in) { + case 4: COPY8_ATOMIC_4(to,from) + return; + case 3: COPY8_ATOMIC_3(to,from) + return; + case 2: COPY8_ATOMIC_2(to,from) + return; + case 1: COPY8_ATOMIC_1(to,from) + return; + case 0: return; + default: + from += count_in; + to += count_in; + while (count_in-- > 0) + *(--to) = *(--from); // Copy backwards, areas overlap destructively. + return; + } + } + // else + jbyte* to_bytes = (jbyte*)to; + jbyte* from_bytes = (jbyte*)from; + size_t len_bytes = count_in*BytesPerLong; + MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes) + return; +#else + // Fallback code. + if (has_destructive_overlap((char*)from, (char*)to, count*BytesPerLong)) { + HeapWord t1, t2, t3; + switch (count) { + case 0: + return; + + case 1: + *to = *from; + return; + + case 2: + t1 = *(from+1); + *to = *from; + *(to+1) = t1; + return; + + case 3: + t1 = *(from+1); + t2 = *(from+2); + *to = *from; + *(to+1) = t1; + *(to+2) = t2; + return; + + case 4: + t1 = *(from+1); + t2 = *(from+2); + t3 = *(from+3); + *to = *from; + *(to+1) = t1; + *(to+2) = t2; + *(to+3) = t3; + return; + + default: + from += count; + to += count; + while (count-- > 0) + *(--to) = *(--from); // Copy backwards, areas overlap destructively. 
+ return; + } + } + // else + // Just delegate. HeapWords are optimally aligned anyway. + pd_aligned_disjoint_words(from, to, count); +#endif +} + +static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + + // Just delegate. HeapWords are optimally aligned anyway. + pd_aligned_conjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(void* from, void* to, size_t count) { + +#ifdef USE_INLINE_ASM + size_t count_in = count; + if (has_destructive_overlap((char*)from, (char*)to, count_in)) + (void)memmove(to, from, count_in); + else { + jbyte* to_bytes = (jbyte*)to; + jbyte* from_bytes = (jbyte*)from; + size_t len_bytes = count_in; + MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes) + } +#else + if (has_destructive_overlap((char*)from, (char*)to, count)) + (void)memmove(to, from, count); + else + (void)memcpy(to, from, count); +#endif +} + +//**************************************************// +// C O N J O I N T A T O M I C C O P Y I N G // +//**************************************************// + +static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { + // Call arraycopy stubs to do the job. + pd_conjoint_bytes(from, to, count); // bytes are always accessed atomically. +} + +static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + +#ifdef USE_INLINE_ASM + size_t count_in = count; + if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerShort)) { + // Use optimizations from shared code where no z-specific optimization exists. + copy_conjoint_jshorts_atomic(from, to, count); + } else { + jbyte* to_bytes = (jbyte*)to; + jbyte* from_bytes = (jbyte*)from; + size_t len_bytes = count_in*BytesPerShort; + MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes) + } +#else + // Use optimizations from shared code where no z-specific optimization exists. 
+ copy_conjoint_jshorts_atomic(from, to, count); +#endif +} + +static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + +#ifdef USE_INLINE_ASM + size_t count_in = count; + if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerInt)) { + switch (count_in) { + case 4: COPY4_ATOMIC_4(to,from) + return; + case 3: COPY4_ATOMIC_3(to,from) + return; + case 2: COPY4_ATOMIC_2(to,from) + return; + case 1: COPY4_ATOMIC_1(to,from) + return; + case 0: return; + default: + // Use optimizations from shared code where no z-specific optimization exists. + copy_conjoint_jints_atomic(from, to, count_in); + return; + } + } + // else + jbyte* to_bytes = (jbyte*)to; + jbyte* from_bytes = (jbyte*)from; + size_t len_bytes = count_in*BytesPerInt; + MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes) +#else + // Use optimizations from shared code where no z-specific optimization exists. + copy_conjoint_jints_atomic(from, to, count); +#endif +} + +static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + +#ifdef USE_INLINE_ASM + size_t count_in = count; + if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) { + switch (count_in) { + case 4: COPY8_ATOMIC_4(to,from) return; + case 3: COPY8_ATOMIC_3(to,from) return; + case 2: COPY8_ATOMIC_2(to,from) return; + case 1: COPY8_ATOMIC_1(to,from) return; + case 0: return; + default: + from += count_in; + to += count_in; + while (count_in-- > 0) { *(--to) = *(--from); } // Copy backwards, areas overlap destructively. + return; + } + } + // else { + jbyte* to_bytes = (jbyte*)to; + jbyte* from_bytes = (jbyte*)from; + size_t len_bytes = count_in*BytesPerLong; + MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes) +#else + size_t count_in = count; + if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerLong)) { + if (count_in < 8) { + from += count_in; + to += count_in; + while (count_in-- > 0) + *(--to) = *(--from); // Copy backwards, areas overlap destructively. 
+ return; + } + // else { + from += count_in-1; + to += count_in-1; + if (count_in&0x01) { + *(to--) = *(from--); + count_in--; + } + for (; count_in>0; count_in-=2) { + *to = *from; + *(to-1) = *(from-1); + to -= 2; + from -= 2; + } + } + else + pd_aligned_disjoint_words((HeapWord*)from, (HeapWord*)to, count_in); // rare calls -> just delegate. +#endif +} + +static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { + +#ifdef USE_INLINE_ASM + size_t count_in = count; + if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerOop)) { + switch (count_in) { + case 4: COPY8_ATOMIC_4(to,from) return; + case 3: COPY8_ATOMIC_3(to,from) return; + case 2: COPY8_ATOMIC_2(to,from) return; + case 1: COPY8_ATOMIC_1(to,from) return; + case 0: return; + default: + from += count_in; + to += count_in; + while (count_in-- > 0) { *(--to) = *(--from); } // Copy backwards, areas overlap destructively. + return; + } + } + // else + jbyte* to_bytes = (jbyte*)to; + jbyte* from_bytes = (jbyte*)from; + size_t len_bytes = count_in*BytesPerOop; + MVCLE_MEMCOPY(to_bytes, from_bytes, len_bytes) +#else + size_t count_in = count; + if (has_destructive_overlap((char*)from, (char*)to, count_in*BytesPerOop)) { + from += count_in; + to += count_in; + while (count_in-- > 0) *(--to) = *(--from); // Copy backwards, areas overlap destructively. + return; + } + // else + pd_aligned_disjoint_words((HeapWord*)from, (HeapWord*)to, count_in); // rare calls -> just delegate. 
+ return; +#endif +} + +static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +//**********************************************// +// M E M O R Y I N I T I A L I S A T I O N // +//**********************************************// + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + // JVM2008: very rare, only in some tests. +#ifdef USE_INLINE_ASM + // Initialize storage to a given value. Use memset instead of copy loop. + // For large chunks of memory, exploit special H/W support of z/Architecture: + // 1) init short piece of memory to page-align address + // 2) init largest part (all contained full pages) of memory using mvcle instruction. + // z/Architecture processors have special H/W support for page-aligned storage + // where len is an int multiple of page size. In that case, up to 4 cache lines are + // processed in parallel and L1 cache is not polluted. + // 3) init the remaining piece of memory. + // Atomicity cannot really be an issue since gcc implements the loop body with XC anyway. + // If atomicity is a problem, we have to prevent gcc optimization. Best workaround: inline asm. 
+ + jbyte* to_bytes = (jbyte*)to; + size_t len_bytes = count; + + MVCLE_MEMINIT(to_bytes, value, len_bytes) + +#else + // Memset does the best job possible: loop over 256-byte MVCs, with + // the last MVC EXecuted. With the -mmvcle option, initialization + // is done using MVCLE -> slight advantage for large areas. + (void)memset(to, value, count); +#endif +} + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + // Occurs in dbg builds only. Usually memory poisoning with BAADBABE, DEADBEEF, etc. + // JVM2008: < 4k calls. + if (value == 0) { + pd_zero_to_words(tohw, count); + return; + } + if (value == ~(juint)(0)) { + pd_fill_to_bytes(tohw, count*HeapWordSize, (jubyte)(~(juint)(0))); + return; + } + julong* to = (julong*) tohw; + julong v = ((julong) value << 32) | value; + while (count-- > 0) { + *to++ = v; + } +} + +static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + // JVM2008: very frequent, but virtually all calls are with value == 0. + pd_fill_to_words(tohw, count, value); +} + +//**********************************// +// M E M O R Y C L E A R I N G // +//**********************************// + +// Delegate to pd_zero_to_bytes. It also works HeapWord-atomic. +// Distinguish between simple and large zero_to_words. +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_zero_to_bytes(tohw, count*HeapWordSize); +} + +// Delegate to pd_zero_to_bytes. It also works HeapWord-atomic. +static void pd_zero_to_words_large(HeapWord* tohw, size_t count) { + // JVM2008: generally frequent, some tests show very frequent calls. + pd_zero_to_bytes(tohw, count*HeapWordSize); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + // JVM2008: some calls (generally), some tests frequent +#ifdef USE_INLINE_ASM + // Even zero_to_bytes() requires HeapWord-atomic, or, at least, sequential + // zeroing of the memory. 
MVCLE is not fit for that job: + // "As observed by other CPUs and by the channel subsystem, + // that portion of the first operand which is filled + // with the padding byte is not necessarily stored into in + // a left-to-right direction and may appear to be stored + // into more than once." + // Therefore, implementation was changed to use (multiple) XC instructions. + + const long line_size = 256; + jbyte* to_bytes = (jbyte*)to; + size_t len_bytes = count; + + if (len_bytes <= line_size) { + XC_MEMZERO_256(to_bytes, len_bytes); + } else { + XC_MEMZERO_ANY(to_bytes, len_bytes); + } + +#else + // Memset does the best job possible: loop over 256-byte MVCs, with + // the last MVC EXecuted. With the -mmvcle option, initialization + // is done using MVCLE -> slight advantage for large areas. + (void)memset(to, 0, count); +#endif +} + +#endif // CPU_S390_VM_COPY_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/debug_s390.cpp b/hotspot/src/cpu/s390/vm/debug_s390.cpp new file mode 100644 index 00000000000..75f69ee45af --- /dev/null +++ b/hotspot/src/cpu/s390/vm/debug_s390.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" + +void pd_ps(frame f) {} diff --git a/hotspot/src/cpu/s390/vm/depChecker_s390.hpp b/hotspot/src/cpu/s390/vm/depChecker_s390.hpp new file mode 100644 index 00000000000..f272becb982 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/depChecker_s390.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_DEPCHECKER_S390_HPP +#define CPU_S390_VM_DEPCHECKER_S390_HPP + +// Nothing to do on z/Architecture + +#endif // CPU_S390_VM_DEPCHECKER_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/disassembler_s390.hpp b/hotspot/src/cpu/s390/vm/disassembler_s390.hpp new file mode 100644 index 00000000000..5804b831a77 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/disassembler_s390.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_S390_VM_DISASSEMBLER_S390_HPP +#define CPU_S390_VM_DISASSEMBLER_S390_HPP + + static int pd_instruction_alignment() { + return 1; + } + + static const char* pd_cpu_opts() { + return "zarch"; + } + +#endif // CPU_S390_VM_DISASSEMBLER_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/frame_s390.cpp b/hotspot/src/cpu/s390/vm/frame_s390.cpp new file mode 100644 index 00000000000..89a45c5f075 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/frame_s390.cpp @@ -0,0 +1,504 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_s390.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#include "runtime/vframeArray.hpp" +#endif + +// Major contributions by Aha, AS. + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif // ASSERT + + +// Profiling/safepoint support + +bool frame::safe_for_sender(JavaThread *thread) { + bool safe = false; + address cursp = (address)sp(); + address curfp = (address)fp(); + if ((cursp != NULL && curfp != NULL && + (cursp <= thread->stack_base() && cursp >= thread->stack_base() - thread->stack_size())) && + (curfp <= thread->stack_base() && curfp >= thread->stack_base() - thread->stack_size())) { + safe = true; + } + return safe; +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +// sender_sp + +intptr_t* frame::interpreter_frame_sender_sp() const { + return sender_sp(); +} + +frame frame::sender_for_entry_frame(RegisterMap *map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C. Skip all C frames and return top C + // frame of that chunk as the sender. + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + + assert(!entry_frame_is_first(), "next Java sp must be non zero"); + assert(jfa->last_Java_sp() > _sp, "must be above this frame on stack"); + + map->clear(); + + assert(map->include_argument_oops(), "should be set by clear"); + + if (jfa->last_Java_pc() != NULL) { + frame fr(jfa->last_Java_sp(), jfa->last_Java_pc()); + return fr; + } + // Last_java_pc is not set if we come here from compiled code. 
+ frame fr(jfa->last_Java_sp()); + return fr; +} + +frame frame::sender_for_interpreter_frame(RegisterMap *map) const { + // Pass callers sender_sp as unextended_sp. + return frame(sender_sp(), sender_pc(), (intptr_t*)(ijava_state()->sender_sp)); +} + +frame frame::sender_for_compiled_frame(RegisterMap *map) const { + assert(map != NULL, "map must be set"); + // Frame owned by compiler. + + address pc = *compiled_sender_pc_addr(_cb); + frame caller(compiled_sender_sp(_cb), pc); + + // Now adjust the map. + + // Get the rest. + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + } + + return caller; +} + +intptr_t* frame::compiled_sender_sp(CodeBlob* cb) const { + return sender_sp(); +} + +address* frame::compiled_sender_pc_addr(CodeBlob* cb) const { + return sender_pc_addr(); +} + +frame frame::sender(RegisterMap* map) const { + // Default is we don't have to follow them. The sender_for_xxx will + // update it accordingly. + map->set_include_argument_oops(false); + + if (is_entry_frame()) { + return sender_for_entry_frame(map); + } + if (is_interpreted_frame()) { + return sender_for_interpreter_frame(map); + } + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
+ return frame(sender_sp(), sender_pc()); +} + +void frame::patch_pc(Thread* thread, address pc) { + if (TracePcPatching) { + tty->print_cr("patch_pc at address " PTR_FORMAT " [" PTR_FORMAT " -> " PTR_FORMAT "] ", + p2i(&((address*) _sp)[-1]), p2i(((address*) _sp)[-1]), p2i(pc)); + } + own_abi()->return_pc = (uint64_t)pc; + _cb = CodeCache::find_blob(pc); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original to be stored before patching"); + _deopt_state = is_deoptimized; + // Leave _pc as is. + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + // Is there anything to do? + assert(is_interpreted_frame(), "Not an interpreted frame"); + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + if (method->is_native()) { + address lresult = (address)&(ijava_state()->lresult); + address fresult = (address)&(ijava_state()->fresult); + + switch (type) { + case T_OBJECT: + case T_ARRAY: { + *oop_result = (oop) (void*) ijava_state()->oop_tmp; + break; + } + // We use std/stfd to store the values. + case T_BOOLEAN : value_result->z = (jboolean) *(unsigned long*)lresult; break; + case T_INT : value_result->i = (jint) *(long*)lresult; break; + case T_CHAR : value_result->c = (jchar) *(unsigned long*)lresult; break; + case T_SHORT : value_result->s = (jshort) *(long*)lresult; break; + case T_BYTE : value_result->b = (jbyte) *(long*)lresult; break; + case T_LONG : value_result->j = (jlong) *(long*)lresult; break; + case T_FLOAT : value_result->f = (jfloat) *(float*)fresult; break; + case T_DOUBLE : value_result->d = (jdouble) *(double*)fresult; break; + case T_VOID : break; // Nothing to do.
+ default : ShouldNotReachHere(); + } + } else { + intptr_t* tos_addr = interpreter_frame_tos_address(); + switch (type) { + case T_OBJECT: + case T_ARRAY: { + oop obj = *(oop*)tos_addr; + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = (jboolean) *(jint*)tos_addr; break; + case T_BYTE : value_result->b = (jbyte) *(jint*)tos_addr; break; + case T_CHAR : value_result->c = (jchar) *(jint*)tos_addr; break; + case T_SHORT : value_result->s = (jshort) *(jint*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : break; // Nothing to do. + default : ShouldNotReachHere(); + } + } + + return type; +} + + +// Dump all frames starting a given C stack-pointer. +// Use max_frames to limit the number of traced frames. +void frame::back_trace(outputStream* st, intptr_t* start_sp, intptr_t* top_pc, unsigned long flags, int max_frames) { + + static char buf[ 150 ]; + + bool print_outgoing_arguments = flags & 0x1; + bool print_istate_pointers = flags & 0x2; + int num = 0; + + intptr_t* current_sp = (intptr_t*) start_sp; + int last_num_jargs = 0; + int frame_type = 0; + int last_frame_type = 0; + + while (current_sp) { + intptr_t* current_fp = (intptr_t*) *current_sp; + address current_pc = (num == 0) + ? (address) top_pc + : (address) *((intptr_t*)(((address) current_sp) + _z_abi(return_pc))); + + if ((intptr_t*) current_fp != 0 && (intptr_t*) current_fp <= current_sp) { + st->print_cr("ERROR: corrupt stack"); + return; + } + + st->print("#%-3d ", num); + const char* type_name = " "; + const char* function_name = NULL; + + // Detect current frame's frame_type, default to 'C frame'. 
+ frame_type = 0; + + CodeBlob* blob = NULL; + + if (Interpreter::contains(current_pc)) { + frame_type = 1; + } else if (StubRoutines::contains(current_pc)) { + if (StubRoutines::returns_to_call_stub(current_pc)) { + frame_type = 2; + } else { + frame_type = 4; + type_name = "stu"; + StubCodeDesc* desc = StubCodeDesc::desc_for (current_pc); + if (desc) { + function_name = desc->name(); + } else { + function_name = "unknown stub"; + } + } + } else if (CodeCache::contains(current_pc)) { + blob = CodeCache::find_blob_unsafe(current_pc); + if (blob) { + if (blob->is_nmethod()) { + frame_type = 3; + } else if (blob->is_deoptimization_stub()) { + frame_type = 4; + type_name = "deo"; + function_name = "deoptimization blob"; + } else if (blob->is_uncommon_trap_stub()) { + frame_type = 4; + type_name = "uct"; + function_name = "uncommon trap blob"; + } else if (blob->is_exception_stub()) { + frame_type = 4; + type_name = "exc"; + function_name = "exception blob"; + } else if (blob->is_safepoint_stub()) { + frame_type = 4; + type_name = "saf"; + function_name = "safepoint blob"; + } else if (blob->is_runtime_stub()) { + frame_type = 4; + type_name = "run"; + function_name = ((RuntimeStub *)blob)->name(); + } else if (blob->is_method_handles_adapter_blob()) { + frame_type = 4; + type_name = "mha"; + function_name = "method handles adapter blob"; + } else { + frame_type = 4; + type_name = "blo"; + function_name = "unknown code blob"; + } + } else { + frame_type = 4; + type_name = "blo"; + function_name = "unknown code blob"; + } + } + + st->print("sp=" PTR_FORMAT " ", p2i(current_sp)); + + if (frame_type == 0) { + current_pc = (address) *((intptr_t*)(((address) current_sp) + _z_abi(gpr14))); + } + + st->print("pc=" PTR_FORMAT " ", p2i(current_pc)); + st->print(" "); + + switch (frame_type) { + case 0: // C frame: + { + st->print(" "); + if (current_pc == 0) { + st->print("? 
"); + } else { + // name + int func_offset; + char demangled_name[256]; + int demangled_name_len = 256; + if (os::dll_address_to_function_name(current_pc, demangled_name, demangled_name_len, &func_offset)) { + demangled_name[demangled_name_len-1] = '\0'; + st->print(func_offset == -1 ? "%s " : "%s+0x%x", demangled_name, func_offset); + } else { + st->print("? "); + } + } + } + break; + + case 1: // interpreter frame: + { + st->print(" i "); + + if (last_frame_type != 1) last_num_jargs = 8; + + // name + Method* method = *(Method**)((address)current_fp + _z_ijava_state_neg(method)); + if (method) { + if (method->is_synchronized()) st->print("synchronized "); + if (method->is_static()) st->print("static "); + if (method->is_native()) st->print("native "); + method->name_and_sig_as_C_string(buf, sizeof(buf)); + st->print("%s ", buf); + } + else + st->print("? "); + + intptr_t* tos = (intptr_t*) *(intptr_t*)((address)current_fp + _z_ijava_state_neg(esp)); + if (print_istate_pointers) { + st->cr(); + st->print(" "); + st->print("ts=" PTR_FORMAT " ", p2i(tos)); + } + + // Dump some Java stack slots. Skipped if the frame holds no Method* (it is dereferenced below). + if (print_outgoing_arguments && method != NULL) { + if (method->is_native()) { +#ifdef ASSERT + intptr_t* cargs = (intptr_t*) (((address)current_sp) + _z_abi(carg_1)); + for (int i = 0; i < last_num_jargs; i++) { + // Cargs is not prepushed. + st->cr(); + st->print(" "); + st->print(PTR_FORMAT, *(cargs)); + cargs++; + } +#endif /* ASSERT */ + } + else { + if (tos) { + for (int i = 0; i < last_num_jargs; i++) { + // tos+0 is prepushed, ignore.
+ tos++; + if (tos >= (intptr_t *)((address)current_fp + _z_ijava_state_neg(monitors))) + break; + st->cr(); + st->print(" "); + st->print(PTR_FORMAT " %+.3e %+.3le", *(tos), *(float*)(tos), *(double*)(tos)); + } + } + } + last_num_jargs = method->size_of_parameters(); + } + } + break; + + case 2: // entry frame: + { + st->print("v2i "); + + // name + st->print("call stub"); + } + break; + + case 3: // compiled frame: + { + st->print(" c "); + + // name + Method* method = ((nmethod *)blob)->method(); + if (method) { + method->name_and_sig_as_C_string(buf, sizeof(buf)); + st->print("%s ", buf); + } + else + st->print("? "); + } + break; + + case 4: // named frames + { + st->print("%s ", type_name); + + // name + if (function_name) + st->print("%s", function_name); + } + break; + + default: + break; + } + + st->cr(); + st->flush(); + + current_sp = current_fp; + last_frame_type = frame_type; + num++; + // Check for maximum # of frames, and stop when reached. + if (max_frames > 0 && --max_frames == 0) + break; + } + +} + +// Convenience function for calls from the debugger. + +extern "C" void bt(intptr_t* start_sp,intptr_t* top_pc) { + frame::back_trace(tty,start_sp, top_pc, 0); +} + +extern "C" void bt_full(intptr_t* start_sp,intptr_t* top_pc) { + frame::back_trace(tty,start_sp, top_pc, (unsigned long)(long)-1); +} + + +// Function for tracing a limited number of frames. +// Use this one if you only need to see the "top of stack" frames. +extern "C" void bt_max(intptr_t *start_sp, intptr_t *top_pc, int max_frames) { + frame::back_trace(tty, start_sp, top_pc, 0, max_frames); +} + +#if !defined(PRODUCT) + +#define DESCRIBE_ADDRESS(name) \ + values.describe(frame_no, (intptr_t*)&ijava_state()->name, #name); + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + // Describe z_ijava_state elements. 
+ DESCRIBE_ADDRESS(method); + DESCRIBE_ADDRESS(locals); + DESCRIBE_ADDRESS(monitors); + DESCRIBE_ADDRESS(cpoolCache); + DESCRIBE_ADDRESS(bcp); + DESCRIBE_ADDRESS(mdx); + DESCRIBE_ADDRESS(esp); + DESCRIBE_ADDRESS(sender_sp); + DESCRIBE_ADDRESS(top_frame_sp); + DESCRIBE_ADDRESS(oop_tmp); + DESCRIBE_ADDRESS(lresult); + DESCRIBE_ADDRESS(fresult); + } +} + +#endif // !PRODUCT + +intptr_t *frame::initial_deoptimization_info() { + // Used to reset the saved FP. + return fp(); +} diff --git a/hotspot/src/cpu/s390/vm/frame_s390.hpp b/hotspot/src/cpu/s390/vm/frame_s390.hpp new file mode 100644 index 00000000000..d17cbdc627e --- /dev/null +++ b/hotspot/src/cpu/s390/vm/frame_s390.hpp @@ -0,0 +1,552 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Major contributions by ML, AHa. 
+ +#ifndef CPU_S390_VM_FRAME_S390_HPP +#define CPU_S390_VM_FRAME_S390_HPP + +#include "runtime/synchronizer.hpp" + + // C frame layout on ZARCH_64. + // + // In this figure the stack grows upwards, while memory grows + // downwards. See "Linux for zSeries: ELF Application Binary Interface Supplement", + // IBM Corp. (LINUX-1107-01) + // + // Square brackets denote stack regions possibly larger + // than a single 64 bit slot. + // + // STACK: + // 0 [C_FRAME] <-- SP after prolog (mod 8 = 0) + // [C_FRAME] <-- SP before prolog + // ... + // [C_FRAME] + // + // C_FRAME: + // 0 [ABI_160] + // + // ABI_160: + // 0 [ABI_16] + // 16 CARG_1: spill slot for outgoing arg 1. used by next callee. + // 24 CARG_2: spill slot for outgoing arg 2. used by next callee. + // 32 CARG_3: spill slot for outgoing arg 3. used by next callee. + // 40 CARG_4: spill slot for outgoing arg 4. used by next callee. + // 48 GPR_6: spill slot for GPR_6. used by next callee. + // ... ... + // 120 GPR_15: spill slot for GPR_15. used by next callee. + // 128 CFARG_1: spill slot for outgoing fp arg 1. used by next callee. + // 136 CFARG_2: spill slot for outgoing fp arg 2. used by next callee. + // 144 CFARG_3: spill slot for outgoing fp arg 3. used by next callee. + // 152 CFARG_4: spill slot for outgoing fp arg 4. used by next callee. + // 160 [REMAINING CARGS] + // + // ABI_16: + // 0 callers_sp + // 8 return_pc + + public: + + // C frame layout + + typedef enum { + // stack alignment + alignment_in_bytes = 8, + // log_2(8*8 bits) = 6. + log_2_of_alignment_in_bits = 6 + } frame_constants; + + struct z_abi_16 { + uint64_t callers_sp; + uint64_t return_pc; + }; + + enum { + z_abi_16_size = sizeof(z_abi_16) + }; + + #define _z_abi16(_component) \ + (offset_of(frame::z_abi_16, _component)) + + // ABI_160: + + // REMARK: This structure should reflect the "minimal" ABI frame + // layout, but it doesn't. 
There is an extra field at the end of the + // structure that marks the area where arguments are passed, when + // the argument registers "overflow". Thus, sizeof(z_abi_160) + // doesn't yield the expected (and desired) result. Therefore, as + // long as we do not provide extra infrastructure, one should use + // either z_abi_160_size, or _z_abi(remaining_cargs) instead of + // sizeof(...). + struct z_abi_160 { + uint64_t callers_sp; + uint64_t return_pc; + uint64_t carg_1; + uint64_t carg_2; + uint64_t carg_3; + uint64_t carg_4; + uint64_t gpr6; + uint64_t gpr7; + uint64_t gpr8; + uint64_t gpr9; + uint64_t gpr10; + uint64_t gpr11; + uint64_t gpr12; + uint64_t gpr13; + uint64_t gpr14; + uint64_t gpr15; + uint64_t cfarg_1; + uint64_t cfarg_2; + uint64_t cfarg_3; + uint64_t cfarg_4; + uint64_t remaining_cargs; + }; + + enum { + z_abi_160_size = 160 + }; + + #define _z_abi(_component) \ + (offset_of(frame::z_abi_160, _component)) + + struct z_abi_160_spill : z_abi_160 { + // Additional spill slots. Use as 'offset_of(z_abi_160_spill, spill[n])'. + uint64_t spill[0]; + // Aligned to frame::alignment_in_bytes (8). + }; + + + // non-volatile GPRs: + + struct z_spill_nonvolatiles { + uint64_t r6; + uint64_t r7; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + }; + + enum { + z_spill_nonvolatiles_size = sizeof(z_spill_nonvolatiles) + }; + + #define _z_spill_nonvolatiles_neg(_component) \ + (-frame::z_spill_nonvolatiles_size + offset_of(frame::z_spill_nonvolatiles, _component)) + + // Frame layout for the Java template interpreter on z/Architecture. + // + // In these figures the stack grows upwards, while memory grows + // downwards. Square brackets denote regions possibly larger than + // single 64 bit slots. + // + // STACK (no JNI, no compiled code, no library calls, template interpreter is active): + // + // 0 [TOP_IJAVA_FRAME] + // [PARENT_IJAVA_FRAME] + // [PARENT_IJAVA_FRAME] + // ...
+ // [PARENT_IJAVA_FRAME] + // [ENTRY_FRAME] + // [C_FRAME] + // ... + // [C_FRAME] + // + // TOP_IJAVA_FRAME: + // + // 0 [TOP_IJAVA_FRAME_ABI] + // 16 [operand stack] + // [monitors] (optional) + // [IJAVA_STATE] + // note: Own locals are located in the caller frame. + // + // PARENT_IJAVA_FRAME: + // + // 0 [PARENT_IJAVA_FRAME_ABI] + // [callee's locals w/o arguments] + // [outgoing arguments] + // [used part of operand stack w/o arguments] + // [monitors] (optional) + // [IJAVA_STATE] + // + // ENTRY_FRAME: + // + // 0 [PARENT_IJAVA_FRAME_ABI] + // [callee's locals w/o arguments] + // [outgoing arguments] + // [ENTRY_FRAME_LOCALS] + // + // TOP_IJAVA_FRAME_ABI: + // + // 0 [ABI_160] + // + // + // PARENT_IJAVA_FRAME_ABI: + // + // 0 [ABI_16] + // + // IJAVA_STATE: + // + // 0 method + // 8 locals + // monitors : monitor block top (i.e. lowest address) + // cpoolCache + // bcp + // mdx + // esp : Points to first slot above operands. + // sender_sp : See comment in z_ijava_state. + // top_frame_sp : Own SP before modification by i2c adapter. + // oop_tmp + // lresult + // fresult + // + // EXAMPLE: + // --------- + // + // 3 monitors, 5 operand stack slots max. / 3 allocated + // + // F0 callers_sp <- Z_SP (callers_sp == Z_fp (own fp)) + // return_pc + // [rest of ABI_160] + // /slot 4: free + // oper. 
| slot 3: free <- Z_esp points to first free slot + // stack | slot 2: ref val v2 caches IJAVA_STATE.esp + // | slot 1: unused + // \slot 0: long val v1 + // /slot 5 <- IJAVA_STATE.monitors = monitor block top + // | slot 4 + // monitors| slot 3 + // | slot 2 + // | slot 1 + // \slot 0 + // [IJAVA_STATE] <- monitor block bot (points to first byte in IJAVA_STATE) + // F1 [PARENT_IJAVA_FRAME_ABI] <- Z_fp (== *Z_SP, points to slot just below IJAVA_STATE) + // [F0's locals] <- Z_locals, locals[i] := *(Z_locals - i*BytesPerWord) + // [F1's operand stack] + // [F1's monitors] (optional) + // [IJAVA_STATE] + + public: + + // PARENT_IJAVA_FRAME_ABI + + struct z_parent_ijava_frame_abi : z_abi_16 { + }; + + enum { + z_parent_ijava_frame_abi_size = sizeof(z_parent_ijava_frame_abi) + }; + + #define _z_parent_ijava_frame_abi(_component) \ + (offset_of(frame::z_parent_ijava_frame_abi, _component)) + + // TOP_IJAVA_FRAME_ABI + + struct z_top_ijava_frame_abi : z_abi_160 { + }; + + enum { + z_top_ijava_frame_abi_size = sizeof(z_top_ijava_frame_abi) + }; + + #define _z_top_ijava_frame_abi(_component) \ + (offset_of(frame::z_top_ijava_frame_abi, _component)) + + // IJAVA_STATE + + struct z_ijava_state{ + DEBUG_ONLY(uint64_t magic;) // wrong magic -> wrong state! + uint64_t method; + uint64_t mirror; + uint64_t locals; // Z_locals + uint64_t monitors; + uint64_t cpoolCache; + uint64_t bcp; // Z_bcp + uint64_t mdx; + uint64_t esp; // Z_esp + // Caller's original SP before modification by c2i adapter (if caller is compiled) + // and before top -> parent frame conversion by the interpreter entry. + // Note: for i2i calls a correct sender_sp is required, too, because there + // we cannot use the caller's top_frame_sp as sp when removing the callee + // frame (caller could be compiled or entry frame). Therefore the sender_sp + // has to be the interpreted caller's sp as TOP_IJAVA_FRAME. See also + // AbstractInterpreter::layout_activation() used by deoptimization. 
+ uint64_t sender_sp; + // Own SP before modification by i2c adapter and top-2-parent-resize + // by interpreted callee. + uint64_t top_frame_sp; + // Slots only needed for native calls. Maybe better to move elsewhere. + uint64_t oop_tmp; + uint64_t lresult; + uint64_t fresult; + }; + + enum { + z_ijava_state_size = sizeof(z_ijava_state) + }; + +#ifdef ASSERT + enum { + z_istate_magic_number = 0x900d // ~= good magic + }; +#endif + +#define _z_ijava_state_neg(_component) \ + (int) (-frame::z_ijava_state_size + offset_of(frame::z_ijava_state, _component)) + + // ENTRY_FRAME + + struct z_entry_frame_locals { + uint64_t call_wrapper_address; + uint64_t result_address; + uint64_t result_type; + uint64_t arguments_tos_address; + // Callee saved registers are spilled to caller frame. + // Caller must have z_abi_160. + }; + + enum { + z_entry_frame_locals_size = sizeof(z_entry_frame_locals) + }; + + #define _z_entry_frame_locals_neg(_component) \ + (int) (-frame::z_entry_frame_locals_size + offset_of(frame::z_entry_frame_locals, _component)) + + // Frame layout for JIT generated methods + // + // In these figures the stack grows upwards, while memory grows + // downwards. Square brackets denote regions possibly larger than single + // 64 bit slots. + // + // STACK (interpreted Java calls JIT generated Java): + // + // [JIT_FRAME] <-- SP (mod 16 = 0) + // [TOP_IJAVA_FRAME] + // ... + // + // + // JIT_FRAME (is a C frame according to z/Architecture ABI): + // + // [out_preserve] + // [out_args] + // [spills] + // [monitor] (optional) + // ... 
+ // [monitor] (optional) + // [in_preserve] added / removed by prolog / epilog + + public: + + struct z_top_jit_abi_32 { + uint64_t callers_sp; + uint64_t return_pc; + uint64_t toc; + uint64_t tmp; + }; + + #define _z_top_jit_abi(_component) \ + (offset_of(frame::z_top_jit_abi_32, _component)) + + struct jit_monitor { + uint64_t monitor[1]; + }; + + struct jit_in_preserve { + // Used to provide a z/Architecture ABI on top of a jit frame. + // nothing to add here! + }; + + struct jit_out_preserve : z_top_jit_abi_32 { + // Nothing to add here! + }; + + enum { + z_jit_out_preserve_size = sizeof(jit_out_preserve) + }; + + typedef enum { + jit_monitor_size_in_4_byte_units = sizeof(jit_monitor) / 4, + + // Stack alignment requirement. Log_2 of alignment size in bits. + // log_2(16*8 bits) = 7. + jit_log_2_of_stack_alignment_in_bits = 7, + + jit_out_preserve_size_in_4_byte_units = sizeof(jit_out_preserve) / 4, + + jit_in_preserve_size_in_4_byte_units = sizeof(jit_in_preserve) / 4 + } jit_frame_constants; + + + // C2I adapter frames: + // + // STACK (interpreted called from compiled, on entry to frame manager): + // + // [TOP_C2I_FRAME] + // [JIT_FRAME] + // ... + // + // + // STACK (interpreted called from compiled, after interpreter has been pushed): + // + // [TOP_IJAVA_FRAME] + // [PARENT_C2I_FRAME] + // [JIT_FRAME] + // ... + // + // + // TOP_C2I_FRAME: + // + // [TOP_IJAVA_FRAME_ABI] + // [outgoing Java arguments] + // alignment (optional) + // + // + // PARENT_C2I_FRAME: + // + // [PARENT_IJAVA_FRAME_ABI] + // alignment (optional) + // [callee's locals w/o arguments] + // [outgoing Java arguments] + // alignment (optional) + + private: + + // STACK: + // ... + // [THIS_FRAME] <-- this._sp (stack pointer for this frame) + // [CALLER_FRAME] <-- this.fp() (_sp of caller's frame) + // ... + // + + // NOTE: Stack pointer is now held in the base class, so remove it from here. + + // Frame pointer for this frame. + intptr_t* _fp; + + // Needed by deoptimization. 
+ intptr_t* _unextended_sp; + + public: + + // Interface for all frames: + + // Accessors + + inline intptr_t* fp() const { return _fp; } + + private: + + inline void find_codeblob_and_set_pc_and_deopt_state(address pc); + + // Constructors + + public: + frame(intptr_t* sp); + // To be used, if sp was not extended to match callee's calling convention. + frame(intptr_t* sp, address pc); + frame(intptr_t* sp, address pc, intptr_t* unextended_sp); + + // Access frame via stack pointer. + inline intptr_t* sp_addr_at(int index) const { return &sp()[index]; } + inline intptr_t sp_at( int index) const { return *sp_addr_at(index); } + + // Access ABIs. + inline z_abi_16* own_abi() const { return (z_abi_16*) sp(); } + inline z_abi_160* callers_abi() const { return (z_abi_160*) fp(); } + + private: + + intptr_t* compiled_sender_sp(CodeBlob* cb) const; + address* compiled_sender_pc_addr(CodeBlob* cb) const; + + address* sender_pc_addr(void) const; + + public: + + // Additional interface for interpreter frames: + static int interpreter_frame_interpreterstate_size_in_bytes(); + static int interpreter_frame_monitor_size_in_bytes(); + + private: + + // template interpreter state + inline z_ijava_state* ijava_state() const; + + // Where z_ijava_state.monitors is saved. + inline BasicObjectLock** interpreter_frame_monitors_addr() const; + // Where z_ijava_state.esp is saved. + inline intptr_t** interpreter_frame_esp_addr() const; + + public: + inline intptr_t* interpreter_frame_top_frame_sp(); + inline void interpreter_frame_set_tos_address(intptr_t* x); + inline void interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp); + inline void interpreter_frame_set_sender_sp(intptr_t* sender_sp); +#ifdef ASSERT + inline void interpreter_frame_set_magic(); +#endif + + // monitors: + + // Next two functions read and write z_ijava_state.monitors. 
+ private: + inline BasicObjectLock* interpreter_frame_monitors() const; + inline void interpreter_frame_set_monitors(BasicObjectLock* monitors); + + public: + + // Additional interface for entry frames: + inline z_entry_frame_locals* entry_frame_locals() const { + return (z_entry_frame_locals*) (((address) fp()) - z_entry_frame_locals_size); + } + + public: + + // Get caller pc from stack slot of gpr14. + address native_sender_pc() const; + // Get caller pc from stack slot of gpr10. + address callstub_sender_pc() const; + + // Dump all frames starting at a given C stack pointer. + // max_frames: Limit number of traced frames. + // <= 0 --> full trace + // > 0 --> trace the #max_frames topmost frames + static void back_trace(outputStream* st, intptr_t* start_sp, intptr_t* top_pc, + unsigned long flags, int max_frames = 0); + + enum { + // This enum value specifies the offset from the pc remembered by + // call instructions to the location where control returns to + // after a normal return. Most architectures remember the return + // location directly, i.e. the offset is zero. This is the case + // for z/Architecture, too. + // + // Normal return address is the instruction following the branch. + pc_return_offset = 0, + }; + +#endif // CPU_S390_VM_FRAME_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/frame_s390.inline.hpp b/hotspot/src/cpu/s390/vm/frame_s390.inline.hpp new file mode 100644 index 00000000000..9f56814d074 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/frame_s390.inline.hpp @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_FRAME_S390_INLINE_HPP +#define CPU_S390_VM_FRAME_S390_INLINE_HPP + +#include "code/codeCache.hpp" +#include "code/vmreg.inline.hpp" + +// Inline functions for z/Architecture frames: + +inline void frame::find_codeblob_and_set_pc_and_deopt_state(address pc) { + assert(pc != NULL, "precondition: must have PC"); + + _cb = CodeCache::find_blob(pc); + _pc = pc; // Must be set for get_deopt_original_pc(). + + _fp = (intptr_t *) own_abi()->callers_sp; + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } + + assert(((uint64_t)_sp & 0x7) == 0, "SP must be 8-byte aligned"); +} + +// Constructors + +// Initialize all fields, _unextended_sp will be adjusted in find_codeblob_and_set_pc_and_deopt_state. 
+inline frame::frame() : _sp(NULL), _unextended_sp(NULL), _fp(NULL), _cb(NULL), _pc(NULL), _deopt_state(unknown) {} + +inline frame::frame(intptr_t* sp) : _sp(sp), _unextended_sp(sp) { + find_codeblob_and_set_pc_and_deopt_state((address)own_abi()->return_pc); +} + +inline frame::frame(intptr_t* sp, address pc) : _sp(sp), _unextended_sp(sp) { + find_codeblob_and_set_pc_and_deopt_state(pc); // Also sets _cb, _fp and _deopt_state. +} + +inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp), _unextended_sp(unextended_sp) { + find_codeblob_and_set_pc_and_deopt_state(pc); // Also sets _cb, _fp and _deopt_state. +} + +// Generic constructor. Used by pns() in debug.cpp only +#ifndef PRODUCT +inline frame::frame(void* sp, void* pc, void* unextended_sp) : + _sp((intptr_t*)sp), _unextended_sp((intptr_t*)unextended_sp), _cb(NULL), _pc(NULL) { + find_codeblob_and_set_pc_and_deopt_state((address)pc); // Also sets _cb, _fp and _deopt_state. +} +#endif + +// template interpreter state +inline frame::z_ijava_state* frame::ijava_state() const { + z_ijava_state* state = (z_ijava_state*) ((uintptr_t)fp() - z_ijava_state_size); + assert(state->magic == (intptr_t) frame::z_istate_magic_number, + "wrong z_ijava_state in interpreter frame (no magic found)"); + return state; +} + +inline BasicObjectLock** frame::interpreter_frame_monitors_addr() const { + return (BasicObjectLock**) &(ijava_state()->monitors); +} + +// The next two functions read and write z_ijava_state.monitors. +inline BasicObjectLock* frame::interpreter_frame_monitors() const { + return *interpreter_frame_monitors_addr(); +} +inline void frame::interpreter_frame_set_monitors(BasicObjectLock* monitors) { + *interpreter_frame_monitors_addr() = monitors; +} + +// Accessors + +// Return unique id for this frame. The id must have a value where we +// can distinguish identity and younger/older relationship. NULL +// represents an invalid (incomparable) frame.
+inline intptr_t* frame::id(void) const { + // Use _fp. _sp or _unextended_sp wouldn't be correct due to resizing. + return _fp; +} + +// Return true if this frame is younger (more recent activation) than +// the frame represented by id. +inline bool frame::is_younger(intptr_t* id) const { + assert(this->id() != NULL && id != NULL, "NULL frame id"); + // Stack grows towards smaller addresses on z/Architecture. + return this->id() < id; +} + +// Return true if this frame is older (less recent activation) than +// the frame represented by id. +inline bool frame::is_older(intptr_t* id) const { + assert(this->id() != NULL && id != NULL, "NULL frame id"); + // Stack grows towards smaller addresses on z/Architecture. + return this->id() > id; +} + +inline int frame::frame_size(RegisterMap* map) const { + // Stack grows towards smaller addresses on z/Linux: sender is at a higher address. + return sender_sp() - sp(); +} + +// Ignore c2i adapter frames. +inline intptr_t* frame::unextended_sp() const { + return _unextended_sp; +} + +inline address frame::sender_pc() const { + return (address) callers_abi()->return_pc; +} + +// Get caller pc, if caller is native from stack slot of gpr14. +inline address frame::native_sender_pc() const { + return (address) callers_abi()->gpr14; +} + +// Get caller pc from stack slot of gpr10. 
+inline address frame::callstub_sender_pc() const { + return (address) callers_abi()->gpr10; +} + +inline address* frame::sender_pc_addr() const { + return (address*) &(callers_abi()->return_pc); +} + +inline intptr_t* frame::sender_sp() const { + return (intptr_t*) callers_abi(); +} + +inline intptr_t* frame::link() const { + return (intptr_t*) callers_abi()->callers_sp; +} + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**) &(ijava_state()->locals); +} + +inline intptr_t* frame::interpreter_frame_bcp_addr() const { + return (intptr_t*) &(ijava_state()->bcp); +} + +inline intptr_t* frame::interpreter_frame_mdp_addr() const { + return (intptr_t*) &(ijava_state()->mdx); +} + +// Bottom(base) of the expression stack (highest address). +inline intptr_t* frame::interpreter_frame_expression_stack() const { + return (intptr_t*)interpreter_frame_monitor_end() - 1; +} + +inline jint frame::interpreter_frame_expression_stack_direction() { + return -1; +} + +inline intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + return &interpreter_frame_tos_address()[offset]; +} + + +// monitor elements + +// End is lower in memory than begin, and beginning element is oldest element. +// Also begin is one past last monitor. + +inline intptr_t* frame::interpreter_frame_top_frame_sp() { + return (intptr_t*)ijava_state()->top_frame_sp; +} + +inline void frame::interpreter_frame_set_top_frame_sp(intptr_t* top_frame_sp) { + ijava_state()->top_frame_sp = (intptr_t) top_frame_sp; +} + +inline void frame::interpreter_frame_set_sender_sp(intptr_t* sender_sp) { + ijava_state()->sender_sp = (intptr_t) sender_sp; +} + +#ifdef ASSERT +inline void frame::interpreter_frame_set_magic() { + ijava_state()->magic = (intptr_t) frame::z_istate_magic_number; +} +#endif + +// Where z_ijava_state.esp is saved. 
+inline intptr_t** frame::interpreter_frame_esp_addr() const { + return (intptr_t**) &(ijava_state()->esp); +} + +// top of expression stack (lowest address) +inline intptr_t* frame::interpreter_frame_tos_address() const { + return *interpreter_frame_esp_addr() + 1; +} + +inline void frame::interpreter_frame_set_tos_address(intptr_t* x) { + *interpreter_frame_esp_addr() = x - 1; +} + +// Stack slot needed for native calls and GC. +inline oop * frame::interpreter_frame_temp_oop_addr() const { + return (oop *) ((address) _fp + _z_ijava_state_neg(oop_tmp)); +} + +// In keeping with Intel side: end is lower in memory than begin. +// Beginning element is oldest element. Also begin is one past last monitor. +inline BasicObjectLock * frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*)ijava_state(); +} + +inline BasicObjectLock * frame::interpreter_frame_monitor_end() const { + return interpreter_frame_monitors(); +} + +inline void frame::interpreter_frame_set_monitor_end(BasicObjectLock* monitors) { + interpreter_frame_set_monitors((BasicObjectLock *)monitors); +} + +inline int frame::interpreter_frame_monitor_size() { + // Number of stack slots for a monitor + return round_to(BasicObjectLock::size() /* number of stack slots */, + WordsPerLong /* Number of stack slots for a Java long. */); +} + +inline int frame::interpreter_frame_monitor_size_in_bytes() { + // Number of bytes for a monitor. 
+ return frame::interpreter_frame_monitor_size() * wordSize; +} + +inline int frame::interpreter_frame_interpreterstate_size_in_bytes() { + return z_ijava_state_size; +} + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)&(ijava_state()->method); +} + +inline oop* frame::interpreter_frame_mirror_addr() const { + return (oop*)&(ijava_state()->mirror); +} + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + return (ConstantPoolCache**)&(ijava_state()->cpoolCache); +} + +// entry frames + +inline intptr_t* frame::entry_frame_argument_at(int offset) const { + // Since an entry frame always calls the interpreter first, + // the parameters are on the stack and relative to known register in the + // entry frame. + intptr_t* tos = (intptr_t*) entry_frame_locals()->arguments_tos_address; + return &tos[offset + 1]; // prepushed tos +} + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**) &entry_frame_locals()->call_wrapper_address; +} + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*) map->location(Z_R2->as_VMReg())); // R2 is return register. +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*) map->location(Z_R2->as_VMReg())) = obj; // R2 is return register. +} + +inline intptr_t* frame::real_fp() const { + return fp(); +} + +#endif // CPU_S390_VM_FRAME_S390_INLINE_HPP diff --git a/hotspot/src/cpu/s390/vm/globalDefinitions_s390.hpp b/hotspot/src/cpu/s390/vm/globalDefinitions_s390.hpp new file mode 100644 index 00000000000..83261cfdf47 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/globalDefinitions_s390.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP +#define CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP + +#ifdef CC_INTERP +#error "CC_INTERP is not supported on z/Architecture." +#endif + +// Convenience macro that produces a string literal with the filename +// and linenumber of the location where the macro was used. +#ifndef FILE_AND_LINE +#define FILE_AND_LINE __FILE__ ":" XSTR(__LINE__) +#endif + +#define ShortenBranches true + +const int StackAlignmentInBytes = 16; + +#define SUPPORTS_NATIVE_CX8 + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are extended to 64 bits. +// This is the case on z/Architecture. +const bool CCallingConventionRequiresIntsAsLongs = true; + +// Contended Locking reorder and cache line bucket. +// This setting should be kept compatible with vm_version_s390.cpp. +// The expected size in bytes of a cache line, used to pad data structures. 
+#define DEFAULT_CACHE_LINE_SIZE 256 + +#endif // CPU_S390_VM_GLOBALDEFINITIONS_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/globals_s390.hpp b/hotspot/src/cpu/s390/vm/globals_s390.hpp new file mode 100644 index 00000000000..707534ac831 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/globals_s390.hpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_GLOBALS_S390_HPP +#define CPU_S390_VM_GLOBALS_S390_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) +// Sorted according to sparc. + +// z/Architecture remembers branch targets, so don't share vtables. +define_pd_global(bool, ShareVtableStubs, false); +define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this. 
+ +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks. +define_pd_global(bool, TrapBasedNullChecks, true); +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast. + +define_pd_global(uintx, CodeCacheSegmentSize, 256); +// This shall be at least 32 for proper branch target alignment. +// Ideally, this is 256 (cache line size). This keeps code and data +// on separate lines. But we reduced it to 64 since 256 increased +// code size significantly by padding nops between IVC and second UEP. +define_pd_global(intx, CodeEntryAlignment, 64); +define_pd_global(intx, OptoLoopAlignment, 2); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 2000); + +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) +// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the +// stack. To pass stack overflow tests we need 20 shadow pages. +#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+2)) +#define DEFAULT_STACK_RESERVED_PAGES (0) + +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES +#define MIN_STACK_RESERVED_PAGES (0) + +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); +define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + +define_pd_global(bool, UseMembar, false); + +define_pd_global(bool, PreserveFramePointer, false); + +// GC Ergo Flags +define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread.
+ +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, CompactStrings, true); + +// 8146801 (Short Array Allocation): No performance work done here yet. +define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong); + +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint, writeable) \ + \ + /* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \ + /* indirect call by a direct call. */ \ + product(bool, ReoptimizeCallSequences, true, \ + "Reoptimize code-sequences of calls at runtime.") \ + \ + product(bool, UseCountLeadingZerosInstruction, true, \ + "Use count leading zeros instruction.") \ + \ + product(bool, UseByteReverseInstruction, true, \ + "Use byte reverse instruction.") \ + \ + product(bool, ExpandLoadingBaseDecode, true, "Expand the assembler " \ + "instruction required to load the base from DecodeN nodes during " \ + "matching.") \ + product(bool, ExpandLoadingBaseDecode_NN, true, "Expand the assembler " \ + "instruction required to load the base from DecodeN_NN nodes " \ + "during matching.") \ + product(bool, ExpandLoadingBaseEncode, true, "Expand the assembler " \ + "instruction required to load the base from EncodeP nodes during " \ + "matching.") \ + product(bool, ExpandLoadingBaseEncode_NN, true, "Expand the assembler " \ + "instruction required to load the base from EncodeP_NN nodes " \ + "during matching.") \ + \ + /* Seems to pay off with 2 pages already. */ \ + product(size_t, MVCLEThreshold, +2*(4*K), \ + "Threshold above which page-aligned MVCLE copy/init is used.") \ + \ + product(bool, PreferLAoverADD, false, \ + "Use LA/LAY instructions over ADD instructions (z/Architecture).") \ + \ + develop(bool, ZapEmptyStackFields, false, "Write 0x0101... to empty stack" \ + " fields. 
Use this to ease stack debugging.") \ + \ + /* Adjacent string literals are concatenated verbatim: keep the separating blank. */ \ + product(bool, TraceTraps, false, "Trace all traps the signal handler" \ + " handles.") + +#endif // CPU_S390_VM_GLOBALS_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/icBuffer_s390.cpp b/hotspot/src/cpu/s390/vm/icBuffer_s390.cpp new file mode 100644 index 00000000000..0dc936d6fad --- /dev/null +++ b/hotspot/src/cpu/s390/vm/icBuffer_s390.cpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_s390.hpp" +#include "oops/oop.inline.hpp" + +#define __ masm. 
+ +int InlineCacheBuffer::ic_stub_code_size() { + return MacroAssembler::load_const_size() + Assembler::z_brul_size(); // load_const + z_brul, as emitted by assemble_ic_buffer_code(). +} + +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_oop, address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler masm(&code); + // Note: even though the code contains an embedded oop, we do not need reloc info + // because + // (1) the oop is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear. + + // Load the oop, + __ load_const(Z_method, (address) cached_oop); // inline cache reg = Z_method + // and do a tail-call (pc-relative). + __ z_brul((address) entry_point); + __ flush(); +} + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // Creation also verifies the object. + return MacroAssembler::get_target_addr_pcrel(move->next_instruction_address()); // Target of the branch emitted right after the constant load. +} + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // Creation also verifies the object. + return (void*)move->data(); +} diff --git a/hotspot/src/cpu/s390/vm/icache_s390.cpp b/hotspot/src/cpu/s390/vm/icache_s390.cpp new file mode 100644 index 00000000000..d35f126c9a2 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/icache_s390.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation.
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "runtime/icache.hpp" + +// interface (see ICache::flush_icache_stub_t): +// address addr (Z_R2, ignored) +// int lines (Z_R3, ignored) +// int magic (Z_R4) +// +// returns: int (Z_R2) +// +// Note: z/Architecture doesn't need explicit flushing, so this is implemented as a nop. + +// C function installed as the flush stub: does nothing but return magic. +int z_flush_icache(address start, int lines, int magic) { return magic; } + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { + *flush_icache_stub = (ICache::flush_icache_stub_t)z_flush_icache; + + // First call to flush itself. + ICache::invalidate_range((address)(*flush_icache_stub), 0); +} + diff --git a/hotspot/src/cpu/s390/vm/icache_s390.hpp b/hotspot/src/cpu/s390/vm/icache_s390.hpp new file mode 100644 index 00000000000..ee6b3671fff --- /dev/null +++ b/hotspot/src/cpu/s390/vm/icache_s390.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_ICACHE_S390_HPP +#define CPU_S390_VM_ICACHE_S390_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. + +class ICache : public AbstractICache { + public: + enum { + stub_size = 0, // Size of the icache flush stub in bytes. + line_size = 2, // There is no explicit flushing on z/Architecture. + // This value is ignored by the flush stub (a nop !). + log2_line_size = 1 // Must stay equal to log2(line_size). + }; + + // Use default implementation. +}; + +#endif // CPU_S390_VM_ICACHE_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/interp_masm_s390.cpp b/hotspot/src/cpu/s390/vm/interp_masm_s390.cpp new file mode 100644 index 00000000000..c53fa419a27 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/interp_masm_s390.cpp @@ -0,0 +1,2127 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Major contributions by AHa, AS, JL, ML. + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interp_masm_s390.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + +// Implementation of InterpreterMacroAssembler. +// This file specializes the assembler with interpreter-specific macros.
+ +#ifdef PRODUCT +#define BLOCK_COMMENT(str) +#define BIND(label) bind(label); +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +#endif + +void InterpreterMacroAssembler::jump_to_entry(address entry, Register Rscratch) { + assert(entry != NULL, "Entry must have been generated by now"); + assert(Rscratch != Z_R0, "Can't use R0 for addressing"); + branch_optimized(Assembler::bcondAlways, entry); +} + +void InterpreterMacroAssembler::empty_expression_stack(void) { + get_monitors(Z_R1_scratch); + add2reg(Z_esp, -Interpreter::stackElementSize, Z_R1_scratch); +} + +// Dispatch code executed in the prolog of a bytecode which does not do its +// own dispatch. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int bcp_incr) { + // On z/Architecture we are short on registers, therefore we do not preload the + // dispatch address of the next bytecode. +} + +// Dispatch code executed in the epilog of a bytecode which does not do its +// own dispatch. +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr) { + z_llgc(Z_bytecode, bcp_incr, Z_R0, Z_bcp); // Load next bytecode. + add2reg(Z_bcp, bcp_incr); // Advance bcp. Add2reg produces optimal code. + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +// Common code to dispatch and dispatch_only. +// Dispatch on the bytecode value in Z_bytecode; Z_bcp is not advanced here. + +void InterpreterMacroAssembler::dispatch_base(TosState state, address* table) { + verify_FPU(1, state); + +#ifdef ASSERT + address reentry = NULL; + { Label OK; + // Check if the frame pointer in Z_fp is correct.
+ z_cg(Z_fp, 0, Z_SP); + z_bre(OK); + reentry = stop_chain_static(reentry, "invalid frame pointer Z_fp: " FILE_AND_LINE); + bind(OK); + } + { Label OK; + // check if the locals pointer in Z_locals is correct + z_cg(Z_locals, _z_ijava_state_neg(locals), Z_fp); + z_bre(OK); + reentry = stop_chain_static(reentry, "invalid locals pointer Z_locals: " FILE_AND_LINE); + bind(OK); + } +#endif + + // TODO: Maybe implement +VerifyActivationFrameSize here. + // verify_thread(); // Too slow. We will just verify on method entry & exit. + verify_oop(Z_tos, state); +#ifdef FAST_DISPATCH // NOTE(review): this branch uses names (Lbyte_code, IdispatchTables, G3_scratch) found nowhere else in this file - confirm FAST_DISPATCH is never defined for this port. + if (table == Interpreter::dispatch_table(state)) { + // Use IdispatchTables. + add(Lbyte_code, Interpreter::distance_from_dispatch_table(state), Lbyte_code); + // Add offset to correct dispatch table. + sll(Lbyte_code, LogBytesPerWord, Lbyte_code); // Multiply by wordSize. + ld_ptr(IdispatchTables, Lbyte_code, G3_scratch); // Get entry addr. + } else +#endif + { + // Dispatch table to use. + load_absolute_address(Z_tmp_1, (address) table); // Z_tmp_1 = table; + + // 0 <= Z_bytecode < 256 => Use a 32 bit shift, because it is shorter than sllg. + // Z_bytecode must have been loaded zero-extended for this approach to be correct. + z_sll(Z_bytecode, LogBytesPerWord, Z_R0); // Multiply by wordSize. + z_lg(Z_tmp_1, 0, Z_bytecode, Z_tmp_1); // Get entry addr. + } + z_br(Z_tmp_1); +} + +void InterpreterMacroAssembler::dispatch_only(TosState state) { + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address *table) { + // Load current bytecode. + z_llgc(Z_bytecode, Address(Z_bcp, (intptr_t)0)); + dispatch_base(state, table); +} + +// The following call_VM*_base() methods overload and mask the respective +// declarations/definitions in class MacroAssembler.
They are meant as a "detour" +// to perform additional, template interpreter specific tasks before actually +// calling their MacroAssembler counterparts. + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point) { + bool allow_relocation = true; // Generally valid variant. Assume code is relocated. + // interpreter specific + // Note: No need to save/restore bcp (Z_R13) pointer since these are callee + // saved registers and no blocking/ GC can happen in leaf calls. + + // super call + MacroAssembler::call_VM_leaf_base(entry_point, allow_relocation); +} + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) { + // interpreter specific + // Note: No need to save/restore bcp (Z_R13) pointer since these are callee + // saved registers and no blocking/ GC can happen in leaf calls. + + // super call + MacroAssembler::call_VM_leaf_base(entry_point, allow_relocation); +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, Register last_java_sp, + address entry_point, bool check_exceptions) { + bool allow_relocation = true; // Generally valid variant. Assume code is relocated. + // interpreter specific + + save_bcp(); + save_esp(); + // super call + MacroAssembler::call_VM_base(oop_result, last_java_sp, + entry_point, allow_relocation, check_exceptions); + restore_bcp(); +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, Register last_java_sp, + address entry_point, bool allow_relocation, + bool check_exceptions) { + // interpreter specific + + save_bcp(); + save_esp(); + // super call + MacroAssembler::call_VM_base(oop_result, last_java_sp, + entry_point, allow_relocation, check_exceptions); + restore_bcp(); +} + +void InterpreterMacroAssembler::check_and_handle_popframe(Register scratch_reg) { + if (JvmtiExport::can_pop_frame()) { + BLOCK_COMMENT("check_and_handle_popframe {"); + Label L; + // Initiate popframe handling only if it is not already being + // processed.
If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. + // TODO: Check if all four state combinations could be visible. + // If (processing and !pending) is an invisible/impossible state, + // there is optimization potential by testing both bits at once. + // Then, All_Zeroes and All_Ones mean skip, Mixed means do it. + testbit(Address(Z_thread, JavaThread::popframe_condition_offset()), + exact_log2(JavaThread::popframe_pending_bit)); + z_bfalse(L); + testbit(Address(Z_thread, JavaThread::popframe_condition_offset()), + exact_log2(JavaThread::popframe_processing_bit)); + z_btrue(L); + + // Call Interpreter::remove_activation_preserving_args_entry() to get the + // address of the same-named entrypoint in the generated interpreter code. + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + // The above call should (as its only effect) return the contents of the field + // _remove_activation_preserving_args_entry in Z_RET. + // We just jump there to have the work done. + z_br(Z_RET); + // There is no way for control to fall thru here.
+ + bind(L); + BLOCK_COMMENT("} check_and_handle_popframe"); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + Register RjvmtiState = Z_R1_scratch; + int tos_off = in_bytes(JvmtiThreadState::earlyret_tos_offset()); + int oop_off = in_bytes(JvmtiThreadState::earlyret_oop_offset()); + int val_off = in_bytes(JvmtiThreadState::earlyret_value_offset()); + int state_off = in_bytes(JavaThread::jvmti_thread_state_offset()); + + z_lg(RjvmtiState, state_off, Z_thread); + + switch (state) { + case atos: z_lg(Z_tos, oop_off, RjvmtiState); + store_const(Address(RjvmtiState, oop_off), 0L, 8, 8, Z_R0_scratch); + break; + case ltos: z_lg(Z_tos, val_off, RjvmtiState); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: z_llgf(Z_tos, val_off, RjvmtiState); break; + case ftos: z_le(Z_ftos, val_off, RjvmtiState); break; + case dtos: z_ld(Z_ftos, val_off, RjvmtiState); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + + // Clean up tos value in the jvmti thread state. + store_const(Address(RjvmtiState, val_off), 0L, 8, 8, Z_R0_scratch); + // Set tos state field to illegal value. + store_const(Address(RjvmtiState, tos_off), ilgl, 4, 1, Z_R0_scratch); +} + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register scratch_reg) { + if (JvmtiExport::can_force_early_return()) { + BLOCK_COMMENT("check_and_handle_earlyret {"); + Label L; + // Arg regs are safe to use, because we are just behind the call in call_VM_base. + Register jvmti_thread_state = Z_ARG2; + Register tmp = Z_ARG3; + load_and_test_long(jvmti_thread_state, Address(Z_thread, JavaThread::jvmti_thread_state_offset())); + z_bre(L); // if (thread->jvmti_thread_state() == NULL) exit; + + // Initiate earlyret handling only if it is not already being processed.
+ // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + + assert((JvmtiThreadState::earlyret_pending != 0) && (JvmtiThreadState::earlyret_inactive == 0), + "must fix this check, when changing the values of the earlyret enum"); + assert(JvmtiThreadState::earlyret_pending == 1, "must fix this check, when changing the values of the earlyret enum"); + + load_and_test_int(tmp, Address(jvmti_thread_state, JvmtiThreadState::earlyret_state_offset())); + z_brz(L); // if (thread->jvmti_thread_state()->_earlyret_state != JvmtiThreadState::earlyret_pending) exit; + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. + assert(sizeof(TosState) == 4, "unexpected size"); + z_l(Z_ARG1, Address(jvmti_thread_state, JvmtiThreadState::earlyret_tos_offset())); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), Z_ARG1); + // The above call should (as its only effect) return the address of that + // entrypoint (selected by the tos state passed in Z_ARG1) in Z_RET. + // We just jump there to have the work done. + z_br(Z_RET); + // There is no way for control to fall thru here.
+ + bind(L); + BLOCK_COMMENT("} check_and_handle_earlyret"); + } +} + +void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { + lgr_if_needed(Z_ARG1, arg_1); + assert(arg_2 != Z_ARG1, "smashed argument"); + lgr_if_needed(Z_ARG2, arg_2); + MacroAssembler::call_VM_leaf_base(entry_point, true); // Explicitly qualified: calls the MacroAssembler version directly. +} + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size) { + Address param(Z_bcp, bcp_offset); + + BLOCK_COMMENT("get_cache_index_at_bcp {"); + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + load_sized_value(index, param, 2, false /*signed*/); + } else if (index_size == sizeof(u4)) { + + load_sized_value(index, param, 4, false); + + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + not_(index); // Convert to plain index. + } else if (index_size == sizeof(u1)) { + z_llgc(index, param); + } else { + ShouldNotReachHere(); + } + BLOCK_COMMENT("}"); +} + + +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register cpe_offset, + int bcp_offset, size_t index_size) { + BLOCK_COMMENT("get_cache_and_index_at_bcp {"); + assert_different_registers(cache, cpe_offset); + get_cache_index_at_bcp(cpe_offset, bcp_offset, index_size); + z_lg(cache, Address(Z_fp, _z_ijava_state_neg(cpoolCache))); + // Convert from field index to ConstantPoolCache offset in bytes. + z_sllg(cpe_offset, cpe_offset, exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord)); + BLOCK_COMMENT("}"); +} + +// Kills Z_R0_scratch.
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register cpe_offset, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + BLOCK_COMMENT("get_cache_and_index_and_bytecode_at_bcp {"); + get_cache_and_index_at_bcp(cache, cpe_offset, bcp_offset, index_size); + + // We want to load (from CP cache) the bytecode that corresponds to the passed-in byte_no. + // It is located at (cache + cpe_offset + base_offset + indices_offset + (8-1) (last byte in DW) - (byte_no+1)). + // Instead of loading, shifting and masking a DW, we just load that one byte of interest with z_llgc (unsigned). + const int base_ix_off = in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()); + const int off_in_DW = (8-1) - (1+byte_no); + assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); + assert(ConstantPoolCacheEntry::bytecode_1_mask == 0xff, ""); + load_sized_value(bytecode, Address(cache, cpe_offset, base_ix_off+off_in_DW), 1, false /*signed*/); + + BLOCK_COMMENT("}"); +} + +// Load object from cpool->resolved_references(index). +void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) { + assert_different_registers(result, index); + get_constant_pool(result); + + // Convert + // - from field index to resolved_references() index and + // - from word index to byte offset. + // Since this is a java object, it is potentially compressed. + Register tmp = index; // reuse + z_sllg(index, index, LogBytesPerHeapOop); // Offset into resolved references array. + // Load pointer for resolved_references[] objArray. + z_lg(result, ConstantPool::resolved_references_offset_in_bytes(), result); + // JNIHandles::resolve(result) + z_lg(result, 0, result); // Load resolved references array itself.
+#ifdef ASSERT + NearLabel index_ok; + z_lgf(Z_R0, Address(result, arrayOopDesc::length_offset_in_bytes())); + z_sllg(Z_R0, Z_R0, LogBytesPerHeapOop); + compare64_and_branch(tmp, Z_R0, Assembler::bcondLow, index_ok); + stop("resolved reference index out of bounds", 0x09256); + bind(index_ok); +#endif + z_agr(result, index); // Address of indexed array element. + load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + BLOCK_COMMENT("get_cache_entry_pointer_at_bcp {"); + get_cache_and_index_at_bcp(cache, tmp, bcp_offset, index_size); + add2reg_with_index(cache, in_bytes(ConstantPoolCache::base_offset()), tmp, cache); + BLOCK_COMMENT("}"); +} + +// Generate a subtype check: branch to ok_is_subtype if sub_klass is +// a subtype of super_klass. Blows registers Rsuper_klass, Rsub_klass, Rtmp1, Rtmp2. +void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, + Register Rsuper_klass, + Register Rtmp1, + Register Rtmp2, + Label &ok_is_subtype) { + // Profile the not-null value's klass. + profile_typecheck(Rtmp1, Rsub_klass, Rtmp2); + + // Do the check. + check_klass_subtype(Rsub_klass, Rsuper_klass, Rtmp1, Rtmp2, ok_is_subtype); + + // Profile the failure of the check. + profile_typecheck_failed(Rtmp1, Rtmp2); +} + +// Pop the topmost len elements from the stack. They just disappear. +// Useful if consumed previously by access via stackTop(). +void InterpreterMacroAssembler::popx(int len) { + add2reg(Z_esp, len*Interpreter::stackElementSize); + debug_only(verify_esp(Z_esp, Z_R1_scratch)); +} + +// Get Address object of stack top. No checks. No pop. +// Purpose: - Provide address of stack operand to exploit reg-mem operations. +// - Avoid RISC-like mem2reg - reg-reg-op sequence.
+Address InterpreterMacroAssembler::stackTop() { + return Address(Z_esp, Interpreter::expr_offset_in_bytes(0)); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + z_l(r, Interpreter::expr_offset_in_bytes(0), Z_esp); + add2reg(Z_esp, Interpreter::stackElementSize); + assert_different_registers(r, Z_R1_scratch); + debug_only(verify_esp(Z_esp, Z_R1_scratch)); +} + +void InterpreterMacroAssembler::pop_ptr(Register r) { + z_lg(r, Interpreter::expr_offset_in_bytes(0), Z_esp); + add2reg(Z_esp, Interpreter::stackElementSize); + assert_different_registers(r, Z_R1_scratch); + debug_only(verify_esp(Z_esp, Z_R1_scratch)); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + z_lg(r, Interpreter::expr_offset_in_bytes(0), Z_esp); + add2reg(Z_esp, 2*Interpreter::stackElementSize); // A long occupies two stack elements. + assert_different_registers(r, Z_R1_scratch); + debug_only(verify_esp(Z_esp, Z_R1_scratch)); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister f) { + mem2freg_opt(f, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)), false); + add2reg(Z_esp, Interpreter::stackElementSize); + debug_only(verify_esp(Z_esp, Z_R1_scratch)); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister f) { + mem2freg_opt(f, Address(Z_esp, Interpreter::expr_offset_in_bytes(0)), true); + add2reg(Z_esp, 2*Interpreter::stackElementSize); // A double occupies two stack elements. + debug_only(verify_esp(Z_esp, Z_R1_scratch)); +} + +void InterpreterMacroAssembler::push_i(Register r) { + assert_different_registers(r, Z_R1_scratch); + debug_only(verify_esp(Z_esp, Z_R1_scratch)); + z_st(r, Address(Z_esp)); + add2reg(Z_esp, -Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + z_stg(r, Address(Z_esp)); + add2reg(Z_esp, -Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_l(Register r) { + assert_different_registers(r, Z_R1_scratch); + debug_only(verify_esp(Z_esp, Z_R1_scratch)); + int offset = -Interpreter::stackElementSize; + z_stg(r, Address(Z_esp, offset)); + clear_mem(Address(Z_esp), 
Interpreter::stackElementSize); + add2reg(Z_esp, 2 * offset); +} + +void InterpreterMacroAssembler::push_f(FloatRegister f) { + debug_only(verify_esp(Z_esp, Z_R1_scratch)); + freg2mem_opt(f, Address(Z_esp), false); + add2reg(Z_esp, -Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_d(FloatRegister d) { + debug_only(verify_esp(Z_esp, Z_R1_scratch)); + int offset = -Interpreter::stackElementSize; + freg2mem_opt(d, Address(Z_esp, offset)); + add2reg(Z_esp, 2 * offset); +} + +void InterpreterMacroAssembler::push(TosState state) { + verify_oop(Z_tos, state); + switch (state) { + case atos: push_ptr(); break; + case btos: push_i(); break; + case ztos: // fall through + case ctos: // fall through + case stos: push_i(); break; + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(Z_tos); break; + case btos: pop_i(Z_tos); break; + case ztos: // fall through + case ctos: // fall through + case stos: pop_i(Z_tos); break; + case itos: pop_i(Z_tos); break; + case ltos: pop_l(Z_tos); break; + case ftos: pop_f(Z_ftos); break; + case dtos: pop_d(Z_ftos); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + verify_oop(Z_tos, state); +} + +// Helpers for swap and dup. +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + z_lg(val, Address(Z_esp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + z_stg(val, Address(Z_esp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::prepare_to_jump_from_interpreted(Register method) { + // Satisfy interpreter calling convention (see generate_normal_entry()). + z_lgr(Z_R10, Z_SP); // Set sender sp (aka initial caller sp, aka unextended sp).
+ // Record top_frame_sp, because the callee might modify it, if it's compiled. + z_stg(Z_SP, _z_ijava_state_neg(top_frame_sp), Z_fp); + save_bcp(); + save_esp(); + z_lgr(Z_method, method); // Set Z_method (kills Z_fp!). +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry. +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { + assert_different_registers(method, Z_R10 /*used for initial_caller_sp*/, temp); + prepare_to_jump_from_interpreted(method); + + if (JvmtiExport::can_post_interpreter_events()) { + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + z_lg(Z_R1_scratch, Address(method, Method::from_interpreted_offset())); + MacroAssembler::load_and_test_int(Z_R0_scratch, Address(Z_thread, JavaThread::interp_only_mode_offset())); + z_bcr(bcondEqual, Z_R1_scratch); // Run compiled code if zero. + // Run interpreted. + z_lg(Z_R1_scratch, Address(method, Method::interpreter_entry_offset())); + z_br(Z_R1_scratch); + } else { + // Run compiled code. + z_lg(Z_R1_scratch, Address(method, Method::from_interpreted_offset())); + z_br(Z_R1_scratch); + } +} + +#ifdef ASSERT +void InterpreterMacroAssembler::verify_esp(Register Resp, Register Rtemp) { + // About to read or write Resp[0]. + // Make sure it is not in the monitors or the TOP_IJAVA_FRAME_ABI. + address reentry = NULL; + + { + // Check if the frame pointer in Z_fp is correct. + NearLabel OK; + z_cg(Z_fp, 0, Z_SP); + z_bre(OK); + reentry = stop_chain_static(reentry, "invalid frame pointer Z_fp"); + bind(OK); + } + { + // Resp must not point into the monitor area (too many pops), + // i.e. IJAVA_STATE.monitors > Resp.
+ NearLabel OK; + Register Rmonitors = Rtemp; + z_lg(Rmonitors, _z_ijava_state_neg(monitors), Z_fp); + compareU64_and_branch(Rmonitors, Resp, bcondHigh, OK); + reentry = stop_chain_static(reentry, "too many pops: Z_esp points into monitor area"); + bind(OK); + } + { + // Resp may point to the last word of TOP_IJAVA_FRAME_ABI, but not below + // i.e. !(Z_SP + frame::z_top_ijava_frame_abi_size - Interpreter::stackElementSize > Resp). + NearLabel OK; + Register Rabi_bottom = Rtemp; + add2reg(Rabi_bottom, frame::z_top_ijava_frame_abi_size - Interpreter::stackElementSize, Z_SP); + compareU64_and_branch(Rabi_bottom, Resp, bcondNotHigh, OK); + reentry = stop_chain_static(reentry, "too many pushes: Z_esp points into TOP_IJAVA_FRAME_ABI"); + bind(OK); + } +} + +void InterpreterMacroAssembler::asm_assert_ijava_state_magic(Register tmp) { + Label magic_ok; + load_const_optimized(tmp, frame::z_istate_magic_number); + z_cg(tmp, Address(Z_fp, _z_ijava_state_neg(magic))); + z_bre(magic_ok); + stop_static("error: wrong magic number in ijava_state access"); + bind(magic_ok); +} +#endif // ASSERT + +void InterpreterMacroAssembler::save_bcp() { + z_stg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp))); + asm_assert_ijava_state_magic(Z_bcp); // The check uses Z_bcp (already saved above) as its scratch register. + NOT_PRODUCT(z_lg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp)))); // Reload Z_bcp, which the check may have overwritten. +} + +void InterpreterMacroAssembler::restore_bcp() { + asm_assert_ijava_state_magic(Z_bcp); + z_lg(Z_bcp, Address(Z_fp, _z_ijava_state_neg(bcp))); +} + +void InterpreterMacroAssembler::save_esp() { + z_stg(Z_esp, Address(Z_fp, _z_ijava_state_neg(esp))); +} + +void InterpreterMacroAssembler::restore_esp() { + asm_assert_ijava_state_magic(Z_esp); + z_lg(Z_esp, Address(Z_fp, _z_ijava_state_neg(esp))); +} + +void InterpreterMacroAssembler::get_monitors(Register reg) { + asm_assert_ijava_state_magic(reg); + mem2reg_opt(reg, Address(Z_fp, _z_ijava_state_neg(monitors))); +} + +void InterpreterMacroAssembler::save_monitors(Register reg) { + reg2mem_opt(reg, Address(Z_fp, 
_z_ijava_state_neg(monitors))); +} + +void InterpreterMacroAssembler::get_mdp(Register mdp) { + z_lg(mdp, _z_ijava_state_neg(mdx), Z_fp); +} + +void InterpreterMacroAssembler::save_mdp(Register mdp) { + z_stg(mdp, _z_ijava_state_neg(mdx), Z_fp); +} + +// Values that are only read (besides initialization). +void InterpreterMacroAssembler::restore_locals() { + asm_assert_ijava_state_magic(Z_locals); + z_lg(Z_locals, Address(Z_fp, _z_ijava_state_neg(locals))); +} + +void InterpreterMacroAssembler::get_method(Register reg) { + asm_assert_ijava_state_magic(reg); + z_lg(reg, Address(Z_fp, _z_ijava_state_neg(method))); +} + +void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register Rdst, int bcp_offset, + signedOrNot is_signed) { + // Rdst is an 8-byte return value!!! + + // Unaligned loads incur only a small penalty on z/Architecture. The penalty + // is a few (2..3) ticks, even when the load crosses a cache line + // boundary. In case of a cache miss, the stall could, of course, be + // much longer. + + switch (is_signed) { + case Signed: + z_lgh(Rdst, bcp_offset, Z_R0, Z_bcp); + break; + case Unsigned: + z_llgh(Rdst, bcp_offset, Z_R0, Z_bcp); + break; + default: + ShouldNotReachHere(); + } +} + + +void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register Rdst, int bcp_offset, + setCCOrNot set_cc) { + // Rdst is an 8-byte return value!!! + + // Unaligned loads incur only a small penalty on z/Architecture. The penalty + // is a few (2..3) ticks, even when the load crosses a cache line + // boundary. In case of a cache miss, the stall could, of course, be + // much longer. + + // Both variants implement a sign-extending int2long load. 
+ if (set_cc == set_CC) { + load_and_test_int2long(Rdst, Address(Z_bcp, (intptr_t)bcp_offset)); + } else { + mem2reg_signed_opt( Rdst, Address(Z_bcp, (intptr_t)bcp_offset)); + } +} + +void InterpreterMacroAssembler::get_constant_pool(Register Rdst) { + get_method(Rdst); + mem2reg_opt(Rdst, Address(Rdst, Method::const_offset())); + mem2reg_opt(Rdst, Address(Rdst, ConstMethod::constants_offset())); +} + +void InterpreterMacroAssembler::get_cpool_and_tags(Register Rcpool, Register Rtags) { + get_constant_pool(Rcpool); + mem2reg_opt(Rtags, Address(Rcpool, ConstantPool::tags_offset_in_bytes())); +} + +// Unlock if synchronized method. +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from synchronized blocks. +// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::unlock_if_synchronized_method(TosState state, + bool throw_monitor_exception, + bool install_monitor_exception) { + NearLabel unlocked, unlock, no_unlock; + + { + Register R_method = Z_ARG2; + Register R_do_not_unlock_if_synchronized = Z_ARG3; + + // Get the value of _do_not_unlock_if_synchronized into R_do_not_unlock_if_synchronized. + const Address do_not_unlock_if_synchronized(Z_thread, + JavaThread::do_not_unlock_if_synchronized_offset()); + load_sized_value(R_do_not_unlock_if_synchronized, do_not_unlock_if_synchronized, 1, false /*unsigned*/); + z_mvi(do_not_unlock_if_synchronized, false); // Reset the flag. + + // Check if synchronized method. + get_method(R_method); + verify_oop(Z_tos, state); + push(state); // Save tos/result. + testbit(method2_(R_method, access_flags), JVM_ACC_SYNCHRONIZED_BIT); + z_bfalse(unlocked); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag + // is set.
+ compareU64_and_branch(R_do_not_unlock_if_synchronized, (intptr_t)0L, bcondNotEqual, no_unlock); + } + + // unlock monitor + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + const Address monitor(Z_fp, -(frame::z_ijava_state_size + (int) sizeof(BasicObjectLock))); + // We use Z_ARG2 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly. + load_address(Z_ARG2, monitor); // Address of first monitor. + z_lg(Z_ARG3, Address(Z_ARG2, BasicObjectLock::obj_offset_in_bytes())); + compareU64_and_branch(Z_ARG3, (intptr_t)0L, bcondNotEqual, unlock); + + if (throw_monitor_exception) { + // Entry already unlocked need to throw an exception. + MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. + // If requested, install an illegal_monitor_state_exception. + // Continue with stack unrolling. + if (install_monitor_exception) { + MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); + } + z_bru(unlocked); + } + + bind(unlock); + + unlock_object(Z_ARG2); + + bind(unlocked); + + // I0, I1: Might contain return value + + // Check that all monitors are unlocked. + { + NearLabel loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + // We use Z_ARG2 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly. + Register R_current_monitor = Z_ARG2; + Register R_monitor_block_bot = Z_ARG1; + const Address monitor_block_top(Z_fp, _z_ijava_state_neg(monitors)); + const Address monitor_block_bot(Z_fp, -frame::z_ijava_state_size); + + bind(restart); + // Starting with top-most entry. 
+ z_lg(R_current_monitor, monitor_block_top); + // Points to word before bottom of monitor block. + load_address(R_monitor_block_bot, monitor_block_bot); + z_bru(entry); + + // Entry already locked, need to throw exception. + bind(exception); + + if (throw_monitor_exception) { + // Throw exception. + MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime:: + throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception. + // Unlock does not block, so don't have to worry about the frame. + // We don't have to preserve c_rarg1 since we are going to throw an exception. + unlock_object(R_current_monitor); + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + new_illegal_monitor_state_exception)); + } + z_bru(restart); + } + + bind(loop); + // Check if current entry is used. + load_and_test_long(Z_R0_scratch, Address(R_current_monitor, BasicObjectLock::obj_offset_in_bytes())); + z_brne(exception); + + add2reg(R_current_monitor, entry_size); // Otherwise advance to next entry. + bind(entry); + compareU64_and_branch(R_current_monitor, R_monitor_block_bot, bcondNotEqual, loop); + } + + bind(no_unlock); + pop(state); + verify_oop(Z_tos, state); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from synchronized blocks. +// Remove the activation from the stack.
+// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::remove_activation(TosState state, + Register return_pc, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmti) { + + unlock_if_synchronized_method(state, throw_monitor_exception, install_monitor_exception); + + // Save result (push state before jvmti call and pop it afterwards) and notify jvmti. + notify_method_exit(false, state, notify_jvmti ? NotifyJVMTI : SkipNotifyJVMTI); + + verify_oop(Z_tos, state); + verify_thread(); + + pop_interpreter_frame(return_pc, Z_ARG2, Z_ARG3); +} + +// lock object +// +// Registers alive +// monitor - Address of the BasicObjectLock to be used for locking, +// which must be initialized with the object to lock. +// object - Address of the object to be locked. +void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + monitor, /*check_for_exceptions=*/false); + return; + } + + // template code: + // + // markOop displaced_header = obj->mark().set_unlocked(); + // monitor->lock()->set_displaced_header(displaced_header); + // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) { + // // We stored the monitor address into the object's mark word. + // } else if (THREAD->is_lock_owned((address)displaced_header)) + // // Simple recursive case. + // monitor->lock()->set_displaced_header(NULL); + // } else { + // // Slow path. 
+ // InterpreterRuntime::monitorenter(THREAD, monitor); + // } + + const Register displaced_header = Z_ARG5; + const Register object_mark_addr = Z_ARG4; + const Register current_header = Z_ARG5; + + NearLabel done; + NearLabel slow_case; + + // markOop displaced_header = obj->mark().set_unlocked(); + + // Load markOop from object into displaced_header. + z_lg(displaced_header, oopDesc::mark_offset_in_bytes(), object); + + if (UseBiasedLocking) { + biased_locking_enter(object, displaced_header, Z_R1, Z_R0, done, &slow_case); + } + + // Set displaced_header to be (markOop of object | UNLOCK_VALUE). + z_oill(displaced_header, markOopDesc::unlocked_value); + + // monitor->lock()->set_displaced_header(displaced_header); + + // Initialize the box (Must happen before we update the object mark!). + z_stg(displaced_header, BasicObjectLock::lock_offset_in_bytes() + + BasicLock::displaced_header_offset_in_bytes(), monitor); + + // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) { + + // Store stack address of the BasicObjectLock (this is monitor) into object. + add2reg(object_mark_addr, oopDesc::mark_offset_in_bytes(), object); + + z_csg(displaced_header, monitor, 0, object_mark_addr); + assert(current_header==displaced_header, "must be same register"); // Identified two registers from z/Architecture. + + z_bre(done); + + // } else if (THREAD->is_lock_owned((address)displaced_header)) + // // Simple recursive case. + // monitor->lock()->set_displaced_header(NULL); + + // We did not see an unlocked object so try the fast recursive case. + + // Check if owner is self by comparing the value in the markOop of object + // (current_header) with the stack pointer. + z_sgr(current_header, Z_SP); + + assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); + + // The prior sequence "LGR, NGR, LTGR" can be done better + // (Z_R1 is temp and not used after here). 
+ load_const_optimized(Z_R0, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place)); + z_ngr(Z_R0, current_header); // AND sets CC (result eq/ne 0) + + // If condition is true we are done and hence we can store 0 in the displaced + // header indicating it is a recursive lock and be done. + z_brne(slow_case); + z_release(); // Membar unnecessary on zarch AND because the above csg does a sync before and after. + z_stg(Z_R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() + + BasicLock::displaced_header_offset_in_bytes(), monitor); + z_bru(done); + + // } else { + // // Slow path. + // InterpreterRuntime::monitorenter(THREAD, monitor); + + // None of the above fast optimizations worked so we have to get into the + // slow case of monitor enter. + bind(slow_case); + + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + monitor, /*check_for_exceptions=*/false); + + // } + + bind(done); +} + +// Unlocks an object. Used in monitorexit bytecode and remove_activation. +// +// Registers alive +// monitor - address of the BasicObjectLock to be used for locking, +// which must be initialized with the object to lock. +// +// Throw IllegalMonitorException if object is not locked by current thread. +void InterpreterMacroAssembler::unlock_object(Register monitor, Register object) { + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + monitor, /*check_for_exceptions=*/ true); + return; + } + +// else { + // template code: + // + // if ((displaced_header = monitor->displaced_header()) == NULL) { + // // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL. + // monitor->set_obj(NULL); + // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) { + // // We swapped the unlocked mark in displaced_header into the object's mark word. + // monitor->set_obj(NULL); + // } else { + // // Slow path. 
+ // InterpreterRuntime::monitorexit(THREAD, monitor); + // } + + const Register displaced_header = Z_ARG4; + const Register current_header = Z_R1; + Address obj_entry(monitor, BasicObjectLock::obj_offset_in_bytes()); + Label done; + + if (object == noreg) { + // In the template interpreter, we must assure that the object + // entry in the monitor is cleared on all paths. Thus we move + // loading up to here, and clear the entry afterwards. + object = Z_ARG3; // Use Z_ARG3 if caller didn't pass object. + z_lg(object, obj_entry); + } + + assert_different_registers(monitor, object, displaced_header, current_header); + + // if ((displaced_header = monitor->displaced_header()) == NULL) { + // // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL. + // monitor->set_obj(NULL); + + clear_mem(obj_entry, sizeof(oop)); + + if (UseBiasedLocking) { + // The object address from the monitor is in object. + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + biased_locking_exit(object, displaced_header, done); + } + + // Test first if we are in the fast recursive case. + MacroAssembler::load_and_test_long(displaced_header, + Address(monitor, BasicObjectLock::lock_offset_in_bytes() + + BasicLock::displaced_header_offset_in_bytes())); + z_bre(done); // displaced_header == 0 -> goto done + + // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) { + // // We swapped the unlocked mark in displaced_header into the object's mark word. + // monitor->set_obj(NULL); + + // If we still have a lightweight lock, unlock the object and be done. + + // The markword is expected to be at offset 0. + assert(oopDesc::mark_offset_in_bytes() == 0, "unlock_object: review code below"); + + // We have the displaced header in displaced_header. If the lock is still + // lightweight, it will contain the monitor address and we'll store the + // displaced header back into the object's mark word. 
+ z_lgr(current_header, monitor); + z_csg(current_header, displaced_header, 0, object); + z_bre(done); + + // } else { + // // Slow path. + // InterpreterRuntime::monitorexit(THREAD, monitor); + + // The lock has been converted into a heavy lock and hence + // we need to get into the slow case. + z_stg(object, obj_entry); // Restore object entry, has been cleared above. + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + monitor, /*check_for_exceptions=*/false); + + // } + + bind(done); +} + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + load_and_test_long(mdp, Address(Z_fp, _z_ijava_state_neg(mdx))); + z_brz(zero_continue); +} + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + Register mdp = Z_ARG4; + Register method = Z_ARG5; + + get_method(method); + // Test MDO to avoid the call if it is NULL. + load_and_test_long(mdp, method2_(method, method_data)); + z_brz(set_mdp); + + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), method, Z_bcp); + // Z_RET: mdi + // Mdo is guaranteed to be non-zero here, we checked for it before the call. + assert(method->is_nonvolatile(), "choose nonvolatile reg or reload from frame"); + z_lg(mdp, method2_(method, method_data)); // Must reload, mdp is volatile reg. 
+ add2reg_with_index(mdp, in_bytes(MethodData::data_offset()), Z_RET, mdp); + + bind(set_mdp); + save_mdp(mdp); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + NearLabel verify_continue; + Register bcp_expected = Z_ARG3; + Register mdp = Z_ARG4; + Register method = Z_ARG5; + + test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue + get_method(method); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. + load_sized_value(bcp_expected, Address(mdp, DataLayout::bci_offset()), 2, false /*signed*/); + z_ag(bcp_expected, Address(method, Method::const_offset())); + load_address(bcp_expected, Address(bcp_expected, ConstMethod::codes_offset())); + compareU64_and_branch(bcp_expected, Z_bcp, bcondEqual, verify_continue); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, Z_bcp, mdp); + bind(verify_continue); +#endif // ASSERT +} + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, int constant, Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + z_stg(value, constant, mdp_in); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + Register tmp, + bool decrement) { + assert_different_registers(mdp_in, tmp); + // counter address + Address data(mdp_in, constant); + const int delta = decrement ? -DataLayout::counter_increment : DataLayout::counter_increment; + add2mem_64(Address(mdp_in, constant), delta, tmp); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // Set the flag. 
+ z_oi(Address(mdp_in, DataLayout::flags_offset()), flag_byte_constant); +} + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + z_cg(value, Address(mdp_in, offset)); + z_brne(not_equal_continue); + } else { + // Put the test value into a register, so caller can use it: + z_lg(test_value_out, Address(mdp_in, offset)); + compareU64_and_branch(test_value_out, value, bcondNotEqual, not_equal_continue); + } +} + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, int offset_of_disp) { + update_mdp_by_offset(mdp_in, noreg, offset_of_disp); +} + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register dataidx, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address disp_address(mdp_in, dataidx, offset_of_disp); + Assembler::z_ag(mdp_in, disp_address); + save_mdp(mdp_in); +} + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add2reg(mdp_in, constant); + save_mdp(mdp_in); +} + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert(return_bci->is_nonvolatile(), "choose nonvolatile reg or save/restore"); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); +} + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. 
+ // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + Address data(mdp, JumpData::taken_offset()); + z_lg(bumped_count, data); + // 64-bit overflow is very unlikely. Saturation to 32-bit values is + // performed when reading the counts. + add2reg(bumped_count, DataLayout::counter_increment); + z_stg(bumped_count, data); // Store back out + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + +// Kills Z_R1_scratch. +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are not taking a branch. Increment the not taken count. + increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()), Z_R1_scratch); + + // The method data pointer needs to be updated to correspond to + // the next bytecode. + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + +// Kills: Z_R1_scratch. +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue.
+ test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + NearLabel profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + NearLabel skip_receiver_profile; + if (receiver_can_be_null) { + NearLabel not_null; + compareU64_and_branch(receiver, (intptr_t)0L, bcondNotEqual, not_null); + // We are making a call. Increment the count for null receiver. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + z_bru(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + bind(profile_continue); + } +} + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. 
Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } + return; + } + + int last_row = VirtualCallData::row_limit() - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the receiver and for null. + // Take any of three different outcomes: + // 1. found receiver => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + NearLabel next_test; + bool test_for_null_also = (row == start_row); + + // See if the receiver is receiver[n]. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); + test_mdp_data_at(mdp, recvr_offset, receiver, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the receiver from the CallData.) + + // The receiver is receiver[n]. Increment count[n]. + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + increment_mdp_data_at(mdp, count_offset); + z_bru(done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on receiver[n]... Test for null. + z_ltgr(reg2, reg2); + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (is_virtual_call) { + z_brz(found_null); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + z_bru(done); + bind(found_null); + } else { + z_brnz(done); + } + break; + } + // Since null is rare, make it be the branch-taken case. + z_brz(found_null); + + // Put all the "Case 3" tests here. + record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); + + // Found a null. Keep searching for a matching receiver, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching receiver, but we + // observed the receiver[start_row] is NULL. + + // Fill in the receiver field and increment the count. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); + set_mdp_data_at(mdp, recvr_offset, receiver); + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + load_const_optimized(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + z_bru(done); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[0] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { count.incr(); goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register
receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, Register mdp) { + if (ProfileInterpreter) { + NearLabel profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + NearLabel next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // Return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(RetData::bci_displacement_offset(row))); + z_bru(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. 
+ int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp, Register tmp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. + count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, tmp, true); + + bind (profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count. + increment_mdp_data_at(mdp, in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. 
+ update_mdp_by_offset(mdp, in_bytes(MultiBranchData::default_displacement_offset())); + + bind(profile_continue); + } +} + +// Kills: index, scratch1, scratch2. +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register scratch1, + Register scratch2) { + if (ProfileInterpreter) { + Label profile_continue; + assert_different_registers(index, mdp, scratch1, scratch2); + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes(). + z_sllg(index, index, exact_log2(in_bytes(MultiBranchData::per_case_size()))); + add2reg(index, in_bytes(MultiBranchData::case_array_offset())); + + // Add the calculated base to the mdp -> address of the case' data. + Address case_data_addr(mdp, index); + Register case_data = scratch1; + load_address(case_data, case_data_addr); + + // Update the case count. + increment_mdp_data_at(case_data, + in_bytes(MultiBranchData::relative_count_offset()), + scratch2); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData::relative_displacement_offset())); + + bind(profile_continue); + } +} + +// kills: R0, R1, flags, loads klass from obj (if not null) +void InterpreterMacroAssembler::profile_obj_type(Register obj, Address mdo_addr, Register klass, bool cmp_done) { + NearLabel null_seen, init_klass, do_nothing, do_update; + + // Klass = obj is allowed. + const Register tmp = Z_R1; + assert_different_registers(obj, mdo_addr.base(), tmp, Z_R0); + assert_different_registers(klass, mdo_addr.base(), tmp, Z_R0); + + z_lg(tmp, mdo_addr); + if (cmp_done) { + z_brz(null_seen); + } else { + compareU64_and_branch(obj, (intptr_t)0, Assembler::bcondEqual, null_seen); + } + + verify_oop(obj); + load_klass(klass, obj); + + // Klass seen before, nothing to do (regardless of unknown bit). 
+ z_lgr(Z_R0, tmp); + assert(Immediate::is_uimm(~TypeEntries::type_klass_mask, 16), "or change following instruction"); + z_nill(Z_R0, TypeEntries::type_klass_mask & 0xFFFF); + compareU64_and_branch(Z_R0, klass, Assembler::bcondEqual, do_nothing); + + // Already unknown. Nothing to do anymore. + z_tmll(tmp, TypeEntries::type_unknown); + z_brc(Assembler::bcondAllOne, do_nothing); + + z_lgr(Z_R0, tmp); + assert(Immediate::is_uimm(~TypeEntries::type_mask, 16), "or change following instruction"); + z_nill(Z_R0, TypeEntries::type_mask & 0xFFFF); + compareU64_and_branch(Z_R0, (intptr_t)0, Assembler::bcondEqual, init_klass); + + // Different than before. Cannot keep accurate profile. + z_oill(tmp, TypeEntries::type_unknown); + z_bru(do_update); + + bind(init_klass); + // Combine klass and null_seen bit (only used if (tmp & type_mask)==0). + z_ogr(tmp, klass); + z_bru(do_update); + + bind(null_seen); + // Set null_seen if obj is 0. + z_oill(tmp, TypeEntries::null_seen); + // fallthru: z_bru(do_update); + + bind(do_update); + z_stg(tmp, mdo_addr); + + bind(do_nothing); +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + assert_different_registers(mdp, callee, tmp); + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + z_cliy(in_bytes(DataLayout::tag_offset()) - off_to_start, mdp, + is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + z_brne(profile_continue); + + if (MethodData::profile_arguments()) { + NearLabel done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + add2reg(mdp, off_to_args); + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile. + z_lg(tmp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, mdp); + add2reg(tmp, -i*TypeStackSlotEntries::per_arg_count()); + compare64_and_branch(tmp, TypeStackSlotEntries::per_arg_count(), Assembler::bcondLow, done); + } + z_lg(tmp, Address(callee, Method::const_offset())); + z_lgh(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); + // Stack offset o (zero based) from the start of the argument + // list. For n arguments translates into offset n - o - 1 from + // the end of the argument list. But there is an extra slot at + // the top of the stack. So the offset is n - o from Lesp. + z_sg(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args)); + z_sllg(tmp, tmp, Interpreter::logStackElementSize); + Address stack_slot_addr(tmp, Z_esp); + z_ltg(tmp, stack_slot_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr, tmp, /*ltg did compare to 0*/ true); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + add2reg(mdp, to_add); + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + z_lg(tmp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args, mdp); + add2reg(tmp, -TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. Tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. 
Non-null + // if there's a return to profile. + assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + z_sllg(tmp, tmp, exact_log2(DataLayout::cell_size)); + z_agr(mdp, tmp); + } + z_stg(mdp, _z_ijava_state_neg(mdx), Z_fp); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // Mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one. + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. We can't go back to the + // beginning of the ProfileData we intend to update to check its + // type because we're right after it and we don't know its + // length. + NearLabel do_profile; + Address bc(Z_bcp); + z_lb(tmp, bc); + compare32_and_branch(tmp, Bytecodes::_invokedynamic, Assembler::bcondEqual, do_profile); + compare32_and_branch(tmp, Bytecodes::_invokehandle, Assembler::bcondEqual, do_profile); + get_method(tmp); + // Supplement to 8139891: _intrinsic_id exceeded 1-byte size limit.
+ if (Method::intrinsic_id_size_in_bytes() == 1) { + z_cli(Method::intrinsic_id_offset_in_bytes(), tmp, vmIntrinsics::_compiledLambdaForm); + } else { + assert(Method::intrinsic_id_size_in_bytes() == 2, "size error: check Method::_intrinsic_id"); + z_lh(tmp, Method::intrinsic_id_offset_in_bytes(), Z_R0, tmp); + z_chi(tmp, vmIntrinsics::_compiledLambdaForm); + } + z_brne(profile_continue); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + profile_obj_type(ret, mdo_ret_addr, tmp); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters. + Address parm_di_addr(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); + load_and_test_int2long(tmp1, parm_di_addr); + z_brl(profile_continue); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. + // mdo start + parameters offset + array length - 1 + + // Pointer to the parameter area in the MDO. + z_agr(mdp, tmp1); + + // Offset of the current profile entry to update. + const Register entry_offset = tmp1; + // entry_offset = array len in number of cells. 
+ z_lg(entry_offset, Address(mdp, ArrayData::array_len_offset())); + // entry_offset (number of cells) = array len - size of 1 entry + add2reg(entry_offset, -TypeStackSlotEntries::per_arg_count()); + // entry_offset in bytes + z_sllg(entry_offset, entry_offset, exact_log2(DataLayout::cell_size)); + + Label loop; + bind(loop); + + Address arg_off(mdp, entry_offset, ParametersTypeData::stack_slot_offset(0)); + Address arg_type(mdp, entry_offset, ParametersTypeData::type_offset(0)); + + // Load offset on the stack from the slot for this parameter. + z_lg(tmp2, arg_off); + z_sllg(tmp2, tmp2, Interpreter::logStackElementSize); + z_lcgr(tmp2); // Negate. + + // Profile the parameter. + z_ltg(tmp2, Address(Z_locals, tmp2)); + profile_obj_type(tmp2, arg_type, tmp2, /*ltg did compare to 0*/ true); + + // Go to next parameter. + z_aghi(entry_offset, -TypeStackSlotEntries::per_arg_count() * DataLayout::cell_size); + z_brnl(loop); + + bind(profile_continue); + } +} + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, + Address mask, + Register scratch, + bool preloaded, + branch_condition cond, + Label *where) { + assert_different_registers(counter_addr.base(), scratch); + if (preloaded) { + add2reg(scratch, increment); + reg2mem_opt(scratch, counter_addr, false); + } else { + if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(increment) && counter_addr.is_RSYform()) { + z_alsi(counter_addr.disp20(), counter_addr.base(), increment); + mem2reg_signed_opt(scratch, counter_addr); + } else { + mem2reg_signed_opt(scratch, counter_addr); + add2reg(scratch, increment); + reg2mem_opt(scratch, counter_addr, false); + } + } + z_n(scratch, mask); + if (where) { z_brc(cond, *where); } +} + +// Get MethodCounters object for given method. Lazily allocated if necessary. +// method - Ptr to Method object. 
+// Rcounters - Ptr to MethodCounters object associated with Method object. +// skip - Exit point if MethodCounters object can't be created (OOM condition). +void InterpreterMacroAssembler::get_method_counters(Register Rmethod, + Register Rcounters, + Label& skip) { + assert_different_registers(Rmethod, Rcounters); + + BLOCK_COMMENT("get MethodCounters object {"); + + Label has_counters; + load_and_test_long(Rcounters, Address(Rmethod, Method::method_counters_offset())); + z_brnz(has_counters); + + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), Rmethod, false); + z_ltgr(Rcounters, Z_RET); // Runtime call returns MethodCounters object. + z_brz(skip); // No MethodCounters, out of memory. + + bind(has_counters); + + BLOCK_COMMENT("} get MethodCounters object"); +} + +// Increment invocation counter in MethodCounters object. +// Return (invocation_counter+backedge_counter) as "result" in RctrSum. +// Counter values are all unsigned. +void InterpreterMacroAssembler::increment_invocation_counter(Register Rcounters, Register RctrSum) { + assert(UseCompiler || LogTouchedMethods, "incrementing must be useful"); + assert_different_registers(Rcounters, RctrSum); + + int increment = InvocationCounter::count_increment; + int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset()); + int be_counter_offset = in_bytes(MethodCounters::backedge_counter_offset() + InvocationCounter::counter_offset()); + + BLOCK_COMMENT("Increment invocation counter {"); + + if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(increment)) { + // Increment the invocation counter in place, + // then add the incremented value to the backedge counter. + z_l(RctrSum, be_counter_offset, Rcounters); + z_alsi(inv_counter_offset, Rcounters, increment); // Atomic increment @no extra cost! + z_nilf(RctrSum, InvocationCounter::count_mask_value); // Mask off state bits. 
+ z_al(RctrSum, inv_counter_offset, Z_R0, Rcounters); + } else { + // This path is optimized for low register consumption + // at the cost of somewhat higher operand delays. + // It does not need an extra temp register. + + // Update the invocation counter. + z_l(RctrSum, inv_counter_offset, Rcounters); + if (RctrSum == Z_R0) { + z_ahi(RctrSum, increment); + } else { + add2reg(RctrSum, increment); + } + z_st(RctrSum, inv_counter_offset, Rcounters); + + // Mask off the state bits. + z_nilf(RctrSum, InvocationCounter::count_mask_value); + + // Add the backedge counter to the updated invocation counter to + // form the result. + z_al(RctrSum, be_counter_offset, Z_R0, Rcounters); + } + + BLOCK_COMMENT("} Increment invocation counter"); + + // Note that this macro must leave the backedge_count + invocation_count in RctrSum! +} + + +// Increment backedge counter in MethodCounters object. +// Return (invocation_counter+backedge_counter) as "result" in RctrSum. +// Counter values are all unsigned. +void InterpreterMacroAssembler::increment_backedge_counter(Register Rcounters, Register RctrSum) { + assert(UseCompiler, "incrementing must be useful"); + assert_different_registers(Rcounters, RctrSum); + + int increment = InvocationCounter::count_increment; + int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset()); + int be_counter_offset = in_bytes(MethodCounters::backedge_counter_offset() + InvocationCounter::counter_offset()); + + BLOCK_COMMENT("Increment backedge counter {"); + + if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(increment)) { + // Increment the backedge counter in place, + // then add the incremented value to the invocation counter. + z_l(RctrSum, inv_counter_offset, Rcounters); + z_alsi(be_counter_offset, Rcounters, increment); // Atomic increment @no extra cost! + z_nilf(RctrSum, InvocationCounter::count_mask_value); // Mask off state bits.
+ z_al(RctrSum, be_counter_offset, Z_R0, Rcounters); + } else { + // This path is optimized for low register consumption + // at the cost of somewhat higher operand delays. + // It does not need an extra temp register. + + // Update the backedge counter. + z_l(RctrSum, be_counter_offset, Rcounters); + if (RctrSum == Z_R0) { + z_ahi(RctrSum, increment); + } else { + add2reg(RctrSum, increment); + } + z_st(RctrSum, be_counter_offset, Rcounters); + + // Mask off the state bits. + z_nilf(RctrSum, InvocationCounter::count_mask_value); + + // Add the invocation counter to the updated backedge counter to + // form the result. + z_al(RctrSum, inv_counter_offset, Z_R0, Rcounters); + } + + BLOCK_COMMENT("} Increment backedge counter"); + + // Note that this macro must leave the backedge_count + invocation_count in RctrSum! +} + +// Add an InterpMonitorElem to stack (see frame_s390.hpp). +void InterpreterMacroAssembler::add_monitor_to_stack(bool stack_is_empty, + Register Rtemp1, + Register Rtemp2, + Register Rtemp3) { + + const Register Rcurr_slot = Rtemp1; + const Register Rlimit = Rtemp2; + const jint delta = -frame::interpreter_frame_monitor_size() * wordSize; + + assert((delta & LongAlignmentMask) == 0, + "sizeof BasicObjectLock must be even number of doublewords"); + assert(2 * wordSize == -delta, "this works only as long as delta == -2*wordSize"); + assert(Rcurr_slot != Z_R0, "Register must be usable as base register"); + assert_different_registers(Rlimit, Rcurr_slot, Rtemp3); + + get_monitors(Rlimit); + + // Adjust stack pointer for additional monitor entry. + resize_frame(RegisterOrConstant((intptr_t) delta), Z_fp, false); + + if (!stack_is_empty) { + // Must copy stack contents down. + NearLabel next, done; + + // Rcurr_slot := addr(Tos), Z_esp is pointing below it! + add2reg(Rcurr_slot, wordSize, Z_esp); + + // Nothing to do, if already at monitor area. + compareU64_and_branch(Rcurr_slot, Rlimit, bcondNotLow, done); + + bind(next); + + // Move one stack slot.
+ mem2reg_opt(Rtemp3, Address(Rcurr_slot)); + reg2mem_opt(Rtemp3, Address(Rcurr_slot, delta)); + add2reg(Rcurr_slot, wordSize); + compareU64_and_branch(Rcurr_slot, Rlimit, bcondLow, next); // Are we done? + + bind(done); + // Done copying stack. + } + + // Adjust expression stack and monitor pointers. + add2reg(Z_esp, delta); + add2reg(Rlimit, delta); + save_monitors(Rlimit); +} + +// Note: Index holds the offset in bytes afterwards. +// You can use this to store a new value (with Z_locals as the base). +void InterpreterMacroAssembler::access_local_int(Register index, Register dst) { + z_sllg(index, index, LogBytesPerWord); + mem2reg_opt(dst, Address(Z_locals, index), false); +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { MacroAssembler::verify_oop(reg); } +} + +// Inline assembly for: +// +// if (thread is in interp_only_mode) { +// InterpreterRuntime::post_method_entry(); +// } + +void InterpreterMacroAssembler::notify_method_entry() { + + // JVMTI + // Whenever JVMTI puts a thread in interp_only_mode, method + // entry/exit events are sent for that thread to track stack + // depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent.
+ if (JvmtiExport::can_post_interpreter_events()) { + Label jvmti_post_done; + MacroAssembler::load_and_test_int(Z_R0, Address(Z_thread, JavaThread::interp_only_mode_offset())); + z_bre(jvmti_post_done); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry), /*check_exceptions=*/false); + bind(jvmti_post_done); + } +} + +// Inline assembly for: +// +// if (thread is in interp_only_mode) { +// if (!native_method) save result +// InterpreterRuntime::post_method_exit(); +// if (!native_method) restore result +// } +// if (DTraceMethodProbes) { +// SharedRuntime::dtrace_method_exit(thread, method); +// } +// +// For native methods their result is stored in z_ijava_state.lresult +// and z_ijava_state.fresult before coming here. +// Java methods have their result stored in the expression stack. +// +// Notice the dependency to frame::interpreter_frame_result(). +void InterpreterMacroAssembler::notify_method_exit(bool native_method, + TosState state, + NotifyMethodExitMode mode) { + // JVMTI + // Whenever JVMTI puts a thread in interp_only_mode, method + // entry/exit events are sent for that thread to track stack + // depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label jvmti_post_done; + MacroAssembler::load_and_test_int(Z_R0, Address(Z_thread, JavaThread::interp_only_mode_offset())); + z_bre(jvmti_post_done); + if (!native_method) push(state); // see frame::interpreter_frame_result() + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit), /*check_exceptions=*/false); + if (!native_method) pop(state); + bind(jvmti_post_done); + } + +#if 0 + // Dtrace currently not supported on z/Architecture. 
+ { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + push(state); + get_method(c_rarg1); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + r15_thread, c_rarg1); + pop(state); + } +#endif +} + +void InterpreterMacroAssembler::skip_if_jvmti_mode(Label &Lskip, Register Rscratch) { + if (!JvmtiExport::can_post_interpreter_events()) { + return; + } + + load_and_test_int(Rscratch, Address(Z_thread, JavaThread::interp_only_mode_offset())); + z_brnz(Lskip); + +} + +// Pop the topmost TOP_IJAVA_FRAME and set its sender_sp as new Z_SP. +// The return pc is loaded into the register return_pc. +// +// Registers updated: +// return_pc - The return pc of the calling frame. +// tmp1, tmp2 - scratch +void InterpreterMacroAssembler::pop_interpreter_frame(Register return_pc, Register tmp1, Register tmp2) { + // F0 Z_SP -> caller_sp (F1's) + // ... + // sender_sp (F1's) + // ... + // F1 Z_fp -> caller_sp (F2's) + // return_pc (Continuation after return from F0.) + // ... + // F2 caller_sp + + // Remove F0's activation. Restoring Z_SP to sender_sp reverts modifications + // (a) by a c2i adapter and (b) by generate_fixed_frame(). + // In case (a) the new top frame F1 is an unextended compiled frame. + // In case (b) F1 is converted from PARENT_IJAVA_FRAME to TOP_IJAVA_FRAME. + + // Case (b) seems to be redundant when returning to an interpreted caller, + // because then the caller's top_frame_sp is installed as sp (see + // TemplateInterpreterGenerator::generate_return_entry_for ()). But + // pop_interpreter_frame() is also used in exception handling and there the + // frame type of the caller is unknown, therefore top_frame_sp cannot be used, + // so it is important that sender_sp is the caller's sp as TOP_IJAVA_FRAME. + + Register R_f1_sender_sp = tmp1; + Register R_f2_sp = tmp2; + + // First check for the interpreter frame's magic.
+ asm_assert_ijava_state_magic(R_f2_sp/*tmp*/); + z_lg(R_f2_sp, _z_parent_ijava_frame_abi(callers_sp), Z_fp); + z_lg(R_f1_sender_sp, _z_ijava_state_neg(sender_sp), Z_fp); + if (return_pc->is_valid()) + z_lg(return_pc, _z_parent_ijava_frame_abi(return_pc), Z_fp); + // Pop F0 by resizing to R_f1_sender_sp and using R_f2_sp as fp. + resize_frame_absolute(R_f1_sender_sp, R_f2_sp, false/*load fp*/); + +#ifdef ASSERT + // The return_pc in the new top frame is dead... at least that's my + // current understanding; to assert this I overwrite it. + load_const_optimized(Z_ARG3, 0xb00b1); + z_stg(Z_ARG3, _z_parent_ijava_frame_abi(return_pc), Z_SP); +#endif +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { + if (VerifyFPU) { + unimplemented("verfiyFPU"); + } +} + diff --git a/hotspot/src/cpu/s390/vm/interp_masm_s390.hpp b/hotspot/src/cpu/s390/vm/interp_masm_s390.hpp new file mode 100644 index 00000000000..5f29a606481 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/interp_masm_s390.hpp @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_INTERP_MASM_ZARCH_64_64_HPP +#define CPU_S390_VM_INTERP_MASM_ZARCH_64_64_HPP + +#include "asm/macroAssembler.hpp" +#include "interpreter/invocationCounter.hpp" + +// This file specializes the assembler with interpreter-specific macros. + +class InterpreterMacroAssembler: public MacroAssembler { + + protected: + // Interpreter-specific versions of call_VM_leaf_base() and call_VM_base(). + virtual void call_VM_leaf_base(address entry_point); + virtual void call_VM_leaf_base(address entry_point, bool allow_relocation); + + virtual void call_VM_base(Register oop_result, + Register last_java_sp, + address entry_point, + bool check_exceptions); + virtual void call_VM_base(Register oop_result, + Register last_java_sp, + address entry_point, + bool allow_relocation, + bool check_exceptions); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // Base routine for all dispatches. + void dispatch_base(TosState state, address* table); + + public: + InterpreterMacroAssembler(CodeBuffer* c) + : MacroAssembler(c) {} + + void jump_to_entry(address entry, Register Rscratch); + + virtual void load_earlyret_value(TosState state); + + static const Address l_tmp; + static const Address d_tmp; + + // Handy address generation macros. +#define thread_(field_name) Address(Z_thread, JavaThread::field_name ## _offset()) +#define method_(field_name) Address(Z_method, Method::field_name ## _offset()) +#define method2_(Rmethod, field_name) Address(Rmethod, Method::field_name ## _offset()) + + // Helper routine for frame allocation/deallocation. + // Compute the delta by which the caller's SP has to + // be adjusted to accommodate the non-argument locals.
+ void compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta); + + // dispatch routines + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + void dispatch_only(TosState state); + // Dispatch normal table via Z_bytecode (assume Z_bytecode is loaded already). + void dispatch_only_normal(TosState state); + void dispatch_normal(TosState state); + void dispatch_next(TosState state, int step = 0); + void dispatch_next_noverify_oop(TosState state, int step = 0); + void dispatch_via(TosState state, address* table); + + // Jump to an invoked target. + void prepare_to_jump_from_interpreted(Register method); + void jump_from_interpreted(Register method, Register temp); + + // Removes the current activation (incl. unlocking of monitors). + // Additionally this code is used for earlyReturn in which case we + // want to skip throwing an exception and installing an exception. + void remove_activation(TosState state, + Register return_pc, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmti = true); + + public: + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls. + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM(Register thread_cache, Register oop_result, Register last_java_sp, + address entry_point, Register arg_1, Register arg_2, bool check_exception = true); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. Blows registers tmp1, tmp2 and tmp3. 
+ void gen_subtype_check(Register sub_klass, Register super_klass, Register tmp1, Register tmp2, Label &ok_is_subtype); + + void get_cache_and_index_at_bcp(Register cache, Register cpe_offset, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register cpe_offset, Register bytecode, + int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void load_resolved_reference_at_index(Register result, Register index); + + // Pop topmost element from stack. It just disappears. Useful if + // consumed previously by access via stackTop(). + void popx(int len); + void pop_i() { popx(1); } + void pop_ptr() { popx(1); } + void pop_l() { popx(2); } + void pop_f() { popx(1); } + void pop_d() { popx(2); } + // Get Address object of stack top. No checks. No pop. + // Purpose: provide address of stack operand to exploit reg-mem operations. + // Avoid RISC-like mem2reg - reg-reg-op sequence. + Address stackTop(); + + // Helpers for expression stack. + void pop_i( Register r); + void pop_ptr( Register r); + void pop_l( Register r); + void pop_f(FloatRegister f); + void pop_d(FloatRegister f); + + void push_i( Register r = Z_tos); + void push_ptr( Register r = Z_tos); + void push_l( Register r = Z_tos); + void push_f(FloatRegister f = Z_ftos); + void push_d(FloatRegister f = Z_ftos); + + // Helpers for swap and dup. 
+ void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + void pop (TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + void empty_expression_stack(void); + +#ifdef ASSERT + void verify_sp(Register Rsp, Register Rtemp); + void verify_esp(Register Resp, Register Rtemp); // Verify that Resp points to a word in the operand stack. +#endif // ASSERT + + public: + void if_cmp(Condition cc, bool ptr_compare); + + // Accessors to the template interpreter state. + + void asm_assert_ijava_state_magic(Register tmp) PRODUCT_RETURN; + + void save_bcp(); + + void restore_bcp(); + + void save_esp(); + + void restore_esp(); + + void get_monitors(Register reg); + + void save_monitors(Register reg); + + void get_mdp(Register mdp); + + void save_mdp(Register mdp); + + // Values that are only read (besides initialization). + void restore_locals(); + + void get_method(Register reg); + + // Load values from bytecode stream: + + enum signedOrNot { Signed, Unsigned }; + enum setCCOrNot { set_CC, dont_set_CC }; + + void get_2_byte_integer_at_bcp(Register Rdst, + int bcp_offset, + signedOrNot is_signed ); + + void get_4_byte_integer_at_bcp(Register Rdst, + int bcp_offset, + setCCOrNot should_set_CC = dont_set_CC); + + // common code + + void field_offset_at(int n, Register tmp, Register dest, Register base); + int field_offset_at(Register object, address bcp, int offset); + void fast_iaaccess(int n, address bcp); + void fast_iaputfield(address bcp, bool do_store_check); + + void index_check(Register array, Register index, int index_shift, Register tmp, Register res); + void index_check_without_pop(Register array, Register index, int index_shift, Register tmp, Register res); + + void get_constant_pool(Register Rdst); + void get_constant_pool_cache(Register Rdst); + void get_cpool_and_tags(Register Rcpool, Register Rtags); + void is_a(Label& L); + + + // -------------------------------------------------- + + void 
unlock_if_synchronized_method(TosState state, bool throw_monitor_exception = true, bool install_monitor_exception = true); + + void add_monitor_to_stack(bool stack_is_empty, + Register Rtemp, + Register Rtemp2, + Register Rtemp3); + + void access_local_int(Register index, Register dst); + void access_local_ptr(Register index, Register dst); + void access_local_long(Register index, Register dst); + void access_local_float(Register index, FloatRegister dst); + void access_local_double(Register index, FloatRegister dst); +#ifdef ASSERT + void check_for_regarea_stomp(Register Rindex, int offset, Register Rlimit, Register Rscratch, Register Rscratch1); +#endif // ASSERT + void store_local_int(Register index, Register src); + void store_local_ptr(Register index, Register src); + void store_local_long(Register index, Register src); + void store_local_float(Register index, FloatRegister src); + void store_local_double(Register index, FloatRegister src); + + + Address first_local_in_stack(); + static int top_most_monitor_byte_offset(); // Offset in bytes to top of monitor block. 
+ Address top_most_monitor(); + void compute_stack_base(Register Rdest); + + enum LoadOrStore { load, store }; + void static_iload_or_store(int which_local, LoadOrStore direction, Register Rtmp); + void static_aload_or_store(int which_local, LoadOrStore direction, Register Rtmp); + void static_dload_or_store(int which_local, LoadOrStore direction); + + void static_iinc( int which_local, jint increment, Register Rtmp, Register Rtmp2); + + void get_method_counters(Register Rmethod, Register Rcounters, Label& skip); + void increment_invocation_counter(Register Rcounters, Register RctrSum); + void increment_backedge_counter(Register Rcounters, Register RctrSum); + void test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp); + + void record_static_call_in_profile(Register Rentry, Register Rtmp); + void record_receiver_call_in_profile(Register Rklass, Register Rentry, Register Rtmp); + + // Object locking + void lock_object (Register lock_reg, Register obj_reg); + void unlock_object(Register lock_reg, Register obj_reg=noreg); + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Register mdp_in, int constant, + Register tmp = Z_R1_scratch, bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, Address mask, + Register scratch, bool preloaded, + branch_condition cond, Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int 
start_row, + Label& done, bool is_virtual_call); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register dataidx, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp, Register tmp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch1, Register scratch2); + + void profile_obj_type(Register obj, Address mdo_addr, Register klass, bool cmp_done = false); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); + + // Debugging + void verify_oop(Register reg, TosState state = atos); // Only if +VerifyOops && state == atos. + void verify_oop_or_return_address(Register reg, Register rtmp); // for astore + void verify_FPU(int stack_depth, TosState state = ftos); + + // JVMTI helpers + void skip_if_jvmti_mode(Label &Lskip, Register Rscratch = Z_R0); + + // support for JVMTI/Dtrace + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + void notify_method_entry(); + void notify_method_exit(bool native_method, TosState state, NotifyMethodExitMode mode); + + // Pop the topmost TOP_IJAVA_FRAME and set its sender_sp as new Z_SP.
+ // The return pc is loaded into the Register return_pc. + void pop_interpreter_frame(Register return_pc, Register tmp1, Register tmp2); +}; + +#endif // CPU_S390_VM_INTERP_MASM_ZARCH_64_64_HPP diff --git a/hotspot/src/cpu/s390/vm/interpreterRT_s390.cpp b/hotspot/src/cpu/s390/vm/interpreterRT_s390.cpp new file mode 100644 index 00000000000..ee4a7b6f544 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/interpreterRT_s390.cpp @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.inline.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/signature.hpp" + +// Access macros for Java and C arguments. +// First Java argument is at index-1. +#define locals_j_arg_at(index) Address(Z_R1/*locals*/, in_ByteSize((-(index)*wordSize))) + +#define __ _masm-> + +static int sp_c_int_arg_offset(int arg_nr, int fp_arg_nr) { + int int_arg_nr = arg_nr-fp_arg_nr; + + // arg_nr, fp_arg_nr start with 1 => int_arg_nr starts with 0 + if (int_arg_nr < 5) { + return int_arg_nr * wordSize + _z_abi(carg_1); + } + int offset = int_arg_nr - 5 + (fp_arg_nr > 4 ? fp_arg_nr - 4 : 0); + return offset * wordSize + _z_abi(remaining_cargs); +} + +static int sp_c_fp_arg_offset(int arg_nr, int fp_arg_nr) { + int int_arg_nr = arg_nr-fp_arg_nr; + + // Arg_nr, fp_arg_nr start with 1 => int_arg_nr starts with 0. + if (fp_arg_nr < 5) { + return (fp_arg_nr - 1 ) * wordSize + _z_abi(cfarg_1); + } + int offset = fp_arg_nr - 5 + (int_arg_nr > 4 ? int_arg_nr - 4 : 0); + return offset * wordSize + _z_abi(remaining_cargs); +} + +// Implementation of SignatureHandlerGenerator + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + int int_arg_nr = jni_offset() - _fp_arg_nr; + Register r = (int_arg_nr < 5 /*max_int_register_arguments*/) ? + as_Register(int_arg_nr) + Z_ARG1->encoding() : Z_R0; + + __ z_lgf(r, locals_j_arg_at(offset())); + if (DEBUG_ONLY(true ||) int_arg_nr >= 5) { + __ z_stg(r, sp_c_int_arg_offset(jni_offset(), _fp_arg_nr), Z_SP); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + int int_arg_nr = jni_offset() - _fp_arg_nr; + Register r = (int_arg_nr < 5 /*max_int_register_arguments*/) ? 
+ as_Register(int_arg_nr) + Z_ARG1->encoding() : Z_R0; + + __ z_lg(r, locals_j_arg_at(offset() + 1)); // Long resides in upper slot. + if (DEBUG_ONLY(true ||) int_arg_nr >= 5) { + __ z_stg(r, sp_c_int_arg_offset(jni_offset(), _fp_arg_nr), Z_SP); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + FloatRegister fp_reg = (_fp_arg_nr < 4/*max_fp_register_arguments*/) ? + as_FloatRegister((_fp_arg_nr * 2) + Z_FARG1->encoding()) : Z_F1; + _fp_arg_nr++; + __ z_ley(fp_reg, locals_j_arg_at(offset())); + if (DEBUG_ONLY(true ||) _fp_arg_nr > 4) { + __ z_ste(fp_reg, sp_c_fp_arg_offset(jni_offset(), _fp_arg_nr) + 4, Z_SP); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + FloatRegister fp_reg = (_fp_arg_nr < 4/*max_fp_register_arguments*/) ? + as_FloatRegister((_fp_arg_nr*2) + Z_FARG1->encoding()) : Z_F1; + _fp_arg_nr++; + __ z_ldy(fp_reg, locals_j_arg_at(offset()+1)); + if (DEBUG_ONLY(true ||) _fp_arg_nr > 4) { + __ z_std(fp_reg, sp_c_fp_arg_offset(jni_offset(), _fp_arg_nr), Z_SP); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + int int_arg_nr = jni_offset() - _fp_arg_nr; + Register r = (int_arg_nr < 5 /*max_int_register_arguments*/) ? + as_Register(int_arg_nr) + Z_ARG1->encoding() : Z_R0; + + // The handle for a receiver will never be null. + bool do_NULL_check = offset() != 0 || is_static(); + + Label do_null; + if (do_NULL_check) { + __ clear_reg(r, true, false); + __ load_and_test_long(Z_R0, locals_j_arg_at(offset())); + __ z_bre(do_null); + } + __ add2reg(r, -offset() * wordSize, Z_R1 /* locals */); + __ bind(do_null); + if (DEBUG_ONLY(true ||) int_arg_nr >= 5) { + __ z_stg(r, sp_c_int_arg_offset(jni_offset(), _fp_arg_nr), Z_SP); + } +} + + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + __ z_lgr(Z_R1, Z_ARG1); // Z_R1 is used in locals_j_arg_at(index) macro. + + // Generate code to handle arguments. 
+ iterate(fingerprint); + __ load_const_optimized(Z_RET, AbstractInterpreter::result_handler(method()->result_type())); + __ z_br(Z_R14); + __ flush(); +} + +#undef __ + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + +IRT_ENTRY(address, InterpreterRuntime::get_signature(JavaThread* thread, Method* method)) + methodHandle m(thread, method); + assert(m->is_native(), "sanity check"); + Symbol *s = m->signature(); + return (address) s->base(); +IRT_END + +IRT_ENTRY(address, InterpreterRuntime::get_result_handler(JavaThread* thread, Method* method)) + methodHandle m(thread, method); + assert(m->is_native(), "sanity check"); + return AbstractInterpreter::result_handler(m->result_type()); +IRT_END diff --git a/hotspot/src/cpu/s390/vm/interpreterRT_s390.hpp b/hotspot/src/cpu/s390/vm/interpreterRT_s390.hpp new file mode 100644 index 00000000000..67e3b914c34 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/interpreterRT_s390.hpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_INTERPRETERRT_S390_HPP +#define CPU_S390_VM_INTERPRETERRT_S390_HPP + +#include "memory/allocation.hpp" + +static int binary_search(int key, LookupswitchPair* array, int n); + +static address iload (JavaThread* thread); +static address aload (JavaThread* thread); +static address istore(JavaThread* thread); +static address astore(JavaThread* thread); +static address iinc (JavaThread* thread); + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + int _fp_arg_nr; + + void pass_int(); + void pass_long(); + void pass_double(); + void pass_float(); + void pass_object(); + + public: + // creation + SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + _fp_arg_nr = 0; + } + + // code generation + void generate(uint64_t fingerprint); +}; + +static address get_result_handler(JavaThread* thread, Method* method); + +static address get_signature(JavaThread* thread, Method* method); + +#endif // CPU_S390_VM_INTERPRETERRT_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/javaFrameAnchor_s390.hpp b/hotspot/src/cpu/s390/vm/javaFrameAnchor_s390.hpp new file mode 100644 index 00000000000..5ef09d033a3 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/javaFrameAnchor_s390.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_JAVAFRAMEANCHOR_S390_HPP +#define CPU_S390_VM_JAVAFRAMEANCHOR_S390_HPP + + public: + + // Each arch must define reset, save, restore. + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls). + + inline void clear(void) { + // Clearing _last_Java_sp must be first. + OrderAccess::release(); + _last_Java_sp = NULL; + // Fence? + OrderAccess::fence(); + + _last_Java_pc = NULL; + } + + inline void set(intptr_t* sp, address pc) { + _last_Java_pc = pc; + + OrderAccess::release(); + _last_Java_sp = sp; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // we must clear _last_Java_sp before copying the rest of the new data. + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing. 
+ // + if (_last_Java_sp != src->_last_Java_sp) { + OrderAccess::release(); + _last_Java_sp = NULL; + OrderAccess::fence(); + } + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true. + + OrderAccess::release(); + _last_Java_sp = src->_last_Java_sp; + } + + // We don't have to flush registers, so the stack is always walkable. + inline bool walkable(void) { return true; } + inline void make_walkable(JavaThread* thread) { } + + public: + + // We don't have a frame pointer. + intptr_t* last_Java_fp(void) { return NULL; } + + intptr_t* last_Java_sp() const { return _last_Java_sp; } + void set_last_Java_sp(intptr_t* sp) { OrderAccess::release(); _last_Java_sp = sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + +#endif // CPU_S390_VM_JAVAFRAMEANCHOR_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/jniFastGetField_s390.cpp b/hotspot/src/cpu/s390/vm/jniFastGetField_s390.cpp new file mode 100644 index 00000000000..6efc4532cea --- /dev/null +++ b/hotspot/src/cpu/s390/vm/jniFastGetField_s390.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" + +// TSO ensures that loads are blocking and ordered with respect to +// earlier loads, so we don't need LoadLoad membars. + +#define __ masm-> + +#define BUFFER_SIZE 30*sizeof(jint) + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + // Don't use fast jni accessors. + return (address) -1; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + // Don't use fast jni accessors. + return (address) -1; +} + +address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { + // Don't use fast jni accessors. 
+ return (address) -1; +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_float_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_float_field0(T_DOUBLE); +} diff --git a/hotspot/src/cpu/s390/vm/jniTypes_s390.hpp b/hotspot/src/cpu/s390/vm/jniTypes_s390.hpp new file mode 100644 index 00000000000..de7bfb49251 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/jniTypes_s390.hpp @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_JNITYPES_S390_HPP +#define CPU_S390_VM_JNITYPES_S390_HPP + +// This file holds platform-dependent routines used to write primitive +// jni types to the array of arguments passed into JavaCalls::call. 
+ +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "prims/jni.h" + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) to + // a java stack slot array to be passed as an argument to + // JavaCalls::call. I.e., they are functionally 'push' operations + // if they have a 'pos' formal parameter. Note that jlongs and + // jdoubles are written _in reverse_ of the order in which they + // appear in the interpreter stack. This is because call stubs (see + // stubGenerator_s390.cpp) reverse the argument list constructed by + // JavaCallArguments (see javaCalls.hpp). + + public: + // Ints are stored in native format in one JavaCallArgument slot at *to. + static inline void put_int(jint from, intptr_t *to) { + *(jint*) to = from; + } + + static inline void put_int(jint from, intptr_t *to, int& pos) { + *(jint*) (to + pos++) = from; + } + + static inline void put_int(jint *from, intptr_t *to, int& pos) { + *(jint*) (to + pos++) = *from; + } + + // Longs are stored in native format in one JavaCallArgument slot at *(to+1). + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + } + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { + *(oop*) to = from; + } + + static inline void put_obj(oop from, intptr_t *to, int& pos) { + *(oop*) (to + pos++) = from; + } + + static inline void put_obj(oop *from, intptr_t *to, int& pos) { + *(oop*) (to + pos++) = *from; + } + + // Floats are stored in native format in one JavaCallArgument slot at *to. 
+ static inline void put_float(jfloat from, intptr_t *to) { + *(jfloat*) to = from; + } + + static inline void put_float(jfloat from, intptr_t *to, int& pos) { + *(jfloat*) (to + pos++) = from; + } + + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { + *(jfloat*) (to + pos++) = *from; + } + + // Doubles are stored in native word format in one JavaCallArgument + // slot at *(to+1). + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + } + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + // No need to worry about alignment on z/Architecture. + static inline jint get_int(intptr_t *from) { + return *(jint*) from; + } + + static inline jlong get_long(intptr_t *from) { + return *(jlong*) (from + 1); + } + + static inline oop get_obj(intptr_t *from) { + return *(oop*) from; + } + + static inline jfloat get_float(intptr_t *from) { + return *(jfloat*) from; + } + + static inline jdouble get_double(intptr_t *from) { + return *(jdouble*) (from + 1); + } +}; + +#endif // CPU_S390_VM_JNITYPES_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/jni_s390.h b/hotspot/src/cpu/s390/vm/jni_s390.h new file mode 100644 index 00000000000..fac3f8fe157 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/jni_s390.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef _JAVASOFT_JNI_MD_H_ +#define _JAVASOFT_JNI_MD_H_ + +#if defined(__GNUC__) && (__GNUC__ >= 4) + #define JNIEXPORT __attribute__((visibility("default"))) + #define JNIIMPORT __attribute__((visibility("default"))) +#else + #define JNIEXPORT + #define JNIIMPORT +#endif + +#define JNICALL + +typedef int jint; + +typedef long int jlong; + +typedef signed char jbyte; + +#endif // _JAVASOFT_JNI_MD_H_ diff --git a/hotspot/src/cpu/s390/vm/jvmciCodeInstaller_s390.cpp b/hotspot/src/cpu/s390/vm/jvmciCodeInstaller_s390.cpp new file mode 100644 index 00000000000..e4f019a8d68 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/jvmciCodeInstaller_s390.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// JVMCI (JEP 243): +// So far, the JVMCI is not supported/implemented on SAP platforms. +// This file just serves as a placeholder which may be filled with life +// should the JVMCI ever be implemented. +#if INCLUDE_JVMCI + +#include "jvmci/jvmciCodeInstaller.hpp" +#include "jvmci/jvmciRuntime.hpp" +#include "jvmci/jvmciCompilerToVM.hpp" +#include "jvmci/jvmciJavaClasses.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_s390.inline.hpp" + +jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) { + Unimplemented(); + return 0; +} + +void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) { + Unimplemented(); +} + +void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) { + Unimplemented(); +} + +void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) { + Unimplemented(); +} + +void CodeInstaller::pd_relocate_poll(address pc, jint mark) { + 
Unimplemented(); +} + +// Convert JVMCI register indices (as used in oop maps) to HotSpot registers. +VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) { + return NULL; +} + +bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { + return false; +} + +#endif // INCLUDE_JVMCI diff --git a/hotspot/src/cpu/s390/vm/macroAssembler_s390.cpp b/hotspot/src/cpu/s390/vm/macroAssembler_s390.cpp new file mode 100644 index 00000000000..a7cbd706255 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.cpp @@ -0,0 +1,6691 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/codeBuffer.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "gc/shared/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/klass.inline.hpp" +#include "opto/compile.hpp" +#include "opto/intrinsicnode.hpp" +#include "opto/matcher.hpp" +#include "prims/methodHandles.hpp" +#include "registerSaver_s390.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/events.hpp" +#include "utilities/macros.hpp" +#if INCLUDE_ALL_GCS +#include "gc/g1/g1CollectedHeap.inline.hpp" +#include "gc/g1/g1SATBCardTableModRefBS.hpp" +#include "gc/g1/heapRegion.hpp" +#endif + +#include + +#define BLOCK_COMMENT(str) block_comment(str) +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Move 32-bit register if destination and source are different. +void MacroAssembler::lr_if_needed(Register rd, Register rs) { + if (rs != rd) { z_lr(rd, rs); } +} + +// Move register if destination and source are different. +void MacroAssembler::lgr_if_needed(Register rd, Register rs) { + if (rs != rd) { z_lgr(rd, rs); } +} + +// Zero-extend 32-bit register into 64-bit register if destination and source are different. +void MacroAssembler::llgfr_if_needed(Register rd, Register rs) { + if (rs != rd) { z_llgfr(rd, rs); } +} + +// Move float register if destination and source are different. +void MacroAssembler::ldr_if_needed(FloatRegister rd, FloatRegister rs) { + if (rs != rd) { z_ldr(rd, rs); } +} + +// Move integer register if destination and source are different. 
+// It is assumed that shorter-than-int types are already +// appropriately sign-extended. +void MacroAssembler::move_reg_if_needed(Register dst, BasicType dst_type, Register src, + BasicType src_type) { + assert((dst_type != T_FLOAT) && (dst_type != T_DOUBLE), "use move_freg for float types"); + assert((src_type != T_FLOAT) && (src_type != T_DOUBLE), "use move_freg for float types"); + + if (dst_type == src_type) { + lgr_if_needed(dst, src); // Just move all 64 bits. + return; + } + + switch (dst_type) { + // Do not support these types for now. + // case T_BOOLEAN: + case T_BYTE: // signed byte + switch (src_type) { + case T_INT: + z_lgbr(dst, src); + break; + default: + ShouldNotReachHere(); + } + return; + + case T_CHAR: + case T_SHORT: + switch (src_type) { + case T_INT: + if (dst_type == T_CHAR) { + z_llghr(dst, src); + } else { + z_lghr(dst, src); + } + break; + default: + ShouldNotReachHere(); + } + return; + + case T_INT: + switch (src_type) { + case T_BOOLEAN: + case T_BYTE: + case T_CHAR: + case T_SHORT: + case T_INT: + case T_LONG: + case T_OBJECT: + case T_ARRAY: + case T_VOID: + case T_ADDRESS: + lr_if_needed(dst, src); + // llgfr_if_needed(dst, src); // zero-extend (in case we need to find a bug). 
+ return; + + default: + assert(false, "non-integer src type"); + return; + } + case T_LONG: + switch (src_type) { + case T_BOOLEAN: + case T_BYTE: + case T_CHAR: + case T_SHORT: + case T_INT: + z_lgfr(dst, src); // sign extension + return; + + case T_LONG: + case T_OBJECT: + case T_ARRAY: + case T_VOID: + case T_ADDRESS: + lgr_if_needed(dst, src); + return; + + default: + assert(false, "non-integer src type"); + return; + } + return; + case T_OBJECT: + case T_ARRAY: + case T_VOID: + case T_ADDRESS: + switch (src_type) { + // These types don't make sense to be converted to pointers: + // case T_BOOLEAN: + // case T_BYTE: + // case T_CHAR: + // case T_SHORT: + + case T_INT: + z_llgfr(dst, src); // zero extension + return; + + case T_LONG: + case T_OBJECT: + case T_ARRAY: + case T_VOID: + case T_ADDRESS: + lgr_if_needed(dst, src); + return; + + default: + assert(false, "non-integer src type"); + return; + } + return; + default: + assert(false, "non-integer dst type"); + return; + } +} + +// Move float register if destination and source are different. +void MacroAssembler::move_freg_if_needed(FloatRegister dst, BasicType dst_type, + FloatRegister src, BasicType src_type) { + assert((dst_type == T_FLOAT) || (dst_type == T_DOUBLE), "use move_reg for int types"); + assert((src_type == T_FLOAT) || (src_type == T_DOUBLE), "use move_reg for int types"); + if (dst_type == src_type) { + ldr_if_needed(dst, src); // Just move all 64 bits. + } else { + switch (dst_type) { + case T_FLOAT: + assert(src_type == T_DOUBLE, "invalid float type combination"); + z_ledbr(dst, src); + return; + case T_DOUBLE: + assert(src_type == T_FLOAT, "invalid float type combination"); + z_ldebr(dst, src); + return; + default: + assert(false, "non-float dst type"); + return; + } + } +} + +// Optimized emitter for reg to mem operations. +// Uses modern instructions if running on modern hardware, classic instructions +// otherwise. Prefers (usually shorter) classic instructions if applicable. 
+// Data register (reg) cannot be used as work register. +// +// Don't rely on register locking, instead pass a scratch register (Z_R0 by default). +// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs! +void MacroAssembler::freg2mem_opt(FloatRegister reg, + int64_t disp, + Register index, + Register base, + void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), + void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), + Register scratch) { + index = (index == noreg) ? Z_R0 : index; + if (Displacement::is_shortDisp(disp)) { + (this->*classic)(reg, disp, index, base); + } else { + if (Displacement::is_validDisp(disp)) { + (this->*modern)(reg, disp, index, base); + } else { + if (scratch != Z_R0 && scratch != Z_R1) { + (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. + } else { + if (scratch != Z_R0) { // scratch == Z_R1 + if ((scratch == index) || (index == base)) { + (this->*modern)(reg, disp, index, base); // Will fail with disp out of range. + } else { + add2reg(scratch, disp, base); + (this->*classic)(reg, 0, index, scratch); + if (base == scratch) { + add2reg(base, -disp); // Restore base. + } + } + } else { // scratch == Z_R0 + z_lgr(scratch, base); + add2reg(base, disp); + (this->*classic)(reg, 0, index, base); + z_lgr(base, scratch); // Restore base. + } + } + } + } +} + +void MacroAssembler::freg2mem_opt(FloatRegister reg, const Address &a, bool is_double) { + if (is_double) { + freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stdy), CLASSIC_FFUN(z_std)); + } else { + freg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_stey), CLASSIC_FFUN(z_ste)); + } +} + +// Optimized emitter for mem to reg operations. +// Uses modern instructions if running on modern hardware, classic instructions +// otherwise. Prefers (usually shorter) classic instructions if applicable. +// data register (reg) cannot be used as work register. 
+//
+// The displacement is tried in stages: a short displacement (is_shortDisp)
+// goes to the classic emitter, any other valid displacement (is_validDisp)
+// to the modern emitter. Only if disp can not be encoded at all is it folded
+// into a register with the help of scratch.
+//
+// Don't rely on register locking, instead pass a scratch register (Z_R0 by default).
+// CAUTION! Passing registers >= Z_R2 may produce bad results on old CPUs!
+void MacroAssembler::mem2freg_opt(FloatRegister reg,
+                                  int64_t       disp,
+                                  Register      index,
+                                  Register      base,
+                                  void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register),
+                                  void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register),
+                                  Register      scratch) {
+  if (index == noreg) {
+    index = Z_R0;
+  }
+
+  // Short displacement: the classic emitter can encode it directly.
+  if (Displacement::is_shortDisp(disp)) {
+    (this->*classic)(reg, disp, index, base);
+    return;
+  }
+
+  // Valid (but not short) displacement: the modern emitter can encode it directly.
+  if (Displacement::is_validDisp(disp)) {
+    (this->*modern)(reg, disp, index, base);
+    return;
+  }
+
+  // From here on, disp can not be encoded in the instruction itself.
+  if (scratch == Z_R1) {
+    const bool scratch_unusable = (scratch == index) || (index == base);
+    if (scratch_unusable) {
+      (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
+      return;
+    }
+    add2reg(scratch, disp, base);               // scratch := base + disp
+    (this->*classic)(reg, 0, index, scratch);
+    if (base == scratch) {
+      add2reg(base, -disp);                     // Restore base.
+    }
+    return;
+  }
+
+  if (scratch == Z_R0) {
+    // NOTE(review): unlike the Z_R0 path of reg2mem_opt, there is no check for
+    // scratch == base here. Confirm callers never combine base == Z_R0 with an
+    // out-of-range disp.
+    z_lgr(scratch, base);                       // Save base.
+    add2reg(base, disp);                        // base := base + disp
+    (this->*classic)(reg, 0, index, base);
+    z_lgr(base, scratch);                       // Restore base.
+    return;
+  }
+
+  // Any other scratch register is not handled.
+  (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
+}
+
+// Load float register reg from the location described by a.
+// is_double selects the z_ldy/z_ld emitter pair, otherwise z_ley/z_le is used.
+// The scratch register is left at its default.
+void MacroAssembler::mem2freg_opt(FloatRegister reg, const Address &a, bool is_double) {
+  if (!is_double) {
+    mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ley), CLASSIC_FFUN(z_le));
+    return;
+  }
+  mem2freg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_FFUN(z_ldy), CLASSIC_FFUN(z_ld));
+}
+
+// Optimized emitter for reg to mem operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// Data register (reg) cannot be used as work register.
+//
+// The displacement is tried in stages: a short displacement (is_shortDisp)
+// goes to the classic emitter, any other valid displacement (is_validDisp)
+// to the modern emitter. Only if disp can not be encoded at all is it folded
+// into a register with the help of scratch.
+//
+// Don't rely on register locking, instead pass a scratch register
+// (Z_R0 by default)
+// CAUTION! passing registers >= Z_R2 may produce bad results on old CPUs!
+void MacroAssembler::reg2mem_opt(Register reg,
+                                 int64_t  disp,
+                                 Register index,
+                                 Register base,
+                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register),
+                                 Register scratch) {
+  if (index == noreg) {
+    index = Z_R0;
+  }
+
+  // Short displacement: the classic emitter can encode it directly.
+  if (Displacement::is_shortDisp(disp)) {
+    (this->*classic)(reg, disp, index, base);
+    return;
+  }
+
+  // Valid (but not short) displacement: the modern emitter can encode it directly.
+  if (Displacement::is_validDisp(disp)) {
+    (this->*modern)(reg, disp, index, base);
+    return;
+  }
+
+  // From here on, disp can not be encoded in the instruction itself.
+  if (scratch == Z_R1) {
+    const bool scratch_unusable = (scratch == index) || (index == base);
+    if (scratch_unusable) {
+      (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
+      return;
+    }
+    add2reg(scratch, disp, base);               // scratch := base + disp
+    (this->*classic)(reg, 0, index, scratch);
+    if (base == scratch) {
+      add2reg(base, -disp);                     // Restore base.
+    }
+    return;
+  }
+
+  if (scratch == Z_R0) {
+    // scratch saves base while base is modified, so neither may alias
+    // the data register, and scratch may not alias base.
+    const bool registers_clash = (scratch == reg) || (scratch == base) || (reg == base);
+    if (registers_clash) {
+      (this->*modern)(reg, disp, index, base);  // Will fail with disp out of range.
+      return;
+    }
+    z_lgr(scratch, base);                       // Save base.
+    add2reg(base, disp);                        // base := base + disp
+    (this->*classic)(reg, 0, index, base);
+    z_lgr(base, scratch);                       // Restore base.
+    return;
+  }
+
+  // Any other scratch register is not handled.
+  (this->*modern)(reg, disp, index, base);      // Will fail with disp out of range.
+}
+
+// Store register reg to the location described by a.
+// is_double selects z_stg (for both emitter slots), otherwise the z_sty/z_st pair is used.
+// Returns the code offset taken before anything is emitted, i.e. the start
+// of the whole sequence, which may begin with address computation.
+int MacroAssembler::reg2mem_opt(Register reg, const Address &a, bool is_double) {
+  const int store_offset = offset();
+  if (!is_double) {
+    reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_sty), CLASSIC_IFUN(z_st));
+  } else {
+    reg2mem_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_stg), CLASSIC_IFUN(z_stg));
+  }
+  return store_offset;
+}
+
+// Optimized emitter for mem to reg operations.
+// Uses modern instructions if running on modern hardware, classic instructions
+// otherwise. Prefers (usually shorter) classic instructions if applicable.
+// Data register (reg) will be used as work register where possible.
+//
+// If disp is neither a short nor a valid displacement, the address (or the
+// part of it that absorbs disp) is computed into reg itself, which the load
+// overwrites anyway. Only in the final case (reg == Z_R0 and reg != base) is
+// base modified temporarily and restored afterwards.
+void MacroAssembler::mem2reg_opt(Register reg,
+                                 int64_t  disp,
+                                 Register index,
+                                 Register base,
+                                 void (MacroAssembler::*modern) (Register, int64_t, Register, Register),
+                                 void (MacroAssembler::*classic)(Register, int64_t, Register, Register)) {
+  index = (index == noreg) ? Z_R0 : index;
+  if (Displacement::is_shortDisp(disp)) {
+    (this->*classic)(reg, disp, index, base);
+  } else {
+    if (Displacement::is_validDisp(disp)) {
+      (this->*modern)(reg, disp, index, base);
+    } else {
+      if ((reg == index) && (reg == base)) {
+        z_sllg(reg, reg, 1);                  // index + base == 2 * reg
+        add2reg(reg, disp);
+        (this->*classic)(reg, 0, noreg, reg);
+      } else if ((reg == index) && (reg != Z_R0)) {
+        add2reg(reg, disp);
+        (this->*classic)(reg, 0, reg, base);
+      } else if (reg == base) {
+        add2reg(reg, disp);
+        (this->*classic)(reg, 0, index, reg);
+      } else if (reg != Z_R0) {
+        add2reg(reg, disp, base);
+        (this->*classic)(reg, 0, index, reg);
+      } else { // reg == Z_R0 && reg != base here
+        add2reg(base, disp);
+        (this->*classic)(reg, 0, index, base);
+        add2reg(base, -disp);                 // Restore base.
+      }
+    }
+  }
+}
+
+// Load register reg from the location described by a.
+// is_double emits z_lg directly, otherwise the z_ly/z_l emitter pair is used.
+void MacroAssembler::mem2reg_opt(Register reg, const Address &a, bool is_double) {
+  if (is_double) {
+    z_lg(reg, a);
+  } else {
+    mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_ly), CLASSIC_IFUN(z_l));
+  }
+}
+
+// Load register reg from the location described by a, using z_lgf for
+// both the modern and the classic emitter slot.
+void MacroAssembler::mem2reg_signed_opt(Register reg, const Address &a) {
+  mem2reg_opt(reg, a.disp20(), a.indexOrR0(), a.baseOrR0(), MODERN_IFUN(z_lgf), CLASSIC_IFUN(z_lgf));
+}
+
+// AND the immediate mask into register r.
+//   wide == false: emits z_nilf; mask must be a simm32 value (asserted).
+//   wide == true:  loads mask into tmp (which must differ from r), then emits z_ngr.
+void MacroAssembler::and_imm(Register r, long mask,
+                             Register tmp /* = Z_R0 */,
+                             bool wide    /* = false */) {
+  assert(wide || Immediate::is_simm32(mask), "mask value too large");
+
+  if (!wide) {
+    z_nilf(r, mask);
+    return;
+  }
+
+  assert(r != tmp, " need a different temporary register !");
+  load_const_optimized(tmp, mask);
+  z_ngr(r, tmp);
+}
+
+// Calculate the 1's complement.
+// Note: The condition code is neither preserved nor correctly set by this code!!!
+// Note: (wide == false) does not protect the high order half of the target register
+//       from alteration. It only serves as optimization hint for 32-bit results.
+void MacroAssembler::not_(Register r1, Register r2, bool wide) {
+
+  if ((r2 == noreg) || (r2 == r1)) { // Calc 1's complement in place.
+    z_xilf(r1, -1);
+    if (wide) {
+      z_xihf(r1, -1);
+    }
+  } else { // Distinct src and dst registers.
+    if (VM_Version::has_DistinctOpnds()) {
+      load_const_optimized(r1, -1);
+      z_xgrk(r1, r2, r1);
+    } else {
+      if (wide) {
+        z_lgr(r1, r2);
+        z_xilf(r1, -1);
+        z_xihf(r1, -1);
+      } else {
+        z_lr(r1, r2);
+        z_xilf(r1, -1);
+      }
+    }
+  }
+}
+
+// Build a 64-bit mask with 1s in bit positions lBitPos..rBitPos (inclusive)
+// and 0s elsewhere. Bit 0 is the leftmost, bit 63 the rightmost position.
+// lBitPos == 0 is special-cased because the general shift would be by 64 bits.
+unsigned long MacroAssembler::create_mask(int lBitPos, int rBitPos) {
+  assert(lBitPos >= 0, "zero is leftmost bit position");
+  assert(rBitPos <= 63, "63 is rightmost bit position");
+  assert(lBitPos <= rBitPos, "inverted selection interval");
+  return (lBitPos == 0 ? (unsigned long)(-1L) : ((1UL<<(63-lBitPos+1))-1)) & (~((1UL<<(63-rBitPos))-1));
+}
+
+// Helper function for the "Rotate_then_" emitters.
+// Rotate src, then mask register contents such that only bits in range survive.
+// For oneBits == false, all bits not in range are set to 0. Useful for deleting all bits outside range.
+// For oneBits == true,  all bits not in range are set to 1. Useful for preserving all bits outside range.
+// The caller must ensure that the selected range only contains bits with defined value.
+void MacroAssembler::rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos,
+                                      int nRotate, bool src32bit, bool dst32bit, bool oneBits) {
+  assert(!(dst32bit && lBitPos < 32), "selection interval out of range for int destination");
+  bool sll4rll = (nRotate >= 0) && (nRotate <= (63-rBitPos)); // Substitute SLL(G) for RLL(G).
+  bool srl4rll = (nRotate <  0) && (-nRotate <= lBitPos);     // Substitute SRL(G) for RLL(G).
+  // Pre-determine which parts of dst will be zero after shift/rotate.
+  // The and-masking below is skipped for a word that is known to be zero.
+  bool llZero  =  sll4rll && (nRotate >= 16);
+  bool lhZero  = (sll4rll && (nRotate >= 32)) || (srl4rll && (nRotate <= -48));
+  bool lfZero  = llZero && lhZero;
+  bool hlZero  = (sll4rll && (nRotate >= 48)) || (srl4rll && (nRotate <= -32));
+  bool hhZero  =                                 (srl4rll && (nRotate <= -16));
+  bool hfZero  = hlZero && hhZero;
+
+  // rotate then mask src operand.
+  // if oneBits == true,  all bits outside selected range are 1s.
+  // if oneBits == false, all bits outside selected range are 0s.
+  if (src32bit) {   // There might be garbage in the upper 32 bits which will get masked away.
+    if (dst32bit) {
+      z_rll(dst, src, nRotate);   // Copy and rotate, upper half of reg remains undisturbed.
+    } else {
+      if      (sll4rll) { z_sllg(dst, src,  nRotate); }
+      else if (srl4rll) { z_srlg(dst, src, -nRotate); }
+      else              { z_rllg(dst, src,  nRotate); }
+    }
+  } else {
+    if      (sll4rll) { z_sllg(dst, src,  nRotate); }
+    else if (srl4rll) { z_srlg(dst, src, -nRotate); }
+    else              { z_rllg(dst, src,  nRotate); }
+  }
+
+  // Only the two 32-bit halves of the mask are needed by the emitters below.
+  unsigned long range_mask   = create_mask(lBitPos, rBitPos);
+  unsigned int  range_mask_h = (unsigned int)(range_mask >> 32);
+  unsigned int  range_mask_l = (unsigned int)range_mask;
+  // Works for z9 and newer H/W.
+  if (oneBits) {
+    if ((~range_mask_l) != 0)                { z_oilf(dst, ~range_mask_l); } // All bits outside range become 1s.
+    if (((~range_mask_h) != 0) && !dst32bit) { z_oihf(dst, ~range_mask_h); }
+  } else {
+    // All bits outside range become 0s
+    if (((~range_mask_l) != 0) &&              !lfZero) {
+      z_nilf(dst, range_mask_l);
+    }
+    if (((~range_mask_h) != 0) && !dst32bit && !hfZero) {
+      z_nihf(dst, range_mask_h);
+    }
+  }
+}
+
+// Rotate src, then insert selected range from rotated src into dst.
+// Clear dst before, if requested.
+void MacroAssembler::rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos,
+                                        int nRotate, bool clear_dst) {
+  // This version does not depend on src being zero-extended int2long.
+  nRotate &= 0x003f;                                       // For risbg, pretend it's an unsigned value.
+  z_risbg(dst, src, lBitPos, rBitPos, nRotate, clear_dst); // Rotate, then insert selected, clear the rest.
+}
+
+// Rotate src, then and selected range from rotated src into dst.
+// Set condition code only if so requested. Otherwise it is unpredictable.
+// See performance note in macroAssembler_s390.hpp for important information.
+void MacroAssembler::rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos,
+                                     int nRotate, bool test_only) {
+  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
+  // This version does not depend on src being zero-extended int2long.
+  nRotate &= 0x003f;                                       // For rnsbg, pretend it's an unsigned value.
+  // Must be the AND variant. The XOR variant (z_rxsbg) belongs to rotate_then_xor.
+  z_rnsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then and selected.
+}
+
+// Rotate src, then or selected range from rotated src into dst.
+// Set condition code only if so requested. Otherwise it is unpredictable.
+// See performance note in macroAssembler_s390.hpp for important information.
+void MacroAssembler::rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos,
+                                    int nRotate, bool test_only) {
+  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
+  // This version does not depend on src being zero-extended int2long.
+  nRotate &= 0x003f;                                       // For rosbg, pretend it's an unsigned value.
+  z_rosbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then or selected.
+}
+
+// Rotate src, then xor selected range from rotated src into dst.
+// Set condition code only if so requested. Otherwise it is unpredictable.
+// See performance note in macroAssembler_s390.hpp for important information.
+void MacroAssembler::rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos,
+                                     int nRotate, bool test_only) {
+  guarantee(!test_only, "Emitter not fit for test_only instruction variant.");
+  // This version does not depend on src being zero-extended int2long.
+  nRotate &= 0x003f;                                       // For rxsbg, pretend it's an unsigned value.
+  z_rxsbg(dst, src, lBitPos, rBitPos, nRotate, test_only); // Rotate, then xor selected.
+}
+
+// r1 := r1 + inc, where inc is either a register or a constant.
+// A constant is handed to add2reg(), so the restrictions of add2reg() apply.
+void MacroAssembler::add64(Register r1, RegisterOrConstant inc) {
+  if (inc.is_register()) {
+    z_agr(r1, inc.as_register());
+  } else { // constant
+    intptr_t imm = inc.as_constant();
+    add2reg(r1, imm);
+  }
+}
+
+// Helper function to multiply the 64bit contents of a register by a 16bit constant.
+// The optimization tries to avoid the mghi instruction, since it uses the FPU for
+// calculation and is thus rather slow.
+//
+// A constant with one or two bits set (after taking the absolute value) is
+// handled with shifts (and one add); a negative constant adds a final z_lcgr.
+// All other constants fall back to mghi.
+//
+// There is no handling for special cases, e.g. cval==0 or cval==1.
+//
+// Returns len of generated code block.
+unsigned int MacroAssembler::mul_reg64_const16(Register rval, Register work, int cval) {
+  int block_start = offset();
+
+  bool sign_flip = cval < 0;
+  cval = sign_flip ? -cval : cval;
+
+  BLOCK_COMMENT("Reg64*Con16 {");
+
+  int bit1 = cval & -cval;                 // Lowest set bit of cval.
+  if (bit1 == cval) {
+    z_sllg(rval, rval, exact_log2(bit1));
+    if (sign_flip) { z_lcgr(rval, rval); }
+  } else {
+    int bit2 = (cval-bit1) & -(cval-bit1); // Second-lowest set bit of cval.
+    if ((bit1+bit2) == cval) {
+      z_sllg(work, rval, exact_log2(bit1));
+      z_sllg(rval, rval, exact_log2(bit2));
+      z_agr(rval, work);
+      if (sign_flip) { z_lcgr(rval, rval); }
+    } else {
+      if (sign_flip) { z_mghi(rval, -cval); } // -cval is the original (negative) constant.
+      else           { z_mghi(rval, cval); }
+    }
+  }
+  BLOCK_COMMENT("} Reg64*Con16");
+
+  int block_end = offset();
+  return block_end - block_start;
+}
+
+// Generic operation r1 := r2 + imm.
+//
+// Should produce the best code for each supported CPU version.
+// r2 == noreg yields r1 := r1 + imm
+// imm == 0 emits either no instruction or r1 := r2 !
+// NOTES: 1) Don't use this function where fixed sized
+//           instruction sequences are required!!!
+//        2) Don't use this function if condition code
+//           setting is required!
+//        3) Despite being declared as int64_t, the parameter imm
+//           must be a simm_32 value (= signed 32-bit integer).
+void MacroAssembler::add2reg(Register r1, int64_t imm, Register r2) {
+  assert(Immediate::is_simm32(imm), "probably an implicit conversion went wrong");
+
+  if (r2 == noreg) { r2 = r1; }
+
+  // Handle special case imm == 0.
+  if (imm == 0) {
+    lgr_if_needed(r1, r2);
+    // Nothing else to do.
+    return;
+  }
+
+  if (!PreferLAoverADD || (r2 == Z_R0)) {
+    bool distinctOpnds = VM_Version::has_DistinctOpnds();
+
+    // Can we encode imm in 16 bits signed?
+    if (Immediate::is_simm16(imm)) {
+      if (r1 == r2) {
+        z_aghi(r1, imm);
+        return;
+      }
+      if (distinctOpnds) {
+        z_aghik(r1, r2, imm);
+        return;
+      }
+      z_lgr(r1, r2);
+      z_aghi(r1, imm);
+      return;
+    }
+  } else {
+    // Can we encode imm in 12 bits unsigned?
+    if (Displacement::is_shortDisp(imm)) {
+      z_la(r1, imm, r2);
+      return;
+    }
+    // Can we encode imm in 20 bits signed?
+    if (Displacement::is_validDisp(imm)) {
+      // Always use LAY instruction, so we don't need the tmp register.
+      z_lay(r1, imm, r2);
+      return;
+    }
+
+  }
+
+  // Can handle it (all possible values) with long immediates.
+  lgr_if_needed(r1, r2);
+  z_agfi(r1, imm);
+}
+
+// Generic operation r := b + x + d
+//
+// Addition of several operands with address generation semantics - sort of:
+//  - no restriction on the registers. Any register will do for any operand.
+//  - x == noreg: operand will be disregarded.
+//  - b == noreg: will use (contents of) result reg as operand (r := r + d).
+//  - x == Z_R0:  just disregard
+//  - b == Z_R0:  use as operand. This is not address generation semantics!!!
+//
+// The same restrictions as on add2reg() are valid!!!
+void MacroAssembler::add2reg_with_index(Register r, int64_t d, Register x, Register b) {
+  assert(Immediate::is_simm32(d), "probably an implicit conversion went wrong");
+
+  if (x == noreg) { x = Z_R0; }
+  if (b == noreg) { b = r; }
+
+  // Handle special case x == R0.
+  if (x == Z_R0) {
+    // Can simply add the immediate value to the base register.
+    add2reg(r, d, b);
+    return;
+  }
+
+  if (!PreferLAoverADD || (b == Z_R0)) {
+    bool distinctOpnds = VM_Version::has_DistinctOpnds();
+    // Handle special case d == 0.
+    if (d == 0) {
+      if (b == x)        { z_sllg(r, b, 1); return; }
+      if (r == x)        { z_agr(r, b);     return; }
+      if (r == b)        { z_agr(r, x);     return; }
+      if (distinctOpnds) { z_agrk(r, x, b); return; }
+      z_lgr(r, b);
+      z_agr(r, x);
+    } else {
+      // Same register addition as above, followed by adding the immediate.
+      if (x == b)             { z_sllg(r, x, 1); }
+      else if (r == x)        { z_agr(r, b); }
+      else if (r == b)        { z_agr(r, x); }
+      else if (distinctOpnds) { z_agrk(r, x, b); }
+      else {
+        z_lgr(r, b);
+        z_agr(r, x);
+      }
+      add2reg(r, d);
+    }
+  } else {
+    // Can we encode imm in 12 bits unsigned?
+    if (Displacement::is_shortDisp(d)) {
+      z_la(r, d, x, b);
+      return;
+    }
+    // Can we encode imm in 20 bits signed?
+    if (Displacement::is_validDisp(d)) {
+      z_lay(r, d, x, b);
+      return;
+    }
+    z_la(r, 0, x, b);
+    add2reg(r, d);
+  }
+}
+
+// Generic emitter (32bit) for direct memory increment.
+// For optimal code, do not specify Z_R0 as temp register.
+// tmp is only used if the memory-with-immediate form (z_asi) is not applicable.
+void MacroAssembler::add2mem_32(const Address &a, int64_t imm, Register tmp) {
+  if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
+    z_asi(a, imm);
+  } else {
+    z_lgf(tmp, a);
+    add2reg(tmp, imm);
+    z_st(tmp, a);
+  }
+}
+
+// Generic emitter (64bit) for direct memory increment.
+// tmp is only used if the memory-with-immediate form (z_agsi) is not applicable.
+void MacroAssembler::add2mem_64(const Address &a, int64_t imm, Register tmp) {
+  if (VM_Version::has_MemWithImmALUOps() && Immediate::is_simm8(imm)) {
+    z_agsi(a, imm);
+  } else {
+    z_lg(tmp, a);
+    add2reg(tmp, imm);
+    z_stg(tmp, a);
+  }
+}
+
+// Load a value of size_in_bytes (1, 2, 4 or 8) bytes from src into dst.
+// For sizes below 8, is_signed selects between the two load emitters of that size.
+// Any other size hits ShouldNotReachHere().
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
+  switch (size_in_bytes) {
+    case  8: z_lg(dst, src); break;
+    case  4: is_signed ? z_lgf(dst, src) : z_llgf(dst, src); break;
+    case  2: is_signed ? z_lgh(dst, src) : z_llgh(dst, src); break;
+    case  1: is_signed ? z_lgb(dst, src) : z_llgc(dst, src); break;
+    default: ShouldNotReachHere();
+  }
+}
+
+// Store a value of size_in_bytes (1, 2, 4 or 8) bytes from src to dst.
+// Any other size hits ShouldNotReachHere().
+void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
+  switch (size_in_bytes) {
+    case  8: z_stg(src, dst); break;
+    case  4: z_st(src, dst); break;
+    case  2: z_sth(src, dst); break;
+    case  1: z_stc(src, dst); break;
+    default: ShouldNotReachHere();
+  }
+}
+
+// Split a si20 offset (20bit, signed) into an ui12 offset (12bit, unsigned) and
+// a high-order summand in register tmp.
+//
+// With accumulate == true, the summand is built in Z_R0 and added to tmp;
+// otherwise it is built in tmp directly.
+// With fixed_codelen == true, the same instructions are emitted for every
+// offset (and the function never returns a negative value).
+//
+// return value: <  0: No split required, si20 actually has property uimm12.
+//               >= 0: Split performed. Use return value as uimm12 displacement and
+//                     tmp as index register.
+int MacroAssembler::split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate) {
+  assert(Immediate::is_simm20(si20_offset), "sanity");
+  int lg_off = (int)si20_offset &  0x0fff; // Punch out low-order 12 bits, always positive.
+  int ll_off = (int)si20_offset & ~0x0fff; // Force low-order 12 bits to zero.
+  assert((Displacement::is_shortDisp(si20_offset) && (ll_off == 0)) ||
+         !Displacement::is_shortDisp(si20_offset), "unexpected offset values");
+  assert((lg_off+ll_off) == si20_offset, "offset splitup error");
+
+  Register work = accumulate? Z_R0 : tmp;
+
+  if (fixed_codelen) {          // Len of code = 10 = 4 + 6.
+    z_lghi(work, ll_off>>12);   // Implicit sign extension.
+    z_slag(work, work, 12);
+  } else {                      // Len of code = 0..10.
+    if (ll_off == 0) { return -1; }
+    // ll_off has 8 significant bits (at most) plus sign.
+    if ((ll_off & 0x0000f000) == 0) {    // Non-zero bits only in upper halfbyte.
+      z_llilh(work, ll_off >> 16);
+      if (ll_off < 0) {                  // Sign-extension required.
+        z_lgfr(work, work);
+      }
+    } else {
+      if ((ll_off & 0x000f0000) == 0) {  // Non-zero bits only in lower halfbyte.
+        z_llill(work, ll_off);
+      } else {                           // Non-zero bits in both halfbytes.
+        z_lghi(work, ll_off>>12);        // Implicit sign extension.
+        z_slag(work, work, 12);
+      }
+    }
+  }
+  if (accumulate) { z_algr(tmp, work); } // len of code += 4
+  return lg_off;
+}
+
+// Load a float from (a + si20) into t. tmp is used as work register if si20
+// is not a valid displacement.
+// NOTE(review): split_largeoffset() asserts Immediate::is_simm20(si20). Confirm
+// that this can hold on the path where Displacement::is_validDisp(si20) is false.
+void MacroAssembler::load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
+  if (Displacement::is_validDisp(si20)) {
+    z_ley(t, si20, a);
+  } else {
+    // Fixed_codelen = true is a simple way to ensure that the size of load_float_largeoffset
+    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
+    // pool loads).
+    bool accumulate    = true;
+    bool fixed_codelen = true;
+    Register work;
+
+    if (fixed_codelen) {
+      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
+    } else {
+      accumulate = (a == tmp);
+    }
+    work = tmp;
+
+    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
+    if (disp12 < 0) {
+      z_le(t, si20, work);
+    } else {
+      if (accumulate) {
+        z_le(t, disp12, work);
+      } else {
+        z_le(t, disp12, work, a);
+      }
+    }
+  }
+}
+
+// Load a double from (a + si20) into t. tmp is used as work register if si20
+// is not a valid displacement.
+// NOTE(review): split_largeoffset() asserts Immediate::is_simm20(si20). Confirm
+// that this can hold on the path where Displacement::is_validDisp(si20) is false.
+void MacroAssembler::load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp) {
+  if (Displacement::is_validDisp(si20)) {
+    z_ldy(t, si20, a);
+  } else {
+    // Fixed_codelen = true is a simple way to ensure that the size of load_double_largeoffset
+    // does not depend on si20 (scratch buffer emit size == code buffer emit size for constant
+    // pool loads).
+    bool accumulate    = true;
+    bool fixed_codelen = true;
+    Register work;
+
+    if (fixed_codelen) {
+      z_lgr(tmp, a);  // Lgr_if_needed not applicable due to fixed_codelen.
+    } else {
+      accumulate = (a == tmp);
+    }
+    work = tmp;
+
+    int disp12 = split_largeoffset(si20, work, fixed_codelen, accumulate);
+    if (disp12 < 0) {
+      z_ld(t, si20, work);
+    } else {
+      if (accumulate) {
+        z_ld(t, disp12, work);
+      } else {
+        z_ld(t, disp12, work, a);
+      }
+    }
+  }
+}
+
+// PCrelative TOC access.
+// Returns distance (in bytes) from current position to start of consts section.
+// Returns 0 (zero) if no consts section exists or if it has size zero.
+long MacroAssembler::toc_distance() {
+  CodeSection* cs = code()->consts();
+  return (long)((cs != NULL) ? cs->start()-pc() : 0);
+}
+
+// Implementation on x86/sparc assumes that constant and instruction section are
+// adjacent, but this doesn't hold. Two special situations may occur, that we must
+// be able to handle:
+//   1. const section may be located apart from the inst section.
+//   2. const section may be empty
+// In both cases, we use the const section's start address to compute the "TOC",
+// this seems to occur only temporarily; in the final step we always seem to end up
+// with the pc-relative variant.
+// +// PC-relative offset could be +/-2**32 -> use long for disp +// Furthermore: makes no sense to have special code for +// adjacent const and inst sections. +void MacroAssembler::load_toc(Register Rtoc) { + // Simply use distance from start of const section (should be patched in the end). + long disp = toc_distance(); + + RelocationHolder rspec = internal_word_Relocation::spec(pc() + disp); + relocate(rspec); + z_larl(Rtoc, RelAddr::pcrel_off32(disp)); // Offset is in halfwords. +} + +// PCrelative TOC access. +// Load from anywhere pcrelative (with relocation of load instr) +void MacroAssembler::load_long_pcrelative(Register Rdst, address dataLocation) { + address pc = this->pc(); + ptrdiff_t total_distance = dataLocation - pc; + RelocationHolder rspec = internal_word_Relocation::spec(dataLocation); + + assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); + assert(total_distance != 0, "sanity"); + + // Some extra safety net. + if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { + guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away"); + } + + (this)->relocate(rspec, relocInfo::pcrel_addr_format); + z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); +} + + +// PCrelative TOC access. +// Load from anywhere pcrelative (with relocation of load instr) +// loaded addr has to be relocated when added to constant pool. +void MacroAssembler::load_addr_pcrelative(Register Rdst, address addrLocation) { + address pc = this->pc(); + ptrdiff_t total_distance = addrLocation - pc; + RelocationHolder rspec = internal_word_Relocation::spec(addrLocation); + + assert((total_distance & 0x01L) == 0, "halfword alignment is mandatory"); + + // Some extra safety net. 
+ if (!RelAddr::is_in_range_of_RelAddr32(total_distance)) { + guarantee(RelAddr::is_in_range_of_RelAddr32(total_distance), "too far away"); + } + + (this)->relocate(rspec, relocInfo::pcrel_addr_format); + z_lgrl(Rdst, RelAddr::pcrel_off32(total_distance)); +} + +// Generic operation: load a value from memory and test. +// CondCode indicates the sign (<0, ==0, >0) of the loaded value. +void MacroAssembler::load_and_test_byte(Register dst, const Address &a) { + z_lb(dst, a); + z_ltr(dst, dst); +} + +void MacroAssembler::load_and_test_short(Register dst, const Address &a) { + int64_t disp = a.disp20(); + if (Displacement::is_shortDisp(disp)) { + z_lh(dst, a); + } else if (Displacement::is_longDisp(disp)) { + z_lhy(dst, a); + } else { + guarantee(false, "displacement out of range"); + } + z_ltr(dst, dst); +} + +void MacroAssembler::load_and_test_int(Register dst, const Address &a) { + z_lt(dst, a); +} + +void MacroAssembler::load_and_test_int2long(Register dst, const Address &a) { + z_ltgf(dst, a); +} + +void MacroAssembler::load_and_test_long(Register dst, const Address &a) { + z_ltg(dst, a); +} + +// Test a bit in memory. +void MacroAssembler::testbit(const Address &a, unsigned int bit) { + assert(a.index() == noreg, "no index reg allowed in testbit"); + if (bit <= 7) { + z_tm(a.disp() + 3, a.base(), 1 << bit); + } else if (bit <= 15) { + z_tm(a.disp() + 2, a.base(), 1 << (bit - 8)); + } else if (bit <= 23) { + z_tm(a.disp() + 1, a.base(), 1 << (bit - 16)); + } else if (bit <= 31) { + z_tm(a.disp() + 0, a.base(), 1 << (bit - 24)); + } else { + ShouldNotReachHere(); + } +} + +// Test a bit in a register. Result is reflected in CC. +void MacroAssembler::testbit(Register r, unsigned int bitPos) { + if (bitPos < 16) { + z_tmll(r, 1U< no value-dependent optimizations. +// Do not kill condition code. 
+void MacroAssembler::load_const(Register t, long x) {
+  // Always two instructions, one per 32-bit half, upper half first.
+  const int upper_half = (int)(x >> 32);
+  const int lower_half = (int)(x & 0xffffffff);
+  Assembler::z_iihf(t, upper_half);
+  Assembler::z_iilf(t, lower_half);
+}
+
+// Load a 32bit constant into a 64bit register, sign-extend or zero-extend.
+// Patchable code sequence, but not atomically patchable.
+// Make sure to keep code size constant -> no value-dependent optimizations.
+// Do not kill condition code.
+void MacroAssembler::load_const_32to64(Register t, int64_t x, bool sign_extend) {
+  if (!sign_extend) {
+    Assembler::z_llilf(t, x);
+    return;
+  }
+  Assembler::z_lgfi(t, x);
+}
+
+// Load narrow oop constant, no decompression.
+// The constant is loaded without sign extension.
+void MacroAssembler::load_narrow_oop(Register t, narrowOop a) {
+  assert(UseCompressedOops, "must be on to call this method");
+  const bool sign_extend = false;
+  load_const_32to64(t, a, sign_extend);
+}
+
+// Load narrow klass constant, compression required.
+// k is encoded here; the encoded value is loaded without sign extension.
+void MacroAssembler::load_narrow_klass(Register t, Klass* k) {
+  assert(UseCompressedClassPointers, "must be on to call this method");
+  const bool sign_extend = false;
+  narrowKlass encoded_k = Klass::encode_klass(k);
+  load_const_32to64(t, encoded_k, sign_extend);
+}
+
+//------------------------------------------------------
+//  Compare (patchable) constant with register.
+//------------------------------------------------------
+
+// Compare narrow oop in reg with narrow oop constant, no decompression.
+void MacroAssembler::compare_immediate_narrow_oop(Register oop1, narrowOop oop2) {
+  assert(UseCompressedOops, "must be on to call this method");
+  Assembler::z_clfi(oop1, oop2);
+}
+
+// Compare narrow klass in reg with klass constant. The constant is
+// encoded here, the register contents are not decoded.
+void MacroAssembler::compare_immediate_narrow_klass(Register klass1, Klass* klass2) {
+  assert(UseCompressedClassPointers, "must be on to call this method");
+  const narrowKlass encoded_k = Klass::encode_klass(klass2);
+  Assembler::z_clfi(klass1, encoded_k);
+}
+
+//----------------------------------------------------------
+//  Check which kind of load_constant we have here.
+//----------------------------------------------------------
+
+// Detection of CPU version dependent load_const sequence.
+// The detection is valid only for code sequences generated by load_const,
+// not load_const_optimized.
+// Checks that the two consecutive instructions at a are iihf and iilf.
+bool MacroAssembler::is_load_const(address a) {
+  unsigned long inst1, inst2;
+
+  // The length of the first instruction locates the second one.
+  unsigned int len1 = get_instruction(a, &inst1);
+  get_instruction(a + len1, &inst2);
+
+  return is_z_iihf(inst1) && is_z_iilf(inst2);
+}
+
+// Detection of CPU version dependent load_const_32to64 sequence.
+// Mostly used for narrow oops and narrow Klass pointers.
+// The detection is valid only for code sequences generated by load_const_32to64.
+// Only the llilf form is recognized.
+bool MacroAssembler::is_load_const_32to64(address pos) {
+  unsigned long inst1;
+
+  get_instruction(pos, &inst1);
+  return is_z_llilf(inst1);
+}
+
+// Detection of compare_immediate_narrow sequence.
+// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
+bool MacroAssembler::is_compare_immediate32(address pos) {
+  return is_equal(pos, CLFI_ZOPC, RIL_MASK);
+}
+
+// Detection of compare_immediate_narrow sequence.
+// The detection is valid only for code sequences generated by compare_immediate_narrow_oop.
+bool MacroAssembler::is_compare_immediate_narrow_oop(address pos) {
+  return is_compare_immediate32(pos);
+}
+
+// Detection of compare_immediate_narrow sequence.
+// The detection is valid only for code sequences generated by compare_immediate_narrow_klass.
+bool MacroAssembler::is_compare_immediate_narrow_klass(address pos) {
+  return is_compare_immediate32(pos);
+}
+
+//-----------------------------------
+//  patch the load_constant
+//-----------------------------------
+
+// CPU-version dependent patching of load_const.
+void MacroAssembler::patch_const(address a, long x) {
+  assert(is_load_const(a), "not a load of a constant");
+  // The second instruction of the sequence starts 6 bytes after the first.
+  const int upper_half = (int) ((x >> 32) & 0xffffffff);
+  const int lower_half = (int)(x & 0xffffffff);
+  set_imm32(a, upper_half);
+  set_imm32(a + 6, lower_half);
+}
+
+// Patching the value of CPU version dependent load_const_32to64 sequence.
+// The passed ptr MUST be in compressed format!
+// Always returns 6.
+int MacroAssembler::patch_load_const_32to64(address pos, int64_t np) {
+  assert(is_load_const_32to64(pos), "not a load of a narrow ptr (oop or klass)");
+  set_imm32(pos, np);
+  return 6;
+}
+
+// Patching the value of CPU version dependent compare_immediate_narrow sequence.
+// The passed ptr MUST be in compressed format!
+// Always returns 6.
+int MacroAssembler::patch_compare_immediate_32(address pos, int64_t np) {
+  assert(is_compare_immediate32(pos), "not a compressed ptr compare");
+  set_imm32(pos, np);
+  return 6;
+}
+
+// Patching the immediate value of CPU version dependent load_narrow_oop sequence.
+// The passed ptr must NOT be in compressed format!
+// o is encoded here, then patched in via patch_load_const_32to64().
+int MacroAssembler::patch_load_narrow_oop(address pos, oop o) {
+  assert(UseCompressedOops, "Can only patch compressed oops");
+  const narrowOop encoded_oop = oopDesc::encode_heap_oop(o);
+  return patch_load_const_32to64(pos, encoded_oop);
+}
+
+// Patching the immediate value of CPU version dependent load_narrow_klass sequence.
+// The passed ptr must NOT be in compressed format!
+// k is encoded here, then patched in via patch_load_const_32to64().
+int MacroAssembler::patch_load_narrow_klass(address pos, Klass* k) {
+  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
+  const narrowKlass encoded_klass = Klass::encode_klass(k);
+  return patch_load_const_32to64(pos, encoded_klass);
+}
+
+// Patching the immediate value of CPU version dependent compare_immediate_narrow_oop sequence.
+// The passed ptr must NOT be in compressed format!
+int MacroAssembler::patch_compare_immediate_narrow_oop(address pos, oop o) {
+  assert(UseCompressedOops, "Can only patch compressed oops");
+  // Encode first; the patching routine expects the compressed value.
+  const narrowOop encoded_oop = oopDesc::encode_heap_oop(o);
+  return patch_compare_immediate_32(pos, encoded_oop);
+}
+
+// Patching the immediate value of CPU version dependent compare_immediate_narrow_klass sequence.
+// The passed ptr must NOT be in compressed format!
+int MacroAssembler::patch_compare_immediate_narrow_klass(address pos, Klass* k) {
+  assert(UseCompressedClassPointers, "Can only patch compressed klass pointers");
+  // Encode first; the patching routine expects the compressed value.
+  const narrowKlass encoded_klass = Klass::encode_klass(k);
+  return patch_compare_immediate_32(pos, encoded_klass);
+}
+
+//------------------------------------------------------------------------
+//  Extract the constant from a load_constant instruction stream.
+//------------------------------------------------------------------------
+
+// Get constant from a load_const sequence.
+// The first immediate supplies the upper, the second the lower 32 bits.
+long MacroAssembler::get_const(address a) {
+  assert(is_load_const(a), "not a load of a constant");
+  const unsigned long upper_half = (unsigned long) (get_imm32(a,0) & 0xffffffff);
+  const unsigned long lower_half = (unsigned long) (get_imm32(a,1) & 0xffffffff);
+  return (long) ((upper_half << 32) | lower_half);
+}
+
+//--------------------------------------
+//  Store a constant in memory.
+//--------------------------------------
+
+// General emitter to move a constant to memory.
+// The store is atomic.
+//  o Address must be given in RS format (no index register)
+//  o Displacement should be 12bit unsigned for efficiency. 20bit signed also supported.
+//  o Constant can be 1, 2, 4, or 8 bytes, signed or unsigned.
+//  o Memory slot can be 1, 2, 4, or 8 bytes, signed or unsigned.
+//  o Memory slot must be at least as wide as constant, will assert otherwise.
+//  o Signed constants will sign-extend, unsigned constants will zero-extend to slot width.
+int MacroAssembler::store_const(const Address &dest, long imm, + unsigned int lm, unsigned int lc, + Register scratch) { + int64_t disp = dest.disp(); + Register base = dest.base(); + assert(!dest.has_index(), "not supported"); + assert((lm==1)||(lm==2)||(lm==4)||(lm==8), "memory length not supported"); + assert((lc==1)||(lc==2)||(lc==4)||(lc==8), "constant length not supported"); + assert(lm>=lc, "memory slot too small"); + assert(lc==8 || Immediate::is_simm(imm, lc*8), "const out of range"); + assert(Displacement::is_validDisp(disp), "displacement out of range"); + + bool is_shortDisp = Displacement::is_shortDisp(disp); + int store_offset = -1; + + // For target len == 1 it's easy. + if (lm == 1) { + store_offset = offset(); + if (is_shortDisp) { + z_mvi(disp, base, imm); + return store_offset; + } else { + z_mviy(disp, base, imm); + return store_offset; + } + } + + // All the "good stuff" takes an unsigned displacement. + if (is_shortDisp) { + // NOTE: Cannot use clear_mem for imm==0, because it is not atomic. + + store_offset = offset(); + switch (lm) { + case 2: // Lc == 1 handled correctly here, even for unsigned. Instruction does no widening. + z_mvhhi(disp, base, imm); + return store_offset; + case 4: + if (Immediate::is_simm16(imm)) { + z_mvhi(disp, base, imm); + return store_offset; + } + break; + case 8: + if (Immediate::is_simm16(imm)) { + z_mvghi(disp, base, imm); + return store_offset; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + // Can't optimize, so load value and store it. + guarantee(scratch != noreg, " need a scratch register here !"); + if (imm != 0) { + load_const_optimized(scratch, imm); // Preserves CC anyway. + } else { + // Leave CC alone!! + (void) clear_reg(scratch, true, false); // Indicate unused result. 
+ } + + store_offset = offset(); + if (is_shortDisp) { + switch (lm) { + case 2: + z_sth(scratch, disp, Z_R0, base); + return store_offset; + case 4: + z_st(scratch, disp, Z_R0, base); + return store_offset; + case 8: + z_stg(scratch, disp, Z_R0, base); + return store_offset; + default: + ShouldNotReachHere(); + break; + } + } else { + switch (lm) { + case 2: + z_sthy(scratch, disp, Z_R0, base); + return store_offset; + case 4: + z_sty(scratch, disp, Z_R0, base); + return store_offset; + case 8: + z_stg(scratch, disp, Z_R0, base); + return store_offset; + default: + ShouldNotReachHere(); + break; + } + } + return -1; // should not reach here +} + +//=================================================================== +//=== N O T P A T CH A B L E C O N S T A N T S === +//=================================================================== + +// Load constant x into register t with a fast instrcution sequence +// depending on the bits in x. Preserves CC under all circumstances. +int MacroAssembler::load_const_optimized_rtn_len(Register t, long x, bool emit) { + if (x == 0) { + int len; + if (emit) { + len = clear_reg(t, true, false); + } else { + len = 4; + } + return len; + } + + if (Immediate::is_simm16(x)) { + if (emit) { z_lghi(t, x); } + return 4; + } + + // 64 bit value: | part1 | part2 | part3 | part4 | + // At least one part is not zero! + int part1 = ((x >> 32) & 0xffff0000) >> 16; + int part2 = (x >> 32) & 0x0000ffff; + int part3 = (x & 0xffff0000) >> 16; + int part4 = (x & 0x0000ffff); + + // Lower word only (unsigned). + if ((part1 == 0) && (part2 == 0)) { + if (part3 == 0) { + if (emit) z_llill(t, part4); + return 4; + } + if (part4 == 0) { + if (emit) z_llilh(t, part3); + return 4; + } + if (emit) z_llilf(t, (int)(x & 0xffffffff)); + return 6; + } + + // Upper word only. 
+ if ((part3 == 0) && (part4 == 0)) { + if (part1 == 0) { + if (emit) z_llihl(t, part2); + return 4; + } + if (part2 == 0) { + if (emit) z_llihh(t, part1); + return 4; + } + if (emit) z_llihf(t, (int)(x >> 32)); + return 6; + } + + // Lower word only (signed). + if ((part1 == 0x0000ffff) && (part2 == 0x0000ffff) && ((part3 & 0x00008000) != 0)) { + if (emit) z_lgfi(t, (int)(x & 0xffffffff)); + return 6; + } + + int len = 0; + + if ((part1 == 0) || (part2 == 0)) { + if (part1 == 0) { + if (emit) z_llihl(t, part2); + len += 4; + } else { + if (emit) z_llihh(t, part1); + len += 4; + } + } else { + if (emit) z_llihf(t, (int)(x >> 32)); + len += 6; + } + + if ((part3 == 0) || (part4 == 0)) { + if (part3 == 0) { + if (emit) z_iill(t, part4); + len += 4; + } else { + if (emit) z_iilh(t, part3); + len += 4; + } + } else { + if (emit) z_iilf(t, (int)(x & 0xffffffff)); + len += 6; + } + return len; +} + +//===================================================================== +//=== H I G H E R L E V E L B R A N C H E M I T T E R S === +//===================================================================== + +// Note: In the worst case, one of the scratch registers is destroyed!!! +void MacroAssembler::compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { + // Right operand is constant. + if (x2.is_constant()) { + jlong value = x2.as_constant(); + compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/true); + return; + } + + // Right operand is in register. + compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/true); +} + +// Note: In the worst case, one of the scratch registers is destroyed!!! +void MacroAssembler::compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { + // Right operand is constant. 
+ if (x2.is_constant()) { + jlong value = x2.as_constant(); + compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/false, /*has_sign=*/false); + return; + } + + // Right operand is in register. + compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/false, /*has_sign=*/false); +} + +// Note: In the worst case, one of the scratch registers is destroyed!!! +void MacroAssembler::compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { + // Right operand is constant. + if (x2.is_constant()) { + jlong value = x2.as_constant(); + compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/true); + return; + } + + // Right operand is in register. + compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/true); +} + +void MacroAssembler::compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& lbl) { + // Right operand is constant. + if (x2.is_constant()) { + jlong value = x2.as_constant(); + compare_and_branch_optimized(r1, value, cond, lbl, /*len64=*/true, /*has_sign=*/false); + return; + } + + // Right operand is in register. + compare_and_branch_optimized(r1, x2.as_register(), cond, lbl, /*len64=*/true, /*has_sign=*/false); +} + +// Generate an optimal branch to the branch target. +// Optimal means that a relative branch (brc or brcl) is used if the +// branch distance is short enough. Loading the target address into a +// register and branching via reg is used as fallback only. +// +// Used registers: +// Z_R1 - work reg. Holds branch target address. +// Used in fallback case only. +// +// This version of branch_optimized is good for cases where the target address is known +// and constant, i.e. is never changed (no relocation, no patching). 
+void MacroAssembler::branch_optimized(Assembler::branch_condition cond, address branch_addr) { + address branch_origin = pc(); + + if (RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { + z_brc(cond, branch_addr); + } else if (RelAddr::is_in_range_of_RelAddr32(branch_addr, branch_origin)) { + z_brcl(cond, branch_addr); + } else { + load_const_optimized(Z_R1, branch_addr); // CC must not get killed by load_const_optimized. + z_bcr(cond, Z_R1); + } +} + +// This version of branch_optimized is good for cases where the target address +// is potentially not yet known at the time the code is emitted. +// +// One very common case is a branch to an unbound label which is handled here. +// The caller might know (or hope) that the branch distance is short enough +// to be encoded in a 16bit relative address. In this case he will pass a +// NearLabel branch_target. +// Care must be taken with unbound labels. Each call to target(label) creates +// an entry in the patch queue for that label to patch all references of the label +// once it gets bound. Those recorded patch locations must be patchable. Otherwise, +// an assertion fires at patch time. +void MacroAssembler::branch_optimized(Assembler::branch_condition cond, Label& branch_target) { + if (branch_target.is_bound()) { + address branch_addr = target(branch_target); + branch_optimized(cond, branch_addr); + } else { + z_brcl(cond, branch_target); // Let's hope target is in range. Otherwise, we will abort at patch time. + } +} + +// Generate an optimal compare and branch to the branch target. +// Optimal means that a relative branch (clgrj, brc or brcl) is used if the +// branch distance is short enough. Loading the target address into a +// register and branching via reg is used as fallback only. 
+// +// Input: +// r1 - left compare operand +// r2 - right compare operand +void MacroAssembler::compare_and_branch_optimized(Register r1, + Register r2, + Assembler::branch_condition cond, + address branch_addr, + bool len64, + bool has_sign) { + unsigned int casenum = (len64?2:0)+(has_sign?0:1); + + address branch_origin = pc(); + if (VM_Version::has_CompareBranch() && RelAddr::is_in_range_of_RelAddr16(branch_addr, branch_origin)) { + switch (casenum) { + case 0: z_crj( r1, r2, cond, branch_addr); break; + case 1: z_clrj (r1, r2, cond, branch_addr); break; + case 2: z_cgrj(r1, r2, cond, branch_addr); break; + case 3: z_clgrj(r1, r2, cond, branch_addr); break; + default: ShouldNotReachHere(); break; + } + } else { + switch (casenum) { + case 0: z_cr( r1, r2); break; + case 1: z_clr(r1, r2); break; + case 2: z_cgr(r1, r2); break; + case 3: z_clgr(r1, r2); break; + default: ShouldNotReachHere(); break; + } + branch_optimized(cond, branch_addr); + } +} + +// Generate an optimal compare and branch to the branch target. +// Optimal means that a relative branch (clgij, brc or brcl) is used if the +// branch distance is short enough. Loading the target address into a +// register and branching via reg is used as fallback only. 
+// +// Input: +// r1 - left compare operand (in register) +// x2 - right compare operand (immediate) +void MacroAssembler::compare_and_branch_optimized(Register r1, + jlong x2, + Assembler::branch_condition cond, + Label& branch_target, + bool len64, + bool has_sign) { + address branch_origin = pc(); + bool x2_imm8 = (has_sign && Immediate::is_simm8(x2)) || (!has_sign && Immediate::is_uimm8(x2)); + bool is_RelAddr16 = (branch_target.is_bound() && + RelAddr::is_in_range_of_RelAddr16(target(branch_target), branch_origin)); + unsigned int casenum = (len64?2:0)+(has_sign?0:1); + + if (VM_Version::has_CompareBranch() && is_RelAddr16 && x2_imm8) { + switch (casenum) { + case 0: z_cij( r1, x2, cond, branch_target); break; + case 1: z_clij(r1, x2, cond, branch_target); break; + case 2: z_cgij(r1, x2, cond, branch_target); break; + case 3: z_clgij(r1, x2, cond, branch_target); break; + default: ShouldNotReachHere(); break; + } + return; + } + + if (x2 == 0) { + switch (casenum) { + case 0: z_ltr(r1, r1); break; + case 1: z_ltr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! + case 2: z_ltgr(r1, r1); break; + case 3: z_ltgr(r1, r1); break; // Caution: unsigned test only provides zero/notZero indication! 
+ default: ShouldNotReachHere(); break; + } + } else { + if ((has_sign && Immediate::is_simm16(x2)) || (!has_sign && Immediate::is_uimm(x2, 15))) { + switch (casenum) { + case 0: z_chi(r1, x2); break; + case 1: z_chi(r1, x2); break; // positive immediate < 2**15 + case 2: z_cghi(r1, x2); break; + case 3: z_cghi(r1, x2); break; // positive immediate < 2**15 + default: break; + } + } else if ( (has_sign && Immediate::is_simm32(x2)) || (!has_sign && Immediate::is_uimm32(x2)) ) { + switch (casenum) { + case 0: z_cfi( r1, x2); break; + case 1: z_clfi(r1, x2); break; + case 2: z_cgfi(r1, x2); break; + case 3: z_clgfi(r1, x2); break; + default: ShouldNotReachHere(); break; + } + } else { + // No instruction with immediate operand possible, so load into register. + Register scratch = (r1 != Z_R0) ? Z_R0 : Z_R1; + load_const_optimized(scratch, x2); + switch (casenum) { + case 0: z_cr( r1, scratch); break; + case 1: z_clr(r1, scratch); break; + case 2: z_cgr(r1, scratch); break; + case 3: z_clgr(r1, scratch); break; + default: ShouldNotReachHere(); break; + } + } + } + branch_optimized(cond, branch_target); +} + +// Generate an optimal compare and branch to the branch target. +// Optimal means that a relative branch (clgrj, brc or brcl) is used if the +// branch distance is short enough. Loading the target address into a +// register and branching via reg is used as fallback only. 
+// +// Input: +// r1 - left compare operand +// r2 - right compare operand +void MacroAssembler::compare_and_branch_optimized(Register r1, + Register r2, + Assembler::branch_condition cond, + Label& branch_target, + bool len64, + bool has_sign) { + unsigned int casenum = (len64?2:0)+(has_sign?0:1); + + if (branch_target.is_bound()) { + address branch_addr = target(branch_target); + compare_and_branch_optimized(r1, r2, cond, branch_addr, len64, has_sign); + } else { + { + switch (casenum) { + case 0: z_cr( r1, r2); break; + case 1: z_clr(r1, r2); break; + case 2: z_cgr(r1, r2); break; + case 3: z_clgr(r1, r2); break; + default: ShouldNotReachHere(); break; + } + branch_optimized(cond, branch_target); + } + } +} + +//=========================================================================== +//=== END H I G H E R L E V E L B R A N C H E M I T T E R S === +//=========================================================================== + +AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int index = oop_recorder()->allocate_metadata_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return AddressLiteral((address)obj, rspec); +} + +AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int index = oop_recorder()->find_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return AddressLiteral((address)obj, rspec); +} + +AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int oop_index = oop_recorder()->allocate_oop_index(obj); + return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); +} + +AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int 
oop_index = oop_recorder()->find_index(obj); + return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); +} + +// NOTE: destroys r +void MacroAssembler::c2bool(Register r, Register t) { + z_lcr(t, r); // t = -r + z_or(r, t); // r = -r OR r + z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise. +} + +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) { + return RegisterOrConstant(value + offset); + } + + BLOCK_COMMENT("delayed_value {"); + // Load indirectly to solve generation ordering problem. + load_absolute_address(tmp, (address) delayed_value_addr); // tmp = a; + z_lg(tmp, 0, tmp); // tmp = *tmp; + +#ifdef ASSERT + NearLabel L; + compare64_and_branch(tmp, (intptr_t)0L, Assembler::bcondNotEqual, L); + z_illtrap(); + bind(L); +#endif + + if (offset != 0) { + z_agfi(tmp, offset); // tmp = tmp + offset; + } + + BLOCK_COMMENT("} delayed_value"); + return RegisterOrConstant(tmp); +} + +// Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos' +// and return the resulting instruction. +// Dest_pos and inst_pos are 32 bit only. These parms can only designate +// relative positions. +// Use correct argument types. Do not pre-calculate distance. 
+unsigned long MacroAssembler::patched_branch(address dest_pos, unsigned long inst, address inst_pos) { + int c = 0; + unsigned long patched_inst = 0; + if (is_call_pcrelative_short(inst) || + is_branch_pcrelative_short(inst) || + is_branchoncount_pcrelative_short(inst) || + is_branchonindex32_pcrelative_short(inst)) { + c = 1; + int m = fmask(15, 0); // simm16(-1, 16, 32); + int v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 32); + patched_inst = (inst & ~m) | v; + } else if (is_compareandbranch_pcrelative_short(inst)) { + c = 2; + long m = fmask(31, 16); // simm16(-1, 16, 48); + long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); + patched_inst = (inst & ~m) | v; + } else if (is_branchonindex64_pcrelative_short(inst)) { + c = 3; + long m = fmask(31, 16); // simm16(-1, 16, 48); + long v = simm16(RelAddr::pcrel_off16(dest_pos, inst_pos), 16, 48); + patched_inst = (inst & ~m) | v; + } else if (is_call_pcrelative_long(inst) || is_branch_pcrelative_long(inst)) { + c = 4; + long m = fmask(31, 0); // simm32(-1, 16, 48); + long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); + patched_inst = (inst & ~m) | v; + } else if (is_pcrelative_long(inst)) { // These are the non-branch pc-relative instructions. 
+ c = 5; + long m = fmask(31, 0); // simm32(-1, 16, 48); + long v = simm32(RelAddr::pcrel_off32(dest_pos, inst_pos), 16, 48); + patched_inst = (inst & ~m) | v; + } else { + print_dbg_msg(tty, inst, "not a relative branch", 0); + dump_code_range(tty, inst_pos, 32, "not a pcrelative branch"); + ShouldNotReachHere(); + } + + long new_off = get_pcrel_offset(patched_inst); + if (new_off != (dest_pos-inst_pos)) { + tty->print_cr("case %d: dest_pos = %p, inst_pos = %p, disp = %ld(%12.12lx)", c, dest_pos, inst_pos, new_off, new_off); + print_dbg_msg(tty, inst, "<- original instruction: branch patching error", 0); + print_dbg_msg(tty, patched_inst, "<- patched instruction: branch patching error", 0); +#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#endif + ShouldNotReachHere(); + } + return patched_inst; +} + +// Only called when binding labels (share/vm/asm/assembler.cpp) +// Pass arguments as intended. Do not pre-calculate distance. +void MacroAssembler::pd_patch_instruction(address branch, address target) { + unsigned long stub_inst; + int inst_len = get_instruction(branch, &stub_inst); + + set_instruction(branch, patched_branch(target, stub_inst, branch), inst_len); +} + + +// Extract relative address (aka offset). +// inv_simm16 works for 4-byte instructions only. +// compare and branch instructions are 6-byte and have a 16bit offset "in the middle". 
+long MacroAssembler::get_pcrel_offset(unsigned long inst) { + + if (MacroAssembler::is_pcrelative_short(inst)) { + if (((inst&0xFFFFffff00000000UL) == 0) && ((inst&0x00000000FFFF0000UL) != 0)) { + return RelAddr::inv_pcrel_off16(inv_simm16(inst)); + } else { + return RelAddr::inv_pcrel_off16(inv_simm16_48(inst)); + } + } + + if (MacroAssembler::is_pcrelative_long(inst)) { + return RelAddr::inv_pcrel_off32(inv_simm32(inst)); + } + + print_dbg_msg(tty, inst, "not a pcrelative instruction", 6); +#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#else + ShouldNotReachHere(); +#endif + return -1; +} + +long MacroAssembler::get_pcrel_offset(address pc) { + unsigned long inst; + unsigned int len = get_instruction(pc, &inst); + +#ifdef ASSERT + long offset; + if (MacroAssembler::is_pcrelative_short(inst) || MacroAssembler::is_pcrelative_long(inst)) { + offset = get_pcrel_offset(inst); + } else { + offset = -1; + } + + if (offset == -1) { + dump_code_range(tty, pc, 32, "not a pcrelative instruction"); +#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#else + ShouldNotReachHere(); +#endif + } + return offset; +#else + return get_pcrel_offset(inst); +#endif // ASSERT +} + +// Get target address from pc-relative instructions. +address MacroAssembler::get_target_addr_pcrel(address pc) { + assert(is_pcrelative_long(pc), "not a pcrelative instruction"); + return pc + get_pcrel_offset(pc); +} + +// Patch pc relative load address. +void MacroAssembler::patch_target_addr_pcrel(address pc, address con) { + unsigned long inst; + // Offset is +/- 2**32 -> use long. + ptrdiff_t distance = con - pc; + + get_instruction(pc, &inst); + + if (is_pcrelative_short(inst)) { + *(short *)(pc+2) = RelAddr::pcrel_off16(con, pc); // Instructions are at least 2-byte aligned, no test required. + + // Some extra safety net. 
+ if (!RelAddr::is_in_range_of_RelAddr16(distance)) { + print_dbg_msg(tty, inst, "distance out of range (16bit)", 4); + dump_code_range(tty, pc, 32, "distance out of range (16bit)"); + guarantee(RelAddr::is_in_range_of_RelAddr16(distance), "too far away (more than +/- 2**16"); + } + return; + } + + if (is_pcrelative_long(inst)) { + *(int *)(pc+2) = RelAddr::pcrel_off32(con, pc); + + // Some Extra safety net. + if (!RelAddr::is_in_range_of_RelAddr32(distance)) { + print_dbg_msg(tty, inst, "distance out of range (32bit)", 6); + dump_code_range(tty, pc, 32, "distance out of range (32bit)"); + guarantee(RelAddr::is_in_range_of_RelAddr32(distance), "too far away (more than +/- 2**32"); + } + return; + } + + guarantee(false, "not a pcrelative instruction to patch!"); +} + +// "Current PC" here means the address just behind the basr instruction. +address MacroAssembler::get_PC(Register result) { + z_basr(result, Z_R0); // Don't branch, just save next instruction address in result. + return pc(); +} + +// Get current PC + offset. +// Offset given in bytes, must be even! +// "Current PC" here means the address of the larl instruction plus the given offset. +address MacroAssembler::get_PC(Register result, int64_t offset) { + address here = pc(); + z_larl(result, offset/2); // Save target instruction address in result. + return here + offset; +} + +// Resize_frame with SP(new) = SP(old) - [offset]. +void MacroAssembler::resize_frame_sub(Register offset, Register fp, bool load_fp) +{ + assert_different_registers(offset, fp, Z_SP); + if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); } + + z_sgr(Z_SP, offset); + z_stg(fp, _z_abi(callers_sp), Z_SP); +} + +// Resize_frame with SP(new) = [addr]. +void MacroAssembler::resize_frame_absolute(Register addr, Register fp, bool load_fp) { + assert_different_registers(addr, fp, Z_SP); + if (load_fp) { z_lg(fp, _z_abi(callers_sp), Z_SP); } + + if (addr != Z_R0) { + // Minimize stalls by not using Z_SP immediately after update. 
+ z_stg(fp, _z_abi(callers_sp), addr); + z_lgr(Z_SP, addr); + } else { + z_lgr(Z_SP, addr); + z_stg(fp, _z_abi(callers_sp), Z_SP); + } +} + +// Resize_frame with SP(new) = SP(old) + offset. +void MacroAssembler::resize_frame(RegisterOrConstant offset, Register fp, bool load_fp) { + assert_different_registers(fp, Z_SP); + if (load_fp) z_lg(fp, _z_abi(callers_sp), Z_SP); + + if (Displacement::is_validDisp((int)_z_abi(callers_sp) + offset.constant_or_zero())) { + // Minimize stalls by first using, then updating Z_SP. + // Do that only if we have a small positive offset or if ExtImm are available. + z_stg(fp, Address(Z_SP, offset, _z_abi(callers_sp))); + add64(Z_SP, offset); + } else { + add64(Z_SP, offset); + z_stg(fp, _z_abi(callers_sp), Z_SP); + } +} + +void MacroAssembler::push_frame(Register bytes, Register old_sp, bool copy_sp, bool bytes_with_inverted_sign) { +#ifdef ASSERT + assert_different_registers(bytes, old_sp, Z_SP); + if (!copy_sp) { + z_cgr(old_sp, Z_SP); + asm_assert_eq("[old_sp]!=[Z_SP]", 0x211); + } +#endif + if (copy_sp) { z_lgr(old_sp, Z_SP); } + if (bytes_with_inverted_sign) { + z_stg(old_sp, 0, bytes, Z_SP); + add2reg_with_index(Z_SP, 0, bytes, Z_SP); + } else { + z_sgr(Z_SP, bytes); // Z_sgfr sufficient, but probably not faster. + z_stg(old_sp, 0, Z_SP); + } +} + +unsigned int MacroAssembler::push_frame(unsigned int bytes, Register scratch) { + long offset = Assembler::align(bytes, frame::alignment_in_bytes); + + if (Displacement::is_validDisp(-offset)) { + // Minimize stalls by first using, then updating Z_SP. + // Do that only if we have ExtImm available. + z_stg(Z_SP, -offset, Z_SP); + add2reg(Z_SP, -offset); + } else { + if (scratch != Z_R0 && scratch != Z_R1) { + z_stg(Z_SP, -offset, Z_SP); + add2reg(Z_SP, -offset); + } else { // scratch == Z_R0 || scratch == Z_R1 + z_lgr(scratch, Z_SP); + add2reg(Z_SP, -offset); + z_stg(scratch, 0, Z_SP); + } + } + return offset; +} + +// Push a frame of size `bytes' plus abi160 on top. 
+unsigned int MacroAssembler::push_frame_abi160(unsigned int bytes) { + BLOCK_COMMENT("push_frame_abi160 {"); + unsigned int res = push_frame(bytes + frame::z_abi_160_size); + BLOCK_COMMENT("} push_frame_abi160"); + return res; +} + +// Pop current C frame. +void MacroAssembler::pop_frame() { + BLOCK_COMMENT("pop_frame:"); + Assembler::z_lg(Z_SP, _z_abi(callers_sp), Z_SP); +} + +void MacroAssembler::call_VM_leaf_base(address entry_point, bool allow_relocation) { + if (allow_relocation) { + call_c(entry_point); + } else { + call_c_static(entry_point); + } +} + +void MacroAssembler::call_VM_leaf_base(address entry_point) { + bool allow_relocation = true; + call_VM_leaf_base(entry_point, allow_relocation); +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register last_java_sp, + address entry_point, + bool allow_relocation, + bool check_exceptions) { // Defaults to true. + // Allow_relocation indicates, if true, that the generated code shall + // be fit for code relocation or referenced data relocation. In other + // words: all addresses must be considered variable. PC-relative addressing + // is not possible then. + // On the other hand, if (allow_relocation == false), addresses and offsets + // may be considered stable, enabling us to take advantage of some PC-relative + // addressing tweaks. These might improve performance and reduce code size. + + // Determine last_java_sp register. + if (!last_java_sp->is_valid()) { + last_java_sp = Z_SP; // Load Z_SP as SP. + } + + set_top_ijava_frame_at_SP_as_last_Java_frame(last_java_sp, Z_R1, allow_relocation); + + // ARG1 must hold thread address. + z_lgr(Z_ARG1, Z_thread); + + address return_pc = NULL; + if (allow_relocation) { + return_pc = call_c(entry_point); + } else { + return_pc = call_c_static(entry_point); + } + + reset_last_Java_frame(allow_relocation); + + // C++ interp handles this in the interpreter. 
+ check_and_handle_popframe(Z_thread); + check_and_handle_earlyret(Z_thread); + + // Check for pending exceptions. + if (check_exceptions) { + // Check for pending exceptions (java_thread is set upon return). + load_and_test_long(Z_R0_scratch, Address(Z_thread, Thread::pending_exception_offset())); + + // This used to conditionally jump to forward_exception however it is + // possible if we relocate that the branch will not reach. So we must jump + // around so we can always reach. + + Label ok; + z_bre(ok); // Bcondequal is the same as bcondZero. + call_stub(StubRoutines::forward_exception_entry()); + bind(ok); + } + + // Get oop result if there is one and reset the value in the thread. + if (oop_result->is_valid()) { + get_vm_result(oop_result); + } + + _last_calls_return_pc = return_pc; // Wipe out other (error handling) calls. +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register last_java_sp, + address entry_point, + bool check_exceptions) { // Defaults to true. + bool allow_relocation = true; + call_VM_base(oop_result, last_java_sp, entry_point, allow_relocation, check_exceptions); +} + +// VM calls without explicit last_java_sp. + +void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) { + // Call takes possible detour via InterpreterMacroAssembler. + call_VM_base(oop_result, noreg, entry_point, true, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { + // Z_ARG1 is reserved for the thread. + lgr_if_needed(Z_ARG2, arg_1); + call_VM(oop_result, entry_point, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { + // Z_ARG1 is reserved for the thread. 
+ lgr_if_needed(Z_ARG2, arg_1); + assert(arg_2 != Z_ARG2, "smashed argument"); + lgr_if_needed(Z_ARG3, arg_2); + call_VM(oop_result, entry_point, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, + Register arg_3, bool check_exceptions) { + // Z_ARG1 is reserved for the thread. + lgr_if_needed(Z_ARG2, arg_1); + assert(arg_2 != Z_ARG2, "smashed argument"); + lgr_if_needed(Z_ARG3, arg_2); + assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); + lgr_if_needed(Z_ARG4, arg_3); + call_VM(oop_result, entry_point, check_exceptions); +} + +// VM static calls without explicit last_java_sp. + +void MacroAssembler::call_VM_static(Register oop_result, address entry_point, bool check_exceptions) { + // Call takes possible detour via InterpreterMacroAssembler. + call_VM_base(oop_result, noreg, entry_point, false, check_exceptions); +} + +void MacroAssembler::call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2, + Register arg_3, bool check_exceptions) { + // Z_ARG1 is reserved for the thread. + lgr_if_needed(Z_ARG2, arg_1); + assert(arg_2 != Z_ARG2, "smashed argument"); + lgr_if_needed(Z_ARG3, arg_2); + assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); + lgr_if_needed(Z_ARG4, arg_3); + call_VM_static(oop_result, entry_point, check_exceptions); +} + +// VM calls with explicit last_java_sp. + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions) { + // Call takes possible detour via InterpreterMacroAssembler. + call_VM_base(oop_result, last_java_sp, entry_point, true, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { + // Z_ARG1 is reserved for the thread. 
+ lgr_if_needed(Z_ARG2, arg_1); + call_VM(oop_result, last_java_sp, entry_point, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, + Register arg_2, bool check_exceptions) { + // Z_ARG1 is reserved for the thread. + lgr_if_needed(Z_ARG2, arg_1); + assert(arg_2 != Z_ARG2, "smashed argument"); + lgr_if_needed(Z_ARG3, arg_2); + call_VM(oop_result, last_java_sp, entry_point, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, + Register arg_2, Register arg_3, bool check_exceptions) { + // Z_ARG1 is reserved for the thread. + lgr_if_needed(Z_ARG2, arg_1); + assert(arg_2 != Z_ARG2, "smashed argument"); + lgr_if_needed(Z_ARG3, arg_2); + assert(arg_3 != Z_ARG2 && arg_3 != Z_ARG3, "smashed argument"); + lgr_if_needed(Z_ARG4, arg_3); + call_VM(oop_result, last_java_sp, entry_point, check_exceptions); +} + +// VM leaf calls. + +void MacroAssembler::call_VM_leaf(address entry_point) { + // Call takes possible detour via InterpreterMacroAssembler. 
+ call_VM_leaf_base(entry_point, true); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) { + if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); + call_VM_leaf(entry_point); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) { + if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); + assert(arg_2 != Z_ARG1, "smashed argument"); + if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); + call_VM_leaf(entry_point); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) { + if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); + assert(arg_2 != Z_ARG1, "smashed argument"); + if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); + assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument"); + if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); + call_VM_leaf(entry_point); +} + +// Static VM leaf calls. +// Really static VM leaf calls are never patched. + +void MacroAssembler::call_VM_leaf_static(address entry_point) { + // Call takes possible detour via InterpreterMacroAssembler. 
+ call_VM_leaf_base(entry_point, false); +} + +void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1) { + if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); + call_VM_leaf_static(entry_point); +} + +void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2) { + if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); + assert(arg_2 != Z_ARG1, "smashed argument"); + if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); + call_VM_leaf_static(entry_point); +} + +void MacroAssembler::call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3) { + if (arg_1 != noreg) lgr_if_needed(Z_ARG1, arg_1); + assert(arg_2 != Z_ARG1, "smashed argument"); + if (arg_2 != noreg) lgr_if_needed(Z_ARG2, arg_2); + assert(arg_3 != Z_ARG1 && arg_3 != Z_ARG2, "smashed argument"); + if (arg_3 != noreg) lgr_if_needed(Z_ARG3, arg_3); + call_VM_leaf_static(entry_point); +} + +// Don't use detour via call_c(reg). +address MacroAssembler::call_c(address function_entry) { + load_const(Z_R1, function_entry); + return call(Z_R1); +} + +// Variant for really static (non-relocatable) calls which are never patched. +address MacroAssembler::call_c_static(address function_entry) { + load_absolute_address(Z_R1, function_entry); +#if 0 // def ASSERT + // Verify that call site did not move. + load_const_optimized(Z_R0, function_entry); + z_cgr(Z_R1, Z_R0); + z_brc(bcondEqual, 3); + z_illtrap(0xba); +#endif + return call(Z_R1); +} + +address MacroAssembler::call_c_opt(address function_entry) { + bool success = call_far_patchable(function_entry, -2 /* emit relocation + constant */); + _last_calls_return_pc = success ? 
pc() : NULL; + return _last_calls_return_pc; +} + +// Identify a call_far_patchable instruction: LARL + LG + BASR +// +// nop ; optionally, if required for alignment +// lgrl rx,A(TOC entry) ; PC-relative access into constant pool +// basr Z_R14,rx ; end of this instruction must be aligned to a word boundary +// +// Code pattern will eventually get patched into variant2 (see below for detection code). +// +bool MacroAssembler::is_call_far_patchable_variant0_at(address instruction_addr) { + address iaddr = instruction_addr; + + // Check for the actual load instruction. + if (!is_load_const_from_toc(iaddr)) { return false; } + iaddr += load_const_from_toc_size(); + + // Check for the call (BASR) instruction, finally. + assert(iaddr-instruction_addr+call_byregister_size() == call_far_patchable_size(), "size mismatch"); + return is_call_byregister(iaddr); +} + +// Identify a call_far_patchable instruction: BRASL +// +// Code pattern to suits atomic patching: +// nop ; Optionally, if required for alignment. +// nop ... ; Multiple filler nops to compensate for size difference (variant0 is longer). +// nop ; For code pattern detection: Prepend each BRASL with a nop. +// brasl Z_R14, ; End of code must be 4-byte aligned ! +bool MacroAssembler::is_call_far_patchable_variant2_at(address instruction_addr) { + const address call_addr = (address)((intptr_t)instruction_addr + call_far_patchable_size() - call_far_pcrelative_size()); + + // Check for correct number of leading nops. + address iaddr; + for (iaddr = instruction_addr; iaddr < call_addr; iaddr += nop_size()) { + if (!is_z_nop(iaddr)) { return false; } + } + assert(iaddr == call_addr, "sanity"); + + // --> Check for call instruction. + if (is_call_far_pcrelative(call_addr)) { + assert(call_addr-instruction_addr+call_far_pcrelative_size() == call_far_patchable_size(), "size mismatch"); + return true; + } + + return false; +} + +// Emit a NOT mt-safely patchable 64 bit absolute call. 
+// If toc_offset == -2, then the destination of the call (= target) is emitted +// to the constant pool and a runtime_call relocation is added +// to the code buffer. +// If toc_offset != -2, target must already be in the constant pool at +// _ctableStart+toc_offset (a caller can retrieve toc_offset +// from the runtime_call relocation). +// Special handling of emitting to scratch buffer when there is no constant pool. +// Slightly changed code pattern. We emit an additional nop if we would +// not end emitting at a word aligned address. This is to ensure +// an atomically patchable displacement in brasl instructions. +// +// A call_far_patchable comes in different flavors: +// - LARL(CP) / LG(CP) / BR (address in constant pool, access via CP register) +// - LGRL(CP) / BR (address in constant pool, pc-relative accesss) +// - BRASL (relative address of call target coded in instruction) +// All flavors occupy the same amount of space. Length differences are compensated +// by leading nops, such that the instruction sequence always ends at the same +// byte offset. This is required to keep the return offset constant. +// Furthermore, the return address (the end of the instruction sequence) is forced +// to be on a 4-byte boundary. This is required for atomic patching, should we ever +// need to patch the call target of the BRASL flavor. +// RETURN value: false, if no constant pool entry could be allocated, true otherwise. +bool MacroAssembler::call_far_patchable(address target, int64_t tocOffset) { + // Get current pc and ensure word alignment for end of instr sequence. + const address start_pc = pc(); + const intptr_t start_off = offset(); + assert(!call_far_patchable_requires_alignment_nop(start_pc), "call_far_patchable requires aligned address"); + const ptrdiff_t dist = (ptrdiff_t)(target - (start_pc + 2)); // Prepend each BRASL with a nop. 
+ const bool emit_target_to_pool = (tocOffset == -2) && !code_section()->scratch_emit(); + const bool emit_relative_call = !emit_target_to_pool && + RelAddr::is_in_range_of_RelAddr32(dist) && + ReoptimizeCallSequences && + !code_section()->scratch_emit(); + + if (emit_relative_call) { + // Add padding to get the same size as below. + const unsigned int padding = call_far_patchable_size() - call_far_pcrelative_size(); + unsigned int current_padding; + for (current_padding = 0; current_padding < padding; current_padding += nop_size()) { z_nop(); } + assert(current_padding == padding, "sanity"); + + // relative call: len = 2(nop) + 6 (brasl) + // CodeBlob resize cannot occur in this case because + // this call is emitted into pre-existing space. + z_nop(); // Prepend each BRASL with a nop. + z_brasl(Z_R14, target); + } else { + // absolute call: Get address from TOC. + // len = (load TOC){6|0} + (load from TOC){6} + (basr){2} = {14|8} + if (emit_target_to_pool) { + // When emitting the call for the first time, we do not need to use + // the pc-relative version. It will be patched anyway, when the code + // buffer is copied. + // Relocation is not needed when !ReoptimizeCallSequences. + relocInfo::relocType rt = ReoptimizeCallSequences ? relocInfo::runtime_call_w_cp_type : relocInfo::none; + AddressLiteral dest(target, rt); + // Store_oop_in_toc() adds dest to the constant table. As side effect, this kills + // inst_mark(). Reset if possible. + bool reset_mark = (inst_mark() == pc()); + tocOffset = store_oop_in_toc(dest); + if (reset_mark) { set_inst_mark(); } + if (tocOffset == -1) { + return false; // Couldn't create constant pool entry. + } + } + assert(offset() == start_off, "emit no code before this point!"); + + address tocPos = pc() + tocOffset; + if (emit_target_to_pool) { + tocPos = code()->consts()->start() + tocOffset; + } + load_long_pcrelative(Z_R14, tocPos); + z_basr(Z_R14, Z_R14); + } + +#ifdef ASSERT + // Assert that we can identify the emitted call. 
+ assert(is_call_far_patchable_at(addr_at(start_off)), "can't identify emitted call"); + assert(offset() == start_off+call_far_patchable_size(), "wrong size"); + + if (emit_target_to_pool) { + assert(get_dest_of_call_far_patchable_at(addr_at(start_off), code()->consts()->start()) == target, + "wrong encoding of dest address"); + } +#endif + return true; // success +} + +// Identify a call_far_patchable instruction. +// For more detailed information see header comment of call_far_patchable. +bool MacroAssembler::is_call_far_patchable_at(address instruction_addr) { + return is_call_far_patchable_variant2_at(instruction_addr) || // short version: BRASL + is_call_far_patchable_variant0_at(instruction_addr); // long version LARL + LG + BASR +} + +// Does the call_far_patchable instruction use a pc-relative encoding +// of the call destination? +bool MacroAssembler::is_call_far_patchable_pcrelative_at(address instruction_addr) { + // Variant 2 is pc-relative. + return is_call_far_patchable_variant2_at(instruction_addr); +} + +bool MacroAssembler::is_call_far_pcrelative(address instruction_addr) { + // Prepend each BRASL with a nop. + return is_z_nop(instruction_addr) && is_z_brasl(instruction_addr + nop_size()); // Match at position after one nop required. +} + +// Set destination address of a call_far_patchable instruction. +void MacroAssembler::set_dest_of_call_far_patchable_at(address instruction_addr, address dest, int64_t tocOffset) { + ResourceMark rm; + + // Now that CP entry is verified, patch call to a pc-relative call (if circumstances permit). + int code_size = MacroAssembler::call_far_patchable_size(); + CodeBuffer buf(instruction_addr, code_size); + MacroAssembler masm(&buf); + masm.call_far_patchable(dest, tocOffset); + ICache::invalidate_range(instruction_addr, code_size); // Empty on z. +} + +// Get dest address of a call_far_patchable instruction. 
+address MacroAssembler::get_dest_of_call_far_patchable_at(address instruction_addr, address ctable) { + // Dynamic TOC: absolute address in constant pool. + // Check variant2 first, it is more frequent. + + // Relative address encoded in call instruction. + if (is_call_far_patchable_variant2_at(instruction_addr)) { + return MacroAssembler::get_target_addr_pcrel(instruction_addr + nop_size()); // Prepend each BRASL with a nop. + + // Absolute address in constant pool. + } else if (is_call_far_patchable_variant0_at(instruction_addr)) { + address iaddr = instruction_addr; + + long tocOffset = get_load_const_from_toc_offset(iaddr); + address tocLoc = iaddr + tocOffset; + return *(address *)(tocLoc); + } else { + fprintf(stderr, "MacroAssembler::get_dest_of_call_far_patchable_at has a problem at %p:\n", instruction_addr); + fprintf(stderr, "not a call_far_patchable: %16.16lx %16.16lx, len = %d\n", + *(unsigned long*)instruction_addr, + *(unsigned long*)(instruction_addr+8), + call_far_patchable_size()); + Disassembler::decode(instruction_addr, instruction_addr+call_far_patchable_size()); + ShouldNotReachHere(); + return NULL; + } +} + +void MacroAssembler::align_call_far_patchable(address pc) { + if (call_far_patchable_requires_alignment_nop(pc)) { z_nop(); } +} + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { +} + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { +} + +// Read from the polling page. +// Use TM or TMY instruction, depending on read offset. +// offset = 0: Use TM, safepoint polling. +// offset < 0: Use TMY, profiling safepoint polling. 
+void MacroAssembler::load_from_polling_page(Register polling_page_address, int64_t offset) { + if (Immediate::is_uimm12(offset)) { + z_tm(offset, polling_page_address, mask_safepoint); + } else { + z_tmy(offset, polling_page_address, mask_profiling); + } +} + +// Check whether z_instruction is a read access to the polling page +// which was emitted by load_from_polling_page(..). +bool MacroAssembler::is_load_from_polling_page(address instr_loc) { + unsigned long z_instruction; + unsigned int ilen = get_instruction(instr_loc, &z_instruction); + + if (ilen == 2) { return false; } // It's none of the allowed instructions. + + if (ilen == 4) { + if (!is_z_tm(z_instruction)) { return false; } // It's len=4, but not a z_tm. fail. + + int ms = inv_mask(z_instruction,8,32); // mask + int ra = inv_reg(z_instruction,16,32); // base register + int ds = inv_uimm12(z_instruction); // displacement + + if (!(ds == 0 && ra != 0 && ms == mask_safepoint)) { + return false; // It's not a z_tm(0, ra, mask_safepoint). Fail. + } + + } else { /* if (ilen == 6) */ + + assert(!is_z_lg(z_instruction), "old form (LG) polling page access. Please fix and use TM(Y)."); + + if (!is_z_tmy(z_instruction)) { return false; } // It's len=6, but not a z_tmy. fail. + + int ms = inv_mask(z_instruction,8,48); // mask + int ra = inv_reg(z_instruction,16,48); // base register + int ds = inv_simm20(z_instruction); // displacement + } + + return true; +} + +// Extract poll address from instruction and ucontext. 
+address MacroAssembler::get_poll_address(address instr_loc, void* ucontext) { + assert(ucontext != NULL, "must have ucontext"); + ucontext_t* uc = (ucontext_t*) ucontext; + unsigned long z_instruction; + unsigned int ilen = get_instruction(instr_loc, &z_instruction); + + if (ilen == 4 && is_z_tm(z_instruction)) { + int ra = inv_reg(z_instruction, 16, 32); // base register + int ds = inv_uimm12(z_instruction); // displacement + address addr = (address)uc->uc_mcontext.gregs[ra]; + return addr + ds; + } else if (ilen == 6 && is_z_tmy(z_instruction)) { + int ra = inv_reg(z_instruction, 16, 48); // base register + int ds = inv_simm20(z_instruction); // displacement + address addr = (address)uc->uc_mcontext.gregs[ra]; + return addr + ds; + } + + ShouldNotReachHere(); + return NULL; +} + +// Extract poll register from instruction. +uint MacroAssembler::get_poll_register(address instr_loc) { + unsigned long z_instruction; + unsigned int ilen = get_instruction(instr_loc, &z_instruction); + + if (ilen == 4 && is_z_tm(z_instruction)) { + return (uint)inv_reg(z_instruction, 16, 32); // base register + } else if (ilen == 6 && is_z_tmy(z_instruction)) { + return (uint)inv_reg(z_instruction, 16, 48); // base register + } + + ShouldNotReachHere(); + return 0; +} + +bool MacroAssembler::is_memory_serialization(int instruction, JavaThread* thread, void* ucontext) { + ShouldNotCallThis(); + return false; +} + +// Write serialization page so VM thread can do a pseudo remote membar +// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. 
+void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { + assert_different_registers(tmp1, tmp2); + z_sllg(tmp2, thread, os::get_serialize_page_shift_count()); + load_const_optimized(tmp1, (long) os::get_memory_serialize_page()); + + int mask = os::get_serialize_page_mask(); + if (Immediate::is_uimm16(mask)) { + z_nill(tmp2, mask); + z_llghr(tmp2, tmp2); + } else { + z_nilf(tmp2, mask); + z_llgfr(tmp2, tmp2); + } + + z_release(); + z_st(Z_R0, 0, tmp2, tmp1); +} + +// Don't rely on register locking, always use Z_R1 as scratch register instead. +void MacroAssembler::bang_stack_with_offset(int offset) { + // Stack grows down, caller passes positive offset. + assert(offset > 0, "must bang with positive offset"); + if (Displacement::is_validDisp(-offset)) { + z_tmy(-offset, Z_SP, mask_stackbang); + } else { + add2reg(Z_R1, -offset, Z_SP); // Do not destroy Z_SP!!! + z_tm(0, Z_R1, mask_stackbang); // Just banging. + } +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1); + Register end = t1; + Register thread = Z_thread; + + z_lg(obj, Address(thread, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + z_lay(end, Address(obj, con_size_in_bytes)); + } else { + z_lay(end, Address(obj, var_size_in_bytes)); + } + z_cg(end, Address(thread, JavaThread::tlab_end_offset())); + branch_optimized(bcondHigh, slow_case); + + // Update the tlab top pointer. + z_stg(end, Address(thread, JavaThread::tlab_top_offset())); + + // Recover var_size_in_bytes if necessary. + if (var_size_in_bytes == end) { + z_sgr(var_size_in_bytes, obj); + } +} + +// Emitter for interface method lookup. 
+// input: recv_klass, intf_klass, itable_index +// output: method_result +// kills: itable_index, temp1_reg, Z_R0, Z_R1 +// TODO: Temp2_reg is unused. we may use this emitter also in the itable stubs. +// If the register is still not needed then, remove it. +void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register temp1_reg, + Register temp2_reg, + Label& no_such_interface) { + + const Register vtable_len = temp1_reg; // Used to compute itable_entry_addr. + const Register itable_entry_addr = Z_R1_scratch; + const Register itable_interface = Z_R0_scratch; + + BLOCK_COMMENT("lookup_interface_method {"); + + // Load start of itable entries into itable_entry_addr. + z_llgf(vtable_len, Address(recv_klass, InstanceKlass::vtable_length_offset())); + z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes())); + + // Loop over all itable entries until desired interfaceOop(Rinterface) found. + const int vtable_base_offset = in_bytes(InstanceKlass::vtable_start_offset()); + + add2reg_with_index(itable_entry_addr, + vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(), + recv_klass, vtable_len); + + const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; + Label search; + + bind(search); + + // Handle IncompatibleClassChangeError. + // If the entry is NULL then we've reached the end of the table + // without finding the expected interface, so throw an exception. + load_and_test_long(itable_interface, Address(itable_entry_addr)); + z_bre(no_such_interface); + + add2reg(itable_entry_addr, itable_offset_search_inc); + z_cgr(itable_interface, intf_klass); + z_brne(search); + + // Entry found and itable_entry_addr points to it, get offset of vtable for interface. 
+ + const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() - + itableOffsetEntry::interface_offset_in_bytes()) - + itable_offset_search_inc; + + // Compute itableMethodEntry and get method and entry point + // we use addressing with index and displacement, since the formula + // for computing the entry's offset has a fixed and a dynamic part, + // the latter depending on the matched interface entry and on the case, + // that the itable index has been passed as a register, not a constant value. + int method_offset = itableMethodEntry::method_offset_in_bytes(); + // Fixed part (displacement), common operand. + Register itable_offset; // Dynamic part (index register). + + if (itable_index.is_register()) { + // Compute the method's offset in that register, for the formula, see the + // else-clause below. + itable_offset = itable_index.as_register(); + + z_sllg(itable_offset, itable_offset, exact_log2(itableMethodEntry::size() * wordSize)); + z_agf(itable_offset, vtable_offset_offset, itable_entry_addr); + } else { + itable_offset = Z_R1_scratch; + // Displacement increases. + method_offset += itableMethodEntry::size() * wordSize * itable_index.as_constant(); + + // Load index from itable. + z_llgf(itable_offset, vtable_offset_offset, itable_entry_addr); + } + + // Finally load the method's oop. + z_lg(method_result, method_offset, itable_offset, recv_klass); + BLOCK_COMMENT("} lookup_interface_method"); +} + +// Lookup for virtual method invocation. +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + assert_different_registers(recv_klass, vtable_index.register_or_noreg()); + assert(vtableEntry::size() * wordSize == wordSize, + "else adjust the scaling in the code below"); + + BLOCK_COMMENT("lookup_virtual_method {"); + + const int base = in_bytes(Klass::vtable_start_offset()); + + if (vtable_index.is_constant()) { + // Load with base + disp. 
+ Address vtable_entry_addr(recv_klass, + vtable_index.as_constant() * wordSize + + base + + vtableEntry::method_offset_in_bytes()); + + z_lg(method_result, vtable_entry_addr); + } else { + // Shift index properly and load with base + index + disp. + Register vindex = vtable_index.as_register(); + Address vtable_entry_addr(recv_klass, vindex, + base + vtableEntry::method_offset_in_bytes()); + + z_sllg(vindex, vindex, exact_log2(wordSize)); + z_lg(method_result, vtable_entry_addr); + } + BLOCK_COMMENT("} lookup_virtual_method"); +} + +// Factor out code to call ic_miss_handler. +// Generate code to call the inline cache miss handler. +// +// In most cases, this code will be generated out-of-line. +// The method parameters are intended to provide some variability. +// ICM - Label which has to be bound to the start of useful code (past any traps). +// trapMarker - Marking byte for the generated illtrap instructions (if any). +// Any value except 0x00 is supported. +// = 0x00 - do not generate illtrap instructions. +// use nops to fill ununsed space. +// requiredSize - required size of the generated code. If the actually +// generated code is smaller, use padding instructions to fill up. +// = 0 - no size requirement, no padding. +// scratch - scratch register to hold branch target address. +// +// The method returns the code offset of the bound label. +unsigned int MacroAssembler::call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch) { + intptr_t startOffset = offset(); + + // Prevent entry at content_begin(). + if (trapMarker != 0) { + z_illtrap(trapMarker); + } + + // Load address of inline cache miss code into scratch register + // and branch to cache miss handler. + BLOCK_COMMENT("IC miss handler {"); + BIND(ICM); + unsigned int labelOffset = offset(); + AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub()); + + load_const_optimized(scratch, icmiss); + z_br(scratch); + + // Fill unused space. 
+ if (requiredSize > 0) { + while ((offset() - startOffset) < requiredSize) { + if (trapMarker == 0) { + z_nop(); + } else { + z_illtrap(trapMarker); + } + } + } + BLOCK_COMMENT("} IC miss handler"); + return labelOffset; +} + +void MacroAssembler::nmethod_UEP(Label& ic_miss) { + Register ic_reg = as_Register(Matcher::inline_cache_reg_encode()); + int klass_offset = oopDesc::klass_offset_in_bytes(); + if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) { + if (VM_Version::has_CompareBranch()) { + z_cgij(Z_ARG1, 0, Assembler::bcondEqual, ic_miss); + } else { + z_ltgr(Z_ARG1, Z_ARG1); + z_bre(ic_miss); + } + } + // Compare cached class against klass from receiver. + compare_klass_ptr(ic_reg, klass_offset, Z_ARG1, false); + z_brne(ic_miss); +} + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp1_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + + const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + const int sco_offset = in_bytes(Klass::super_check_offset_offset()); + + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + bool need_slow_path = (must_load_sco || + super_check_offset.constant_or_zero() == sc_offset); + + // Input registers must not overlap. 
+ assert_different_registers(sub_klass, super_klass, temp1_reg); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp1_reg != noreg, "supply either a temp or a register offset"); + } + + const Register Rsuper_check_offset = temp1_reg; + + NearLabel L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1 || + (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), + "at most one NULL in the batch, usually"); + + BLOCK_COMMENT("check_klass_subtype_fast_path {"); + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + compare64_and_branch(sub_klass, super_klass, bcondEqual, *L_success); + + // Check the supertype display, which is uint. + if (must_load_sco) { + z_llgf(Rsuper_check_offset, sco_offset, super_klass); + super_check_offset = RegisterOrConstant(Rsuper_check_offset); + } + Address super_check_addr(sub_klass, super_check_offset, 0); + z_cg(super_klass, super_check_addr); // compare w/ displayed supertype + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) 
+ // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + // Hacked jmp, which may only be used just before L_fallthrough. +#define final_jmp(label) \ + if (&(label) == &L_fallthrough) { /*do nothing*/ } \ + else { branch_optimized(Assembler::bcondAlways, label); } /*omit semicolon*/ + + if (super_check_offset.is_register()) { + branch_optimized(Assembler::bcondEqual, *L_success); + z_cfi(super_check_offset.as_register(), sc_offset); + if (L_failure == &L_fallthrough) { + branch_optimized(Assembler::bcondEqual, *L_slow_path); + } else { + branch_optimized(Assembler::bcondNotEqual, *L_failure); + final_jmp(*L_slow_path); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + branch_optimized(Assembler::bcondEqual, *L_success); + } else { + branch_optimized(Assembler::bcondNotEqual, *L_slow_path); + final_jmp(*L_success); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + branch_optimized(Assembler::bcondEqual, *L_success); + } else { + branch_optimized(Assembler::bcondNotEqual, *L_failure); + final_jmp(*L_success); + } + } + + bind(L_fallthrough); +#undef local_brc +#undef final_jmp + BLOCK_COMMENT("} check_klass_subtype_fast_path"); + // fallthru (to slow path) +} + +void MacroAssembler::check_klass_subtype_slow_path(Register Rsubklass, + Register Rsuperklass, + Register Rarray_ptr, // tmp + Register Rlength, // tmp + Label* L_success, + Label* L_failure) { + // Input registers must not overlap. 
+ // Also check for R1 which is explicitely used here. + assert_different_registers(Z_R1, Rsubklass, Rsuperklass, Rarray_ptr, Rlength); + NearLabel L_fallthrough, L_loop; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + const int ss_offset = in_bytes(Klass::secondary_supers_offset()); + const int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + + const int length_offset = Array::length_offset_in_bytes(); + const int base_offset = Array::base_offset_in_bytes(); + + // Hacked jmp, which may only be used just before L_fallthrough. +#define final_jmp(label) \ + if (&(label) == &L_fallthrough) { /*do nothing*/ } \ + else branch_optimized(Assembler::bcondAlways, label) /*omit semicolon*/ + + NearLabel loop_iterate, loop_count, match; + + BLOCK_COMMENT("check_klass_subtype_slow_path {"); + z_lg(Rarray_ptr, ss_offset, Rsubklass); + + load_and_test_int(Rlength, Address(Rarray_ptr, length_offset)); + branch_optimized(Assembler::bcondZero, *L_failure); + + // Oops in table are NO MORE compressed. + z_cg(Rsuperklass, base_offset, Rarray_ptr); // Check array element for match. + z_bre(match); // Shortcut for array length = 1. + + // No match yet, so we must walk the array's elements. + z_lngfr(Rlength, Rlength); + z_sllg(Rlength, Rlength, LogBytesPerWord); // -#bytes of cache array + z_llill(Z_R1, BytesPerWord); // Set increment/end index. + add2reg(Rlength, 2 * BytesPerWord); // start index = -(n-2)*BytesPerWord + z_slgr(Rarray_ptr, Rlength); // start addr: += (n-2)*BytesPerWord + z_bru(loop_count); + + BIND(loop_iterate); + z_cg(Rsuperklass, base_offset, Rlength, Rarray_ptr); // Check array element for match. + z_bre(match); + BIND(loop_count); + z_brxlg(Rlength, Z_R1, loop_iterate); + + // Rsuperklass not found among secondary super classes -> failure. 
+ branch_optimized(Assembler::bcondAlways, *L_failure); + + // Got a hit. Return success (zero result). Set cache. + // Cache load doesn't happen here. For speed it is directly emitted by the compiler. + + BIND(match); + + z_stg(Rsuperklass, sc_offset, Rsubklass); // Save result to cache. + + final_jmp(*L_success); + + // Exit to the surrounding code. + BIND(L_fallthrough); +#undef local_brc +#undef final_jmp + BLOCK_COMMENT("} check_klass_subtype_slow_path"); +} + +// Emitter for combining fast and slow path. +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp1_reg, + Register temp2_reg, + Label& L_success) { + NearLabel failure; + BLOCK_COMMENT(err_msg("check_klass_subtype(%s subclass of %s) {", sub_klass->name(), super_klass->name())); + check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, + &L_success, &failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, + temp1_reg, temp2_reg, &L_success, NULL); + BIND(failure); + BLOCK_COMMENT("} check_klass_subtype"); +} + +// Increment a counter at counter_address when the eq condition code is +// set. Kills registers tmp1_reg and tmp2_reg and preserves the condition code. +void MacroAssembler::increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg) { + Label l; + z_brne(l); + load_const(tmp1_reg, counter_address); + add2mem_32(Address(tmp1_reg), 1, tmp2_reg); + z_cr(tmp1_reg, tmp1_reg); // Set cc to eq. + bind(l); +} + +// Semantics are dependent on the slow_case label: +// If the slow_case label is not NULL, failure to biased-lock the object +// transfers control to the location of the slow_case label. If the +// object could be biased-locked, control is transferred to the done label. +// The condition code is unpredictable. +// +// If the slow_case label is NULL, failure to biased-lock the object results +// in a transfer of control to the done label with a condition code of not_equal. 
+// If the biased-lock could be successfully obtained, control is transfered to +// the done label with a condition code of equal. +// It is mandatory to react on the condition code At the done label. +// +void MacroAssembler::biased_locking_enter(Register obj_reg, + Register mark_reg, + Register temp_reg, + Register temp2_reg, // May be Z_RO! + Label &done, + Label *slow_case) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert_different_registers(obj_reg, mark_reg, temp_reg, temp2_reg); + + Label cas_label; // Try, if implemented, CAS locking. Fall thru to slow path otherwise. + + BLOCK_COMMENT("biased_locking_enter {"); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid. + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits. + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, + "biased locking makes assumptions about bit layout"); + z_lr(temp_reg, mark_reg); + z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place); + z_chi(temp_reg, markOopDesc::biased_lock_pattern); + z_brne(cas_label); // Try cas if object is not biased, i.e. cannot be biased locked. + + load_prototype_header(temp_reg, obj_reg); + load_const_optimized(temp2_reg, ~((int) markOopDesc::age_mask_in_place)); + + z_ogr(temp_reg, Z_thread); + z_xgr(temp_reg, mark_reg); + z_ngr(temp_reg, temp2_reg); + if (PrintBiasedLockingStatistics) { + increment_counter_eq((address) BiasedLocking::biased_lock_entry_count_addr(), mark_reg, temp2_reg); + // Restore mark_reg. + z_lg(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg); + } + branch_optimized(Assembler::bcondEqual, done); // Biased lock obtained, return success. 
+ + Label try_revoke_bias; + Label try_rebias; + Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes()); + + //---------------------------------------------------------------------------- + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + z_tmll(temp_reg, markOopDesc::biased_lock_mask_in_place); + z_brnaz(try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + z_tmll(temp_reg, markOopDesc::epoch_mask_in_place); + z_brnaz(try_rebias); + + //---------------------------------------------------------------------------- + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. 
+ z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | + markOopDesc::epoch_mask_in_place); + z_lgr(temp_reg, Z_thread); + z_llgfr(mark_reg, mark_reg); + z_ogr(temp_reg, mark_reg); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + z_csg(mark_reg, temp_reg, 0, obj_reg); + + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + + if (PrintBiasedLockingStatistics) { + increment_counter_eq((address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), + temp_reg, temp2_reg); + } + if (slow_case != NULL) { + branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way. + } + branch_optimized(Assembler::bcondAlways, done); // Biased lock status given in condition code. + + //---------------------------------------------------------------------------- + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. 
+ + z_nilf(mark_reg, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + load_prototype_header(temp_reg, obj_reg); + z_llgfr(mark_reg, mark_reg); + + z_ogr(temp_reg, Z_thread); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + z_csg(mark_reg, temp_reg, 0, obj_reg); + + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + + if (PrintBiasedLockingStatistics) { + increment_counter_eq((address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg, temp2_reg); + } + if (slow_case != NULL) { + branch_optimized(Assembler::bcondNotEqual, *slow_case); // Biased lock not obtained, need to go the long way. + } + z_bru(done); // Biased lock status given in condition code. + + //---------------------------------------------------------------------------- + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + load_prototype_header(temp_reg, obj_reg); + + assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); + + z_csg(mark_reg, temp_reg, 0, obj_reg); + + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. 
+ if (PrintBiasedLockingStatistics) { + // z_cgr(mark_reg, temp2_reg); + increment_counter_eq((address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg, temp2_reg); + } + + bind(cas_label); + BLOCK_COMMENT("} biased_locking_enter"); +} + +void MacroAssembler::biased_locking_exit(Register mark_addr, Register temp_reg, Label& done) { + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + BLOCK_COMMENT("biased_locking_exit {"); + + z_lg(temp_reg, 0, mark_addr); + z_nilf(temp_reg, markOopDesc::biased_lock_mask_in_place); + + z_chi(temp_reg, markOopDesc::biased_lock_pattern); + z_bre(done); + BLOCK_COMMENT("} biased_locking_exit"); +} + +void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) { + Register displacedHeader = temp1; + Register currentHeader = temp1; + Register temp = temp2; + NearLabel done, object_has_monitor; + + BLOCK_COMMENT("compiler_fast_lock_object {"); + + // Load markOop from oop into mark. + z_lg(displacedHeader, 0, oop); + + if (try_bias) { + biased_locking_enter(oop, displacedHeader, temp, Z_R0, done); + } + + // Handle existing monitor. + if ((EmitSync & 0x01) == 0) { + // The object has an existing monitor iff (mark & monitor_value) != 0. + guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word"); + z_lr(temp, displacedHeader); + z_nill(temp, markOopDesc::monitor_value); + z_brne(object_has_monitor); + } + + // Set mark to markOop | markOopDesc::unlocked_value. + z_oill(displacedHeader, markOopDesc::unlocked_value); + + // Load Compare Value application register. 
+ + // Initialize the box (must happen before we update the object mark). + z_stg(displacedHeader, BasicLock::displaced_header_offset_in_bytes(), box); + + // Memory Fence (in cmpxchgd) + // Compare object markOop with mark and if equal exchange scratch1 with object markOop. + + // If the compare-and-swap succeeded, then we found an unlocked object and we + // have now locked it. + z_csg(displacedHeader, box, 0, oop); + assert(currentHeader==displacedHeader, "must be same register"); // Identified two registers from z/Architecture. + z_bre(done); + + // We did not see an unlocked object so try the fast recursive case. + + z_sgr(currentHeader, Z_SP); + load_const_optimized(temp, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place)); + + z_ngr(currentHeader, temp); + // z_brne(done); + // z_release(); + z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box); + + z_bru(done); + + if ((EmitSync & 0x01) == 0) { + Register zero = temp; + Register monitor_tagged = displacedHeader; // Tagged with markOopDesc::monitor_value. + bind(object_has_monitor); + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + // + // Try to CAS m->owner from NULL to current thread. + z_lghi(zero, 0); + // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ. + z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged); + // Store a non-null value into the box. + z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box); +#ifdef ASSERT + z_brne(done); + // We've acquired the monitor, check some invariants. + // Invariant 1: _recursions should be 0. + asm_assert_mem8_is_zero(OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), monitor_tagged, + "monitor->_recursions should be 0", -1); + z_ltgr(zero, zero); // Set CR=EQ. 
+#endif + } + bind(done); + + BLOCK_COMMENT("} compiler_fast_lock_object"); + // If locking was successful, CR should indicate 'EQ'. + // The compiler or the native wrapper generates a branch to the runtime call + // _complete_monitor_locking_Java. +} + +void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias) { + Register displacedHeader = temp1; + Register currentHeader = temp2; + Register temp = temp1; + Register monitor = temp2; + + Label done, object_has_monitor; + + BLOCK_COMMENT("compiler_fast_unlock_object {"); + + if (try_bias) { + biased_locking_exit(oop, currentHeader, done); + } + + // Find the lock address and load the displaced header from the stack. + // if the displaced header is zero, we have a recursive unlock. + load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes())); + z_bre(done); + + // Handle existing monitor. + if ((EmitSync & 0x02) == 0) { + // The object has an existing monitor iff (mark & monitor_value) != 0. + z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); + guarantee(Immediate::is_uimm16(markOopDesc::monitor_value), "must be half-word"); + z_nill(currentHeader, markOopDesc::monitor_value); + z_brne(object_has_monitor); + } + + // Check if it is still a light weight lock, this is true if we see + // the stack address of the basicLock in the markOop of the object + // copy box to currentHeader such that csg does not kill it. + z_lgr(currentHeader, box); + z_csg(currentHeader, displacedHeader, 0, oop); + z_bru(done); // Csg sets CR as desired. + + // Handle existing monitor. + if ((EmitSync & 0x02) == 0) { + bind(object_has_monitor); + z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set. 
+ load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); + z_brne(done); + load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + z_brne(done); + load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); + z_brne(done); + load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); + z_brne(done); + z_release(); + z_stg(temp/*=0*/, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), currentHeader); + } + + bind(done); + + BLOCK_COMMENT("} compiler_fast_unlock_object"); + // flag == EQ indicates success + // flag == NE indicates failure +} + +// Write to card table for modification at store_addr - register is destroyed afterwards. +void MacroAssembler::card_write_barrier_post(Register store_addr, Register tmp) { + CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableForRS || + bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); + assert_different_registers(store_addr, tmp); + z_srlg(store_addr, store_addr, CardTableModRefBS::card_shift); + load_absolute_address(tmp, (address)bs->byte_map_base); + z_agr(store_addr, tmp); + z_mvi(0, store_addr, 0); // Store byte 0. +} + +#if INCLUDE_ALL_GCS + +//------------------------------------------------------ +// General G1 pre-barrier generator. +// Purpose: record the previous value if it is not null. +// All non-tmps are preserved. +//------------------------------------------------------ +void MacroAssembler::g1_write_barrier_pre(Register Robj, + RegisterOrConstant offset, + Register Rpre_val, // Ideally, this is a non-volatile register. + Register Rval, // Will be preserved. + Register Rtmp1, // If Rpre_val is volatile, either Rtmp1 + Register Rtmp2, // or Rtmp2 has to be non-volatile.. + bool pre_val_needed // Save Rpre_val across runtime call, caller uses it. 
+ ) { + Label callRuntime, filtered; + const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_active()); + const int buffer_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_buf()); + const int index_offset = in_bytes(JavaThread::satb_mark_queue_offset() + SATBMarkQueue::byte_offset_of_index()); + assert_different_registers(Rtmp1, Rtmp2, Z_R0_scratch); // None of the Rtmp must be Z_R0!! + + BLOCK_COMMENT("g1_write_barrier_pre {"); + + // Is marking active? + // Note: value is loaded for test purposes only. No further use here. + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + load_and_test_int(Rtmp1, Address(Z_thread, active_offset)); + } else { + guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + load_and_test_byte(Rtmp1, Address(Z_thread, active_offset)); + } + z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently. + + // Do we need to load the previous value into Rpre_val? + if (Robj != noreg) { + // Load the previous value... + Register ixReg = offset.is_register() ? offset.register_or_noreg() : Z_R0; + if (UseCompressedOops) { + z_llgf(Rpre_val, offset.constant_or_zero(), ixReg, Robj); + } else { + z_lg(Rpre_val, offset.constant_or_zero(), ixReg, Robj); + } + } + assert(Rpre_val != noreg, "must have a real register"); + + // Is the previous value NULL? + // Note: pre_val is loaded, decompressed and stored (directly or via runtime call). + // Register contents is preserved across runtime call if caller requests to do so. + z_ltgr(Rpre_val, Rpre_val); + z_bre(filtered); // previous value is NULL, so we don't need to record it. + + // Decode the oop now. We know it's not NULL. + if (Robj != noreg && UseCompressedOops) { + oop_decoder(Rpre_val, Rpre_val, /*maybeNULL=*/false); + } + + // OK, it's not filtered, so we'll need to call enqueue. 
+ + // We can store the original value in the thread's buffer + // only if index > 0. Otherwise, we need runtime to handle. + // (The index field is typed as size_t.) + Register Rbuffer = Rtmp1, Rindex = Rtmp2; + + z_lg(Rbuffer, buffer_offset, Z_thread); + + load_and_test_long(Rindex, Address(Z_thread, index_offset)); + z_bre(callRuntime); // If index == 0, goto runtime. + + add2reg(Rindex, -wordSize); // Decrement index. + z_stg(Rindex, index_offset, Z_thread); + + // Record the previous value. + z_stg(Rpre_val, 0, Rbuffer, Rindex); + z_bru(filtered); // We are done. + + Rbuffer = noreg; // end of life + Rindex = noreg; // end of life + + bind(callRuntime); + + // Save Rpre_val (result) over runtime call. + // Requires Rtmp1, Rtmp2, or Rpre_val to be non-volatile. + Register Rpre_save = Rpre_val; + if (pre_val_needed && Rpre_val->is_volatile()) { + guarantee(!Rtmp1->is_volatile() || !Rtmp2->is_volatile(), "oops!"); + Rpre_save = !Rtmp1->is_volatile() ? Rtmp1 : Rtmp2; + } + lgr_if_needed(Rpre_save, Rpre_val); + + // Preserve inputs by spilling them into the top frame. + if (Robj != noreg && Robj->is_volatile()) { + z_stg(Robj, Robj->encoding()*BytesPerWord, Z_SP); + } + if (offset.is_register() && offset.as_register()->is_volatile()) { + Register Roff = offset.as_register(); + z_stg(Roff, Roff->encoding()*BytesPerWord, Z_SP); + } + if (Rval != noreg && Rval->is_volatile()) { + z_stg(Rval, Rval->encoding()*BytesPerWord, Z_SP); + } + + // Push frame to protect top frame with return pc and spilled register values. + save_return_pc(); + push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs. + + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), Rpre_val, Z_thread); + + pop_frame(); + restore_return_pc(); + + // Restore spilled values. 
+ if (Robj != noreg && Robj->is_volatile()) { + z_lg(Robj, Robj->encoding()*BytesPerWord, Z_SP); + } + if (offset.is_register() && offset.as_register()->is_volatile()) { + Register Roff = offset.as_register(); + z_lg(Roff, Roff->encoding()*BytesPerWord, Z_SP); + } + if (Rval != noreg && Rval->is_volatile()) { + z_lg(Rval, Rval->encoding()*BytesPerWord, Z_SP); + } + + // Restore Rpre_val (result) after runtime call. + lgr_if_needed(Rpre_val, Rpre_save); + + bind(filtered); + BLOCK_COMMENT("} g1_write_barrier_pre"); +} + +// General G1 post-barrier generator. +// Purpose: Store cross-region card. +void MacroAssembler::g1_write_barrier_post(Register Rstore_addr, + Register Rnew_val, + Register Rtmp1, + Register Rtmp2, + Register Rtmp3) { + Label callRuntime, filtered; + + assert_different_registers(Rstore_addr, Rnew_val, Rtmp1, Rtmp2); // Most probably, Rnew_val == Rtmp3. + + G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + + BLOCK_COMMENT("g1_write_barrier_post {"); + + // Does store cross heap regions? + // It does if the two addresses specify different grain addresses. + if (G1RSBarrierRegionFilter) { + if (VM_Version::has_DistinctOpnds()) { + z_xgrk(Rtmp1, Rstore_addr, Rnew_val); + } else { + z_lgr(Rtmp1, Rstore_addr); + z_xgr(Rtmp1, Rnew_val); + } + z_srag(Rtmp1, Rtmp1, HeapRegion::LogOfHRGrainBytes); + z_bre(filtered); + } + + // Crosses regions, storing NULL? +#ifdef ASSERT + z_ltgr(Rnew_val, Rnew_val); + asm_assert_ne("null oop not allowed (G1)", 0x255); // TODO: also on z? Checked by caller on PPC64, so following branch is obsolete: + z_bre(filtered); // Safety net: don't break if we have a NULL oop. +#endif + Rnew_val = noreg; // end of lifetime + + // Storing region crossing non-NULL, is card already dirty? 
+ assert(sizeof(*bs->byte_map_base) == sizeof(jbyte), "adjust this code"); + assert_different_registers(Rtmp1, Rtmp2, Rtmp3); + // Make sure not to use Z_R0 for any of these registers. + Register Rcard_addr = (Rtmp1 != Z_R0_scratch) ? Rtmp1 : Rtmp3; + Register Rbase = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp3; + + // calculate address of card + load_const_optimized(Rbase, (address)bs->byte_map_base); // Card table base. + z_srlg(Rcard_addr, Rstore_addr, CardTableModRefBS::card_shift); // Index into card table. + add2reg_with_index(Rcard_addr, 0, Rcard_addr, Rbase); // Explicit calculation needed for cli. + Rbase = noreg; // end of lifetime + + // Filter young. + assert((unsigned int)G1SATBCardTableModRefBS::g1_young_card_val() <= 255, "otherwise check this code"); + z_cli(0, Rcard_addr, (int)G1SATBCardTableModRefBS::g1_young_card_val()); + z_bre(filtered); + + // Check the card value. If dirty, we're done. + // This also avoids false sharing of the (already dirty) card. + z_sync(); // Required to support concurrent cleaning. + assert((unsigned int)CardTableModRefBS::dirty_card_val() <= 255, "otherwise check this code"); + z_cli(0, Rcard_addr, CardTableModRefBS::dirty_card_val()); // Reload after membar. + z_bre(filtered); + + // Storing a region crossing, non-NULL oop, card is clean. + // Dirty card and log. + z_mvi(0, Rcard_addr, CardTableModRefBS::dirty_card_val()); + + Register Rcard_addr_x = Rcard_addr; + Register Rqueue_index = (Rtmp2 != Z_R0_scratch) ? Rtmp2 : Rtmp1; + Register Rqueue_buf = (Rtmp3 != Z_R0_scratch) ? Rtmp3 : Rtmp1; + const int qidx_off = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_index()); + const int qbuf_off = in_bytes(JavaThread::dirty_card_queue_offset() + SATBMarkQueue::byte_offset_of_buf()); + if ((Rcard_addr == Rqueue_buf) || (Rcard_addr == Rqueue_index)) { + Rcard_addr_x = Z_R0_scratch; // Register shortage. We have to use Z_R0. 
+ } + lgr_if_needed(Rcard_addr_x, Rcard_addr); + + load_and_test_long(Rqueue_index, Address(Z_thread, qidx_off)); + z_bre(callRuntime); // Index == 0 then jump to runtime. + + z_lg(Rqueue_buf, qbuf_off, Z_thread); + + add2reg(Rqueue_index, -wordSize); // Decrement index. + z_stg(Rqueue_index, qidx_off, Z_thread); + + z_stg(Rcard_addr_x, 0, Rqueue_index, Rqueue_buf); // Store card. + z_bru(filtered); + + bind(callRuntime); + + // TODO: do we need a frame? Introduced to be on the safe side. + bool needs_frame = true; + + // VM call need frame to access(write) O register. + if (needs_frame) { + save_return_pc(); + push_frame_abi160(0); // Will use Z_R0 as tmp on old CPUs. + } + + // Save the live input values. + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), Rcard_addr_x, Z_thread); + + if (needs_frame) { + pop_frame(); + restore_return_pc(); + } + + bind(filtered); + + BLOCK_COMMENT("} g1_write_barrier_post"); +} +#endif // INCLUDE_ALL_GCS + +// Last_Java_sp must comply to the rules in frame_s390.hpp. +void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation) { + BLOCK_COMMENT("set_last_Java_frame {"); + + // Always set last_Java_pc and flags first because once last_Java_sp + // is visible has_last_Java_frame is true and users will look at the + // rest of the fields. (Note: flags should always be zero before we + // get here so doesn't need to be set.) + + // Verify that last_Java_pc was zeroed on return to Java. + if (allow_relocation) { + asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()), + Z_thread, + "last_Java_pc not zeroed before leaving Java", + 0x200); + } else { + asm_assert_mem8_is_zero_static(in_bytes(JavaThread::last_Java_pc_offset()), + Z_thread, + "last_Java_pc not zeroed before leaving Java", + 0x200); + } + + // When returning from calling out from Java mode the frame anchor's + // last_Java_pc will always be set to NULL. 
It is set here so that + // if we are doing a call to native (not VM) that we capture the + // known pc and don't have to rely on the native call having a + // standard frame linkage where we can find the pc. + if (last_Java_pc!=noreg) { + z_stg(last_Java_pc, Address(Z_thread, JavaThread::last_Java_pc_offset())); + } + + // This membar release is not required on z/Architecture, since the sequence of stores + // is maintained. Nevertheless, we leave it in to document the required ordering. + // The implementation of z_release() should be empty. + // z_release(); + + z_stg(last_Java_sp, Address(Z_thread, JavaThread::last_Java_sp_offset())); + BLOCK_COMMENT("} set_last_Java_frame"); +} + +void MacroAssembler::reset_last_Java_frame(bool allow_relocation) { + BLOCK_COMMENT("reset_last_Java_frame {"); + + if (allow_relocation) { + asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()), + Z_thread, + "SP was not set, still zero", + 0x202); + } else { + asm_assert_mem8_isnot_zero_static(in_bytes(JavaThread::last_Java_sp_offset()), + Z_thread, + "SP was not set, still zero", + 0x202); + } + + // _last_Java_sp = 0 + // Clearing storage must be atomic here, so don't use clear_mem()! + store_const(Address(Z_thread, JavaThread::last_Java_sp_offset()), 0); + + // _last_Java_pc = 0 + store_const(Address(Z_thread, JavaThread::last_Java_pc_offset()), 0); + + BLOCK_COMMENT("} reset_last_Java_frame"); + return; +} + +void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation) { + assert_different_registers(sp, tmp1); + + // We cannot trust that code generated by the C++ compiler saves R14 + // to z_abi_160.return_pc, because sometimes it spills R14 using stmg at + // z_abi_160.gpr14 (e.g. InterpreterRuntime::_new()). + // Therefore we load the PC into tmp1 and let set_last_Java_frame() save + // it into the frame anchor. 
+ get_PC(tmp1); + set_last_Java_frame(/*sp=*/sp, /*pc=*/tmp1, allow_relocation); +} + +void MacroAssembler::set_thread_state(JavaThreadState new_state) { + z_release(); + + assert(Immediate::is_uimm16(_thread_max_state), "enum value out of range for instruction"); + assert(sizeof(JavaThreadState) == sizeof(int), "enum value must have base type int"); + store_const(Address(Z_thread, JavaThread::thread_state_offset()), new_state, Z_R0, false); +} + +void MacroAssembler::get_vm_result(Register oop_result) { + verify_thread(); + + z_lg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); + clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(void*)); + + verify_oop(oop_result); +} + +void MacroAssembler::get_vm_result_2(Register result) { + verify_thread(); + + z_lg(result, Address(Z_thread, JavaThread::vm_result_2_offset())); + clear_mem(Address(Z_thread, JavaThread::vm_result_2_offset()), sizeof(void*)); +} + +// We require that C code which does not return a value in vm_result will +// leave it undisturbed. +void MacroAssembler::set_vm_result(Register oop_result) { + z_stg(oop_result, Address(Z_thread, JavaThread::vm_result_offset())); +} + +// Explicit null checks (used for method handle code). +void MacroAssembler::null_check(Register reg, Register tmp, int64_t offset) { + if (!ImplicitNullChecks) { + NearLabel ok; + + compare64_and_branch(reg, (intptr_t) 0, Assembler::bcondNotEqual, ok); + + // We just put the address into reg if it was 0 (tmp==Z_R0 is allowed so we can't use it for the address). + address exception_entry = Interpreter::throw_NullPointerException_entry(); + load_absolute_address(reg, exception_entry); + z_br(reg); + + bind(ok); + } else { + if (needs_explicit_null_check((intptr_t)offset)) { + // Provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any registers. 
+ z_lg(tmp, 0, reg); + } + // else + // Nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL. + } +} + +//------------------------------------- +// Compressed Klass Pointers +//------------------------------------- + +// Klass oop manipulations if compressed. +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. (dst == src) also possible. + address base = Universe::narrow_klass_base(); + int shift = Universe::narrow_klass_shift(); + assert(UseCompressedClassPointers, "only for compressed klass ptrs"); + + BLOCK_COMMENT("cKlass encoder {"); + +#ifdef ASSERT + Label ok; + z_tmll(current, KlassAlignmentInBytes-1); // Check alignment. + z_brc(Assembler::bcondAllZero, ok); + // The plain disassembler does not recognize illtrap. It instead displays + // a 32-bit value. Issuing two illtraps ensures the disassembler finds + // the proper beginning of the next instruction. + z_illtrap(0xee); + z_illtrap(0xee); + bind(ok); +#endif + + if (base != NULL) { + unsigned int base_h = ((unsigned long)base)>>32; + unsigned int base_l = (unsigned int)((unsigned long)base); + if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { + lgr_if_needed(dst, current); + z_aih(dst, -((int)base_h)); // Base has no set bits in lower half. + } else if ((base_h == 0) && (base_l != 0)) { + lgr_if_needed(dst, current); + z_agfi(dst, -(int)base_l); // Base has no set bits in upper half. NOTE(review): agfi presumably sign-extends its 32-bit immediate; if base_l can exceed 0x7fffffff, -(int)base_l would not subtract the base - confirm the possible range of narrow_klass_base. + } else { + load_const(Z_R0, base); + lgr_if_needed(dst, current); + z_sgr(dst, Z_R0); + } + current = dst; + } + if (shift != 0) { + assert (LogKlassAlignmentInBytes == shift, "decode alg wrong"); + z_srlg(dst, current, shift); + current = dst; + } + lgr_if_needed(dst, current); // Move may be required (if neither base nor shift != 0). 
+ + BLOCK_COMMENT("} cKlass encoder"); +} + +// This function calculates the size of the code generated by +// decode_klass_not_null(Register dst), the fixed-size variant, +// when (Universe::heap() != NULL). Hence, if the instructions +// it generates change, then this method needs to be updated. +int MacroAssembler::instr_size_for_decode_klass_not_null() { + address base = Universe::narrow_klass_base(); + int shift_size = Universe::narrow_klass_shift() == 0 ? 0 : 6; /* sllg */ + int addbase_size = 0; + assert(UseCompressedClassPointers, "only for compressed klass ptrs"); + + if (base != NULL) { + unsigned int base_h = ((unsigned long)base)>>32; + unsigned int base_l = (unsigned int)((unsigned long)base); + if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { + addbase_size += 6; /* aih */ + } else if ((base_h == 0) && (base_l != 0)) { + addbase_size += 6; /* algfi */ + } else { + addbase_size += load_const_size(); + addbase_size += 4; /* algr */ + } + } +#ifdef ASSERT + addbase_size += 10; // ASSERT-only alignment check (tmll, brc, illtrap). + addbase_size += 2; // Extra sigill. +#endif + return addbase_size + shift_size; +} + +// !!! If the instructions that get generated here change +// then function instr_size_for_decode_klass_not_null() +// needs to get updated. +// This variant of decode_klass_not_null() must generate predictable code! +// The code must only depend on globally known parameters. +void MacroAssembler::decode_klass_not_null(Register dst) { + address base = Universe::narrow_klass_base(); + int shift = Universe::narrow_klass_shift(); + int beg_off = offset(); + assert(UseCompressedClassPointers, "only for compressed klass ptrs"); + + BLOCK_COMMENT("cKlass decoder (const size) {"); + + if (shift != 0) { // Shift required? 
+ z_sllg(dst, dst, shift); + } + if (base != NULL) { + unsigned int base_h = ((unsigned long)base)>>32; + unsigned int base_l = (unsigned int)((unsigned long)base); + if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { + z_aih(dst, base_h); // Base has no set bits in lower half. + } else if ((base_h == 0) && (base_l != 0)) { + z_algfi(dst, base_l); // Base has no set bits in upper half. + } else { + load_const(Z_R0, base); // Base has set bits everywhere. + z_algr(dst, Z_R0); + } + } + +#ifdef ASSERT + Label ok; + z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment. + z_brc(Assembler::bcondAllZero, ok); + // The plain disassembler does not recognize illtrap. It instead displays + // a 32-bit value. Issuing two illtraps ensures the disassembler finds + // the proper beginning of the next instruction. + z_illtrap(0xd1); + z_illtrap(0xd1); + bind(ok); +#endif + assert(offset() == beg_off + instr_size_for_decode_klass_not_null(), "Code gen mismatch."); + + BLOCK_COMMENT("} cKlass decoder (const size)"); +} + +// This variant of decode_klass_not_null() is for cases where +// 1) the size of the generated instructions may vary +// 2) the result is (potentially) stored in a register different from the source. +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + address base = Universe::narrow_klass_base(); + int shift = Universe::narrow_klass_shift(); + assert(UseCompressedClassPointers, "only for compressed klass ptrs"); + + BLOCK_COMMENT("cKlass decoder {"); + + if (src == noreg) src = dst; + + if (shift != 0) { // Shift or at least move required? + z_sllg(dst, src, shift); + } else { + lgr_if_needed(dst, src); + } + + if (base != NULL) { + unsigned int base_h = ((unsigned long)base)>>32; + unsigned int base_l = (unsigned int)((unsigned long)base); + if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { + z_aih(dst, base_h); // Base has no set bits in lower half. 
+ } else if ((base_h == 0) && (base_l != 0)) { + z_algfi(dst, base_l); // Base has no set bits in upper half. + } else { + load_const_optimized(Z_R0, base); // Base has set bits everywhere. + z_algr(dst, Z_R0); + } + } + +#ifdef ASSERT + Label ok; + z_tmll(dst, KlassAlignmentInBytes-1); // Check alignment. + z_brc(Assembler::bcondAllZero, ok); + // The plain disassembler does not recognize illtrap. It instead displays + // a 32-bit value. Issuing two illtraps ensures the disassembler finds + // the proper beginning of the next instruction. + z_illtrap(0xd2); + z_illtrap(0xd2); + bind(ok); +#endif + BLOCK_COMMENT("} cKlass decoder"); +} + +void MacroAssembler::load_klass(Register klass, Address mem) { + if (UseCompressedClassPointers) { + z_llgf(klass, mem); + // Attention: no null check here! + decode_klass_not_null(klass); + } else { + z_lg(klass, mem); + } +} + +void MacroAssembler::load_klass(Register klass, Register src_oop) { + if (UseCompressedClassPointers) { + z_llgf(klass, oopDesc::klass_offset_in_bytes(), src_oop); + // Attention: no null check here! 
+ decode_klass_not_null(klass); + } else { + z_lg(klass, oopDesc::klass_offset_in_bytes(), src_oop); + } +} + +void MacroAssembler::load_prototype_header(Register Rheader, Register Rsrc_oop) { + assert_different_registers(Rheader, Rsrc_oop); + load_klass(Rheader, Rsrc_oop); + z_lg(Rheader, Address(Rheader, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass(Register klass, Register dst_oop, Register ck) { + if (UseCompressedClassPointers) { + assert_different_registers(dst_oop, klass, Z_R0); + if (ck == noreg) ck = klass; + encode_klass_not_null(ck, klass); + z_st(ck, Address(dst_oop, oopDesc::klass_offset_in_bytes())); + } else { + z_stg(klass, Address(dst_oop, oopDesc::klass_offset_in_bytes())); + } +} + +void MacroAssembler::store_klass_gap(Register s, Register d) { + if (UseCompressedClassPointers) { + assert(s != d, "not enough registers"); + z_st(s, Address(d, oopDesc::klass_gap_offset_in_bytes())); + } +} + +// Compare klass ptr in memory against klass ptr in register. +// +// Rop1 - klass in register, always uncompressed. +// disp - Offset of klass in memory, compressed/uncompressed, depending on runtime flag. +// Rbase - Base address of cKlass in memory. +// maybeNULL - True if Rop1 possibly is a NULL. +void MacroAssembler::compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL) { + + BLOCK_COMMENT("compare klass ptr {"); + + if (UseCompressedClassPointers) { + const int shift = Universe::narrow_klass_shift(); + address base = Universe::narrow_klass_base(); + + assert((shift == 0) || (shift == LogKlassAlignmentInBytes), "cKlass encoder detected bad shift"); + assert_different_registers(Rop1, Z_R0); + assert_different_registers(Rop1, Rbase, Z_R1); + + // First encode register oop and then compare with cOop in memory. + // This sequence saves an unnecessary cOop load and decode. 
+ if (base == NULL) { + if (shift == 0) { + z_cl(Rop1, disp, Rbase); // Unscaled + } else { + z_srlg(Z_R0, Rop1, shift); // ZeroBased + z_cl(Z_R0, disp, Rbase); + } + } else { // HeapBased +#ifdef ASSERT + bool used_R0 = true; + bool used_R1 = true; +#endif + Register current = Rop1; + Label done; + + if (maybeNULL) { // NULL ptr must be preserved! + z_ltgr(Z_R0, current); + z_bre(done); + current = Z_R0; + } + + unsigned int base_h = ((unsigned long)base)>>32; + unsigned int base_l = (unsigned int)((unsigned long)base); + if ((base_h != 0) && (base_l == 0) && VM_Version::has_HighWordInstr()) { + lgr_if_needed(Z_R0, current); + z_aih(Z_R0, -((int)base_h)); // Base has no set bits in lower half. + } else if ((base_h == 0) && (base_l != 0)) { + lgr_if_needed(Z_R0, current); + z_agfi(Z_R0, -(int)base_l); + } else { + int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); + add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); // Subtract base by adding complement. + } + + if (shift != 0) { + z_srlg(Z_R0, Z_R0, shift); + } + bind(done); + z_cl(Z_R0, disp, Rbase); +#ifdef ASSERT + if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); + if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); +#endif + } + } else { + z_clg(Rop1, disp, Z_R0, Rbase); + } + BLOCK_COMMENT("} compare klass ptr"); +} + +//--------------------------- +// Compressed oops +//--------------------------- + +void MacroAssembler::encode_heap_oop(Register oop) { + oop_encoder(oop, oop, true /*maybe null*/); +} + +void MacroAssembler::encode_heap_oop_not_null(Register oop) { + oop_encoder(oop, oop, false /*not null*/); +} + +// Called with something derived from the oop base. e.g. oop_base>>3. 
+int MacroAssembler::get_oop_base_pow2_offset(uint64_t oop_base) { + unsigned int oop_base_ll = ((unsigned int)(oop_base >> 0)) & 0xffff; + unsigned int oop_base_lh = ((unsigned int)(oop_base >> 16)) & 0xffff; + unsigned int oop_base_hl = ((unsigned int)(oop_base >> 32)) & 0xffff; + unsigned int oop_base_hh = ((unsigned int)(oop_base >> 48)) & 0xffff; + unsigned int n_notzero_parts = (oop_base_ll == 0 ? 0:1) + + (oop_base_lh == 0 ? 0:1) + + (oop_base_hl == 0 ? 0:1) + + (oop_base_hh == 0 ? 0:1); + + assert(oop_base != 0, "This is for HeapBased cOops only"); + + if (n_notzero_parts != 1) { // Check if oop_base is just a few pages shy of a power of 2. + uint64_t pow2_offset = 0x10000 - oop_base_ll; + if (pow2_offset < 0x8000) { // This might not be necessary. + uint64_t oop_base2 = oop_base + pow2_offset; + + // The low 16 bits of the adjusted base are zero. Recount its nonzero 16-bit parts. + oop_base_ll = ((unsigned int)(oop_base2 >> 0)) & 0xffff; + oop_base_lh = ((unsigned int)(oop_base2 >> 16)) & 0xffff; + oop_base_hl = ((unsigned int)(oop_base2 >> 32)) & 0xffff; + oop_base_hh = ((unsigned int)(oop_base2 >> 48)) & 0xffff; + n_notzero_parts = (oop_base_ll == 0 ? 0:1) + + (oop_base_lh == 0 ? 0:1) + + (oop_base_hl == 0 ? 0:1) + + (oop_base_hh == 0 ? 0:1); + if (n_notzero_parts == 1) { + // Only one 16-bit part is left: report the adjustment as a negative offset. + assert(-(int64_t)pow2_offset != (int64_t)-1, "We use -1 to signal uninitialized base register"); + return -pow2_offset; + } + } + } + return 0; // No adjustment found. +} + +// If base address is offset from a straight power of two by just a few pages, +// return this offset to the caller for a possible later composite add. +// Rbase is loaded with oop_base minus the returned offset (which is zero or negative). +// TODO/FIX: will only work correctly for 4k pages. +int MacroAssembler::get_oop_base(Register Rbase, uint64_t oop_base) { + int pow2_offset = get_oop_base_pow2_offset(oop_base); + + load_const_optimized(Rbase, oop_base - pow2_offset); // Best job possible. 
+ + return pow2_offset; +} + +int MacroAssembler::get_oop_base_complement(Register Rbase, uint64_t oop_base) { + int offset = get_oop_base(Rbase, oop_base); + z_lcgr(Rbase, Rbase); + return -offset; +} + +// Compare compressed oop in memory against oop in register. +// Rop1 - Oop in register. +// mem - Address (base, index, displacement) of cOop in memory. +// maybeNULL - True if Rop1 possibly is a NULL. +// The emitted code ends with the compare instruction (z_cl); no branch on the result is emitted here. +// Z_R0 is overwritten in all but the Unscaled case (base == NULL, shift == 0); +// Z_R1 is overwritten as well in the HeapBased case. +void MacroAssembler::compare_heap_oop(Register Rop1, Address mem, bool maybeNULL) { + Register Rbase = mem.baseOrR0(); + Register Rindex = mem.indexOrR0(); + int64_t disp = mem.disp(); + + const int shift = Universe::narrow_oop_shift(); + address base = Universe::narrow_oop_base(); + + assert(UseCompressedOops, "must be on to call this method"); + assert(Universe::heap() != NULL, "java heap must be initialized to call this method"); + assert((shift == 0) || (shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); + assert_different_registers(Rop1, Z_R0); + assert_different_registers(Rop1, Rbase, Z_R1); + assert_different_registers(Rop1, Rindex, Z_R1); + + BLOCK_COMMENT("compare heap oop {"); + + // First encode register oop and then compare with cOop in memory. + // This sequence saves an unnecessary cOop load and decode. + if (base == NULL) { + if (shift == 0) { + z_cl(Rop1, disp, Rindex, Rbase); // Unscaled + } else { + z_srlg(Z_R0, Rop1, shift); // ZeroBased + z_cl(Z_R0, disp, Rindex, Rbase); + } + } else { // HeapBased +#ifdef ASSERT + bool used_R0 = true; + bool used_R1 = true; +#endif + Label done; + // Z_R1 receives the negated base, so the add emitted below subtracts the heap base from the oop. + int pow2_offset = get_oop_base_complement(Z_R1, ((uint64_t)(intptr_t)base)); + + if (maybeNULL) { // NULL ptr must be preserved! 
+ z_ltgr(Z_R0, Rop1); + z_bre(done); + } + + add2reg_with_index(Z_R0, pow2_offset, Z_R1, Rop1); + z_srlg(Z_R0, Z_R0, shift); + + bind(done); + z_cl(Z_R0, disp, Rindex, Rbase); +#ifdef ASSERT + if (used_R0) preset_reg(Z_R0, 0xb05bUL, 2); + if (used_R1) preset_reg(Z_R1, 0xb06bUL, 2); +#endif + } + BLOCK_COMMENT("} compare heap oop"); +} + +// Load heap oop and decompress, if necessary. +// The loaded oop may be NULL (the decoder is called with maybeNULL == true). +void MacroAssembler::load_heap_oop(Register dest, const Address &a) { + if (UseCompressedOops) { + z_llgf(dest, a.disp(), a.indexOrR0(), a.baseOrR0()); + oop_decoder(dest, dest, true); + } else { + z_lg(dest, a.disp(), a.indexOrR0(), a.baseOrR0()); + } +} + +// Load heap oop and decompress, if necessary. +// The loaded oop may be NULL (the decoder is called with maybeNULL == true). +void MacroAssembler::load_heap_oop(Register dest, int64_t disp, Register base) { + if (UseCompressedOops) { + z_llgf(dest, disp, base); + oop_decoder(dest, dest, true); + } else { + z_lg(dest, disp, base); + } +} + +// Load heap oop and decompress, if necessary. +// The loaded oop is taken to be not NULL (the decoder is called with maybeNULL == false). +void MacroAssembler::load_heap_oop_not_null(Register dest, int64_t disp, Register base) { + if (UseCompressedOops) { + z_llgf(dest, disp, base); + oop_decoder(dest, dest, false); + } else { + z_lg(dest, disp, base); + } +} + +// Compress, if necessary, and store oop to heap. +// With compressed oops, Roop is encoded in place, i.e. it holds the compressed oop afterwards. +void MacroAssembler::store_heap_oop(Register Roop, RegisterOrConstant offset, Register base) { + Register Ridx = offset.is_register() ? offset.register_or_noreg() : Z_R0; + if (UseCompressedOops) { + assert_different_registers(Roop, offset.register_or_noreg(), base); + encode_heap_oop(Roop); + z_st(Roop, offset.constant_or_zero(), Ridx, base); + } else { + z_stg(Roop, offset.constant_or_zero(), Ridx, base); + } +} + +// Compress, if necessary, and store oop to heap. Oop is guaranteed to be not NULL. +// With compressed oops, Roop is encoded in place, i.e. it holds the compressed oop afterwards. +void MacroAssembler::store_heap_oop_not_null(Register Roop, RegisterOrConstant offset, Register base) { + Register Ridx = offset.is_register() ? 
offset.register_or_noreg() : Z_R0; + if (UseCompressedOops) { + assert_different_registers(Roop, offset.register_or_noreg(), base); + encode_heap_oop_not_null(Roop); + z_st(Roop, offset.constant_or_zero(), Ridx, base); + } else { + z_stg(Roop, offset.constant_or_zero(), Ridx, base); + } +} + +// Store NULL oop to heap. +// The contents of register zero are stored unmodified (z_st with compressed oops, z_stg otherwise); +// the caller is expected to have cleared that register. +void MacroAssembler::store_heap_oop_null(Register zero, RegisterOrConstant offset, Register base) { + Register Ridx = offset.is_register() ? offset.register_or_noreg() : Z_R0; + if (UseCompressedOops) { + z_st(zero, offset.constant_or_zero(), Ridx, base); + } else { + z_stg(zero, offset.constant_or_zero(), Ridx, base); + } +} + +//------------------------------------------------- +// Encode compressed oop. Generally usable encoder. +//------------------------------------------------- +// Rsrc - contains regular oop on entry. It remains unchanged. +// Rdst - contains compressed oop on exit. +// Rdst and Rsrc may indicate same register, in which case Rsrc does not remain unchanged. +// +// Rdst must not indicate scratch register Z_R1 (Z_R1_scratch) for functionality. +// Rdst should not indicate scratch register Z_R0 (Z_R0_scratch) for performance. +// +// only32bitValid is set, if later code only uses the lower 32 bits. In this +// case the upper 32 bits of Rdst are not cleared. 
+void MacroAssembler::oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL, + Register Rbase, int pow2_offset, bool only32bitValid) { + + const address oop_base = Universe::narrow_oop_base(); + const int oop_shift = Universe::narrow_oop_shift(); + const bool disjoint = Universe::narrow_oop_base_disjoint(); + + assert(UseCompressedOops, "must be on to call this method"); + assert(Universe::heap() != NULL, "java heap must be initialized to call this encoder"); + assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), "cOop encoder detected bad shift"); + + if (disjoint || (oop_base == NULL)) { + BLOCK_COMMENT("cOop encoder zeroBase {"); + if (oop_shift == 0) { + if (oop_base != NULL && !only32bitValid) { + z_llgfr(Rdst, Rsrc); // Clear upper bits in case the register will be decoded again. + } else { + lgr_if_needed(Rdst, Rsrc); + } + } else { + z_srlg(Rdst, Rsrc, oop_shift); + if (oop_base != NULL && !only32bitValid) { + z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. + } + } + BLOCK_COMMENT("} cOop encoder zeroBase"); + return; + } + + bool used_R0 = false; + bool used_R1 = false; + + BLOCK_COMMENT("cOop encoder general {"); + assert_different_registers(Rdst, Z_R1); + assert_different_registers(Rsrc, Rbase); + if (maybeNULL) { + Label done; + // We reorder shifting and subtracting, so that we can compare + // and shift in parallel: + // + // cycle 0: potential LoadN, base = + // cycle 1: base = !base dst = src >> 3, cmp cr = (src != 0) + // cycle 2: if (cr) br, dst = dst + base + offset + + // Get oop_base components. 
+ if (pow2_offset == -1) { + if (Rdst == Rbase) { + if (Rdst == Z_R1 || Rsrc == Z_R1) { + Rbase = Z_R0; + used_R0 = true; + } else { + Rdst = Z_R1; + used_R1 = true; + } + } + if (Rbase == Z_R1) { + used_R1 = true; + } + pow2_offset = get_oop_base_complement(Rbase, ((uint64_t)(intptr_t)oop_base) >> oop_shift); + } + assert_different_registers(Rdst, Rbase); + + // Check for NULL oop (must be left alone) and shift. + if (oop_shift != 0) { // Shift out alignment bits + if (((intptr_t)oop_base&0xc000000000000000L) == 0L) { // We are sure: no single address will have the leftmost bit set. + z_srag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. + } else { + z_srlg(Rdst, Rsrc, oop_shift); + z_ltgr(Rsrc, Rsrc); // This is the recommended way of testing for zero. + // This probably is faster, as it does not write a register. No! + // z_cghi(Rsrc, 0); + } + } else { + z_ltgr(Rdst, Rsrc); // Move NULL to result register. + } + z_bre(done); + + // Subtract oop_base components. + if ((Rdst == Z_R0) || (Rbase == Z_R0)) { + z_algr(Rdst, Rbase); + if (pow2_offset != 0) { add2reg(Rdst, pow2_offset); } + } else { + add2reg_with_index(Rdst, pow2_offset, Rbase, Rdst); + } + if (!only32bitValid) { + z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. + } + bind(done); + + } else { // not null + // Get oop_base components. + if (pow2_offset == -1) { + pow2_offset = get_oop_base_complement(Rbase, (uint64_t)(intptr_t)oop_base); + } + + // Subtract oop_base components and shift. + if (Rdst == Z_R0 || Rsrc == Z_R0 || Rbase == Z_R0) { + // Don't use lay instruction. + if (Rdst == Rsrc) { + z_algr(Rdst, Rbase); + } else { + lgr_if_needed(Rdst, Rbase); + z_algr(Rdst, Rsrc); + } + if (pow2_offset != 0) add2reg(Rdst, pow2_offset); + } else { + add2reg_with_index(Rdst, pow2_offset, Rbase, Rsrc); + } + if (oop_shift != 0) { // Shift out alignment bits. 
+ z_srlg(Rdst, Rdst, oop_shift); + } + if (!only32bitValid) { + z_llgfr(Rdst, Rdst); // Clear upper bits in case the register will be decoded again. + } + } +#ifdef ASSERT + if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb01bUL, 2); } + if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb02bUL, 2); } +#endif + BLOCK_COMMENT("} cOop encoder general"); +} + +//------------------------------------------------- +// decode compressed oop. Generally usable decoder. +//------------------------------------------------- +// Rsrc - contains compressed oop on entry. +// Rdst - contains regular oop on exit. +// Rdst and Rsrc may indicate same register. +// Rdst must not be the same register as Rbase, if Rbase was preloaded (before call). +// Rdst can be the same register as Rbase. Then, either Z_R0 or Z_R1 must be available as scratch. +// Rbase - register to use for the base +// pow2_offset - offset of base to nice value. If -1, base must be loaded. +// For performance, it is good to +// - avoid Z_R0 for any of the argument registers. +// - keep Rdst and Rsrc distinct from Rbase. Rdst == Rsrc is ok for performance. +// - avoid Z_R1 for Rdst if Rdst == Rbase. +void MacroAssembler::oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL, Register Rbase, int pow2_offset) { + + const address oop_base = Universe::narrow_oop_base(); + const int oop_shift = Universe::narrow_oop_shift(); + const bool disjoint = Universe::narrow_oop_base_disjoint(); + + assert(UseCompressedOops, "must be on to call this method"); + assert(Universe::heap() != NULL, "java heap must be initialized to call this decoder"); + assert((oop_shift == 0) || (oop_shift == LogMinObjAlignmentInBytes), + "cOop encoder detected bad shift"); + + // cOops are always loaded zero-extended from memory. No explicit zero-extension necessary. 
+ + if (oop_base != NULL) { + unsigned int oop_base_hl = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xffff; + unsigned int oop_base_hh = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 48)) & 0xffff; + unsigned int oop_base_hf = ((unsigned int)((uint64_t)(intptr_t)oop_base >> 32)) & 0xFFFFffff; + if (disjoint && (oop_base_hl == 0 || oop_base_hh == 0)) { + BLOCK_COMMENT("cOop decoder disjointBase {"); + // We do not need to load the base. Instead, we can install the upper bits + // with an OR instead of an ADD. + Label done; + + // Rsrc contains a narrow oop. Thus we are sure the leftmost bits will never be set. + if (maybeNULL) { // NULL ptr must be preserved! + z_slag(Rdst, Rsrc, oop_shift); // Arithmetic shift sets the condition code. + z_bre(done); + } else { + z_sllg(Rdst, Rsrc, oop_shift); // Logical shift leaves condition code alone. + } + if ((oop_base_hl != 0) && (oop_base_hh != 0)) { + z_oihf(Rdst, oop_base_hf); + } else if (oop_base_hl != 0) { + z_oihl(Rdst, oop_base_hl); + } else { + assert(oop_base_hh != 0, "not heapbased mode"); + z_oihh(Rdst, oop_base_hh); + } + bind(done); + BLOCK_COMMENT("} cOop decoder disjointBase"); + } else { + BLOCK_COMMENT("cOop decoder general {"); + // There are three decode steps: + // scale oop offset (shift left) + // get base (in reg) and pow2_offset (constant) + // add base, pow2_offset, and oop offset + // The following register overlap situations may exist: + // Rdst == Rsrc, Rbase any other + // not a problem. Scaling in-place leaves Rbase undisturbed. + // Loading Rbase does not impact the scaled offset. + // Rdst == Rbase, Rsrc any other + // scaling would destroy a possibly preloaded Rbase. Loading Rbase + // would destroy the scaled offset. + // Remedy: use Rdst_tmp if Rbase has been preloaded. + // use Rbase_tmp if base has to be loaded. + // Rsrc == Rbase, Rdst any other + // Only possible without preloaded Rbase. 
+ // Loading Rbase does not destroy compressed oop because it was scaled into Rdst before. + // Rsrc == Rbase, Rdst == Rbase + // Only possible without preloaded Rbase. + // Loading Rbase would destroy compressed oop. Scaling in-place is ok. + // Remedy: use Rbase_tmp. + // + Label done; + Register Rdst_tmp = Rdst; + Register Rbase_tmp = Rbase; + bool used_R0 = false; + bool used_R1 = false; + bool base_preloaded = pow2_offset >= 0; + guarantee(!(base_preloaded && (Rsrc == Rbase)), "Register clash, check caller"); + assert(oop_shift != 0, "room for optimization"); + + // Check if we need to use scratch registers. + if (Rdst == Rbase) { + assert(!(((Rdst == Z_R0) && (Rsrc == Z_R1)) || ((Rdst == Z_R1) && (Rsrc == Z_R0))), "need a scratch reg"); + if (Rdst != Rsrc) { + if (base_preloaded) { Rdst_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } + else { Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; } + } else { + Rbase_tmp = (Rdst == Z_R1) ? Z_R0 : Z_R1; + } + } + if (base_preloaded) lgr_if_needed(Rbase_tmp, Rbase); + + // Scale oop and check for NULL. + // Rsrc contains a narrow oop. Thus we are sure the leftmost bits will never be set. + if (maybeNULL) { // NULL ptr must be preserved! + z_slag(Rdst_tmp, Rsrc, oop_shift); // Arithmetic shift sets the condition code. + z_bre(done); + } else { + z_sllg(Rdst_tmp, Rsrc, oop_shift); // Logical shift leaves condition code alone. + } + + // Get oop_base components. + if (!base_preloaded) { + pow2_offset = get_oop_base(Rbase_tmp, (uint64_t)(intptr_t)oop_base); + } + + // Add up all components. 
+ if ((Rbase_tmp == Z_R0) || (Rdst_tmp == Z_R0)) { + z_algr(Rdst_tmp, Rbase_tmp); + if (pow2_offset != 0) { add2reg(Rdst_tmp, pow2_offset); } + } else { + add2reg_with_index(Rdst_tmp, pow2_offset, Rbase_tmp, Rdst_tmp); + } + + bind(done); + lgr_if_needed(Rdst, Rdst_tmp); +#ifdef ASSERT + if (used_R0 && Rdst != Z_R0 && Rsrc != Z_R0) { preset_reg(Z_R0, 0xb03bUL, 2); } + if (used_R1 && Rdst != Z_R1 && Rsrc != Z_R1) { preset_reg(Z_R1, 0xb04bUL, 2); } +#endif + BLOCK_COMMENT("} cOop decoder general"); + } + } else { + BLOCK_COMMENT("cOop decoder zeroBase {"); + if (oop_shift == 0) { + lgr_if_needed(Rdst, Rsrc); + } else { + z_sllg(Rdst, Rsrc, oop_shift); + } + BLOCK_COMMENT("} cOop decoder zeroBase"); + } +} + +void MacroAssembler::load_mirror(Register mirror, Register method) { + mem2reg_opt(mirror, Address(method, Method::const_offset())); + mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset())); + mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes())); + mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset())); +} + +//--------------------------------------------------------------- +//--- Operations on arrays. +//--------------------------------------------------------------- + +// Compiler ensures base is doubleword aligned and cnt is #doublewords. +// Emitter does not KILL cnt and base arguments, since they need to be copied to +// work registers anyway. +// Actually, only r0, r1, and r5 are killed. +unsigned int MacroAssembler::Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len) { + // Src_addr is evenReg. + // Src_len is odd_Reg. + + int block_start = offset(); + Register tmp_reg = src_len; // Holds target instr addr for EX. + Register dst_len = Z_R1; // Holds dst len for MVCLE. + Register dst_addr = Z_R0; // Holds dst addr for MVCLE. + + Label doXC, doMVCLE, done; + + BLOCK_COMMENT("Clear_Array {"); + + // Check for zero len and convert to long. 
+ z_ltgfr(src_len, cnt_arg); // Remember casted value for doSTG case. + z_bre(done); // Nothing to do if len == 0. + + // Prefetch data to be cleared. + if (VM_Version::has_Prefetch()) { + z_pfd(0x02, 0, Z_R0, base_pointer_arg); + z_pfd(0x02, 256, Z_R0, base_pointer_arg); + } + + z_sllg(dst_len, src_len, 3); // #bytes to clear. + z_cghi(src_len, 32); // Check for len <= 256 bytes (<=32 DW). + z_brnh(doXC); // If so, use executed XC to clear. + + // MVCLE: initialize long arrays (general case). + bind(doMVCLE); + z_lgr(dst_addr, base_pointer_arg); + clear_reg(src_len, true, false); // Src len of MVCLE is zero. + + MacroAssembler::move_long_ext(dst_addr, src_addr, 0); + z_bru(done); + + // XC: initialize short arrays. + Label XC_template; // Instr template, never exec directly! + bind(XC_template); + z_xc(0,0,base_pointer_arg,0,base_pointer_arg); + + bind(doXC); + add2reg(dst_len, -1); // Get #bytes-1 for EXECUTE. + if (VM_Version::has_ExecuteExtensions()) { + z_exrl(dst_len, XC_template); // Execute XC with var. len. + } else { + z_larl(tmp_reg, XC_template); + z_ex(dst_len,0,Z_R0,tmp_reg); // Execute XC with var. len. + } + // z_bru(done); // fallthru + + bind(done); + + BLOCK_COMMENT("} Clear_Array"); + + int block_end = offset(); + return block_end - block_start; +} + +// Compiler ensures base is doubleword aligned and cnt is count of doublewords. +// Emitter does not KILL any arguments nor work registers. +// Emitter generates up to 16 XC instructions, depending on the array length. +unsigned int MacroAssembler::Clear_Array_Const(long cnt, Register base) { + int block_start = offset(); + int off; + int lineSize_Bytes = AllocatePrefetchStepSize; + int lineSize_DW = AllocatePrefetchStepSize>>LogBytesPerWord; + bool doPrefetch = VM_Version::has_Prefetch(); + int XC_maxlen = 256; + int numXCInstr = cnt > 0 ? 
(cnt*BytesPerWord-1)/XC_maxlen+1 : 0; + + BLOCK_COMMENT("Clear_Array_Const {"); + assert(cnt*BytesPerWord <= 4096, "ClearArrayConst can handle 4k only"); + + // Do less prefetching for very short arrays. + if (numXCInstr > 0) { + // Prefetch only some cache lines, then begin clearing. + if (doPrefetch) { + if (cnt*BytesPerWord <= lineSize_Bytes/4) { // If less than 1/4 of a cache line to clear, + z_pfd(0x02, 0, Z_R0, base); // prefetch just the first cache line. + } else { + assert(XC_maxlen == lineSize_Bytes, "ClearArrayConst needs 256B cache lines"); + for (off = 0; (off < AllocatePrefetchLines) && (off <= numXCInstr); off ++) { + z_pfd(0x02, off*lineSize_Bytes, Z_R0, base); + } + } + } + + for (off=0; off<(numXCInstr-1); off++) { + z_xc(off*XC_maxlen, XC_maxlen-1, base, off*XC_maxlen, base); + + // Prefetch some cache lines in advance. + if (doPrefetch && (off <= numXCInstr-AllocatePrefetchLines)) { + z_pfd(0x02, (off+AllocatePrefetchLines)*lineSize_Bytes, Z_R0, base); + } + } + if (off*XC_maxlen < cnt*BytesPerWord) { + z_xc(off*XC_maxlen, (cnt*BytesPerWord-off*XC_maxlen)-1, base, off*XC_maxlen, base); + } + } + BLOCK_COMMENT("} Clear_Array_Const"); + + int block_end = offset(); + return block_end - block_start; +} + +// Compiler ensures base is doubleword aligned and cnt is #doublewords. +// Emitter does not KILL cnt and base arguments, since they need to be copied to +// work registers anyway. +// Actually, only r0, r1, r4, and r5 (which are work registers) are killed. +// +// For very large arrays, exploit MVCLE H/W support. +// MVCLE instruction automatically exploits H/W-optimized page mover. +// - Bytes up to next page boundary are cleared with a series of XC to self. +// - All full pages are cleared with the page mover H/W assist. +// - Remaining bytes are again cleared by a series of XC to self. +// +unsigned int MacroAssembler::Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len) { + // Src_addr is evenReg. 
+ // Src_len is odd_Reg. + + int block_start = offset(); + Register dst_len = Z_R1; // Holds dst len for MVCLE. + Register dst_addr = Z_R0; // Holds dst addr for MVCLE. + + BLOCK_COMMENT("Clear_Array_Const_Big {"); + + // Get len to clear. + load_const_optimized(dst_len, (long)cnt*8L); // in Bytes = #DW*8 + + // Prepare other args to MVCLE. + z_lgr(dst_addr, base_pointer_arg); + // Indicate unused result. + (void) clear_reg(src_len, true, false); // Src len of MVCLE is zero. + + // Clear. + MacroAssembler::move_long_ext(dst_addr, src_addr, 0); + BLOCK_COMMENT("} Clear_Array_Const_Big"); + + int block_end = offset(); + return block_end - block_start; +} + +// Allocator. +unsigned int MacroAssembler::CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg, + Register cnt_reg, + Register tmp1_reg, Register tmp2_reg) { + // Tmp1 is oddReg. + // Tmp2 is evenReg. + + int block_start = offset(); + Label doMVC, doMVCLE, done, MVC_template; + + BLOCK_COMMENT("CopyRawMemory_AlignedDisjoint {"); + + // Check for zero len and convert to long. + z_ltgfr(cnt_reg, cnt_reg); // Remember casted value for doSTG case. + z_bre(done); // Nothing to do if len == 0. + + z_sllg(Z_R1, cnt_reg, 3); // Dst len in bytes. calc early to have the result ready. + + z_cghi(cnt_reg, 32); // Check for len <= 256 bytes (<=32 DW). + z_brnh(doMVC); // If so, use executed MVC to clear. + + bind(doMVCLE); // A lot of data (more than 256 bytes). + // Prep dest reg pair. + z_lgr(Z_R0, dst_reg); // dst addr + // Dst len already in Z_R1. + // Prep src reg pair. + z_lgr(tmp2_reg, src_reg); // src addr + z_lgr(tmp1_reg, Z_R1); // Src len same as dst len. + + // Do the copy. + move_long_ext(Z_R0, tmp2_reg, 0xb0); // Bypass cache. + z_bru(done); // All done. + + bind(MVC_template); // Just some data (not more than 256 bytes). 
+ z_mvc(0, 0, dst_reg, 0, src_reg); + + bind(doMVC); + + if (VM_Version::has_ExecuteExtensions()) { + add2reg(Z_R1, -1); + } else { + add2reg(tmp1_reg, -1, Z_R1); + z_larl(Z_R1, MVC_template); + } + + if (VM_Version::has_Prefetch()) { + z_pfd(1, 0,Z_R0,src_reg); + z_pfd(2, 0,Z_R0,dst_reg); + // z_pfd(1,256,Z_R0,src_reg); // Assume very short copy. + // z_pfd(2,256,Z_R0,dst_reg); + } + + if (VM_Version::has_ExecuteExtensions()) { + z_exrl(Z_R1, MVC_template); + } else { + z_ex(tmp1_reg, 0, Z_R0, Z_R1); + } + + bind(done); + + BLOCK_COMMENT("} CopyRawMemory_AlignedDisjoint"); + + int block_end = offset(); + return block_end - block_start; +} + +//------------------------------------------------------ +// Special String Intrinsics. Implementation +//------------------------------------------------------ + +// Intrinsics for CompactStrings + +// Compress char[] to byte[]. odd_reg contains cnt. Kills dst. Early clobber: result +// The result is the number of characters copied before the first incompatible character was found. +// If tmp2 is provided and the compression fails, the compression stops exactly at this point and the result is precise. +// +// Note: Does not behave exactly like package private StringUTF16 compress java implementation in case of failure: +// - Different number of characters may have been written to dead array (if tmp2 not provided). +// - Returns a number encoding()%2 == 0, "must be even reg"); + assert(cnt->encoding()%2 == 1, "must be odd reg"); + assert(cnt->encoding() - dst->encoding() == 1, "must be even/odd pair"); + + StubRoutines::zarch::generate_load_trot_table_addr(this, table); // kills Z_R0 (if ASSERT) + clear_reg(stop_char); // Stop character. Not used here, but initialized to have a defined value. + lgr_if_needed(src_addr, src); + z_llgfr(cnt, cnt); // # src characters, must be a positive simm32. 
+ + translate_ot(dst, src_addr, /* mask = */ 0x0001); + + BLOCK_COMMENT("} string_inflate"); + + return offset() - block_start; +} + +// Inflate byte[] to char[]. odd_reg contains cnt. Kills src. +unsigned int MacroAssembler::string_inflate(Register src, Register dst, Register odd_reg, + Register even_reg, Register tmp) { + int block_start = offset(); + + BLOCK_COMMENT("string_inflate {"); + + Label Lloop1, Lloop2, Lslow, Ldone; + const Register addr1 = src, ind2 = tmp; + + z_sll(odd_reg, 1); // Number of bytes to write. (Must be a positive simm32.) + clear_reg(ind2); // Index to write. + z_ahi(odd_reg, -16); // Last possible index for fast loop. + z_brl(Lslow); + + // ind2: index, even_reg: index increment, odd_reg: index limit + clear_reg(Z_R0); + clear_reg(Z_R1); + z_lhi(even_reg, 16); + + bind(Lloop1); // 8 Characters per iteration. + z_icmh(Z_R0, 5, 0, addr1); + z_icmh(Z_R1, 5, 4, addr1); + z_icm(Z_R0, 5, 2, addr1); + z_icm(Z_R1, 5, 6, addr1); + z_aghi(addr1, 8); + z_stg(Z_R0, Address(dst, ind2)); + z_stg(Z_R1, Address(dst, ind2, 8)); + z_brxle(ind2, even_reg, Lloop1); + + bind(Lslow); + // Compute index limit and skip if negative. + z_ahi(odd_reg, 16-2); // Last possible index for slow loop. + z_lhi(even_reg, 2); + z_cr(ind2, odd_reg); + z_brh(Ldone); + + bind(Lloop2); // 1 Character per iteration. + z_llc(Z_R0, Address(addr1)); + z_sth(Z_R0, Address(dst, ind2)); + z_aghi(addr1, 1); + z_brxle(ind2, even_reg, Lloop2); + + bind(Ldone); + + BLOCK_COMMENT("} string_inflate"); + + return offset() - block_start; +} + +// Kills src. +unsigned int MacroAssembler::has_negatives(Register result, Register src, Register cnt, + Register odd_reg, Register even_reg, Register tmp) { + int block_start = offset(); + Label Lloop1, Lloop2, Lslow, Lnotfound, Ldone; + const Register addr = src, mask = tmp; + + BLOCK_COMMENT("has_negatives {"); + + z_llgfr(Z_R1, cnt); // Number of bytes to read. (Must be a positive simm32.) 
+ z_llilf(mask, 0x80808080); + z_lhi(result, 1); // Assume true. + // Last possible addr for fast loop. + z_lay(odd_reg, -16, Z_R1, src); + z_chi(cnt, 16); + z_brl(Lslow); + + // ind1: index, even_reg: index increment, odd_reg: index limit + z_iihf(mask, 0x80808080); + z_lghi(even_reg, 16); + + bind(Lloop1); // 16 bytes per iteration. + z_lg(Z_R0, Address(addr)); + z_lg(Z_R1, Address(addr, 8)); + z_ogr(Z_R0, Z_R1); + z_ngr(Z_R0, mask); + z_brne(Ldone); // If found return 1. + z_brxlg(addr, even_reg, Lloop1); + + bind(Lslow); + z_aghi(odd_reg, 16-1); // Last possible addr for slow loop. + z_lghi(even_reg, 1); + z_cgr(addr, odd_reg); + z_brh(Lnotfound); + + bind(Lloop2); // 1 byte per iteration. + z_cli(Address(addr), 0x80); + z_brnl(Ldone); // If found return 1. + z_brxlg(addr, even_reg, Lloop2); + + bind(Lnotfound); + z_lhi(result, 0); + + bind(Ldone); + + BLOCK_COMMENT("} has_negatives"); + + return offset() - block_start; +} + +// kill: cnt1, cnt2, odd_reg, even_reg; early clobber: result +unsigned int MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, + Register odd_reg, Register even_reg, Register result, int ae) { + int block_start = offset(); + + assert_different_registers(str1, cnt1, cnt2, odd_reg, even_reg, result); + assert_different_registers(str2, cnt1, cnt2, odd_reg, even_reg, result); + + // If strings are equal up to min length, return the length difference. + const Register diff = result, // Pre-set result with length difference. + min = cnt1, // min number of bytes + tmp = cnt2; + + // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a) + // we interchange str1 and str2 in the UL case and negate the result. + // Like this, str1 is always latin1 encoded, except for the UU case. + // In addition, we need 0 (or sign which is 0) extend when using 64 bit register. 
+ const bool used_as_LU = (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL); + + BLOCK_COMMENT("string_compare {"); + + if (used_as_LU) { + z_srl(cnt2, 1); + } + + // See if the lengths are different, and calculate min in cnt1. + // Save diff in case we need it for a tie-breaker. + + // diff = cnt1 - cnt2 + if (VM_Version::has_DistinctOpnds()) { + z_srk(diff, cnt1, cnt2); + } else { + z_lr(diff, cnt1); + z_sr(diff, cnt2); + } + if (str1 != str2) { + if (VM_Version::has_LoadStoreConditional()) { + z_locr(min, cnt2, Assembler::bcondHigh); + } else { + Label Lskip; + z_brl(Lskip); // min ok if cnt1 < cnt2 + z_lr(min, cnt2); // min = cnt2 + bind(Lskip); + } + } + + if (ae == StrIntrinsicNode::UU) { + z_sra(diff, 1); + } + if (str1 != str2) { + Label Ldone; + if (used_as_LU) { + // Loop which searches the first difference character by character. + Label Lloop; + const Register ind1 = Z_R1, + ind2 = min; + int stride1 = 1, stride2 = 2; // See comment above. + + // ind1: index, even_reg: index increment, odd_reg: index limit + z_llilf(ind1, (unsigned int)(-stride1)); + z_lhi(even_reg, stride1); + add2reg(odd_reg, -stride1, min); + clear_reg(ind2); // kills min + + bind(Lloop); + z_brxh(ind1, even_reg, Ldone); + z_llc(tmp, Address(str1, ind1)); + z_llh(Z_R0, Address(str2, ind2)); + z_ahi(ind2, stride2); + z_sr(tmp, Z_R0); + z_bre(Lloop); + + z_lr(result, tmp); + + } else { + // Use clcle in fast loop (only for same encoding). + z_lgr(Z_R0, str1); + z_lgr(even_reg, str2); + z_llgfr(Z_R1, min); + z_llgfr(odd_reg, min); + + if (ae == StrIntrinsicNode::LL) { + compare_long_ext(Z_R0, even_reg, 0); + } else { + compare_long_uni(Z_R0, even_reg, 0); + } + z_bre(Ldone); + z_lgr(Z_R1, Z_R0); + if (ae == StrIntrinsicNode::LL) { + z_llc(Z_R0, Address(even_reg)); + z_llc(result, Address(Z_R1)); + } else { + z_llh(Z_R0, Address(even_reg)); + z_llh(result, Address(Z_R1)); + } + z_sr(result, Z_R0); + } + + // Otherwise, return the difference between the first mismatched chars. 
+ bind(Ldone); + } + + if (ae == StrIntrinsicNode::UL) { + z_lcr(result, result); // Negate result (see note above). + } + + BLOCK_COMMENT("} string_compare"); + + return offset() - block_start; +} + +unsigned int MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, + Register odd_reg, Register even_reg, Register result, bool is_byte) { + int block_start = offset(); + + BLOCK_COMMENT("array_equals {"); + + assert_different_registers(ary1, limit, odd_reg, even_reg); + assert_different_registers(ary2, limit, odd_reg, even_reg); + + Label Ldone, Ldone_true, Ldone_false, Lclcle, CLC_template; + int base_offset = 0; + + if (ary1 != ary2) { + if (is_array_equ) { + base_offset = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR); + + // Return true if the same array. + compareU64_and_branch(ary1, ary2, Assembler::bcondEqual, Ldone_true); + + // Return false if one of them is NULL. + compareU64_and_branch(ary1, (intptr_t)0, Assembler::bcondEqual, Ldone_false); + compareU64_and_branch(ary2, (intptr_t)0, Assembler::bcondEqual, Ldone_false); + + // Load the lengths of arrays. + z_llgf(odd_reg, Address(ary1, arrayOopDesc::length_offset_in_bytes())); + + // Return false if the two arrays are not equal length. + z_c(odd_reg, Address(ary2, arrayOopDesc::length_offset_in_bytes())); + z_brne(Ldone_false); + + // string len in bytes (right operand) + if (!is_byte) { + z_chi(odd_reg, 128); + z_sll(odd_reg, 1); // preserves flags + z_brh(Lclcle); + } else { + compareU32_and_branch(odd_reg, (intptr_t)256, Assembler::bcondHigh, Lclcle); + } + } else { + z_llgfr(odd_reg, limit); // Need to zero-extend prior to using the value. + compareU32_and_branch(limit, (intptr_t)256, Assembler::bcondHigh, Lclcle); + } + + + // Use clc instruction for up to 256 bytes. 
+ { + Register str1_reg = ary1, + str2_reg = ary2; + if (is_array_equ) { + str1_reg = Z_R1; + str2_reg = even_reg; + add2reg(str1_reg, base_offset, ary1); // string addr (left operand) + add2reg(str2_reg, base_offset, ary2); // string addr (right operand) + } + z_ahi(odd_reg, -1); // Clc uses decremented limit. Also compare result to 0. + z_brl(Ldone_true); + // Note: We could jump to the template if equal. + + assert(VM_Version::has_ExecuteExtensions(), "unsupported hardware"); + z_exrl(odd_reg, CLC_template); + z_bre(Ldone_true); + // fall through + + bind(Ldone_false); + clear_reg(result); + z_bru(Ldone); + + bind(CLC_template); + z_clc(0, 0, str1_reg, 0, str2_reg); + } + + // Use clcle instruction. + { + bind(Lclcle); + add2reg(even_reg, base_offset, ary2); // string addr (right operand) + add2reg(Z_R0, base_offset, ary1); // string addr (left operand) + + z_lgr(Z_R1, odd_reg); // string len in bytes (left operand) + if (is_byte) { + compare_long_ext(Z_R0, even_reg, 0); + } else { + compare_long_uni(Z_R0, even_reg, 0); + } + z_lghi(result, 0); // Preserve flags. + z_brne(Ldone); + } + } + // fall through + + bind(Ldone_true); + z_lghi(result, 1); // All characters are equal. + bind(Ldone); + + BLOCK_COMMENT("} array_equals"); + + return offset() - block_start; +} + +// kill: haycnt, needlecnt, odd_reg, even_reg; early clobber: result +unsigned int MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt, + Register needle, Register needlecnt, int needlecntval, + Register odd_reg, Register even_reg, int ae) { + int block_start = offset(); + + // Ensure 0 256) { + bind(L_clcle); + + // Main Loop: clcle version (now we have at least 256 bytes). + Label L_OuterLoop, CLC_template; + bind(L_OuterLoop); // Search for 1st 2 characters. 
+ z_lgr(Z_R1, haycnt); + if (h_csize == 1) { + MacroAssembler::search_string(Z_R1, result); + } else { + MacroAssembler::search_string_uni(Z_R1, result); + } + z_brc(Assembler::bcondNotFound, L_NotFound); + + add2reg(Z_R0, n_csize, needle); + add2reg(even_reg, h_csize, Z_R1); + z_lgr(result, Z_R1); + if (needlecnt != noreg) { + z_llgfr(Z_R1, needlecnt); // needle len in bytes (left operand) + z_llgfr(odd_reg, needlecnt); + } else { + load_const_optimized(Z_R1, needle_bytes); + if (Immediate::is_simm16(needle_bytes)) { z_lghi(odd_reg, needle_bytes); } else { z_lgr(odd_reg, Z_R1); } + } + if (h_csize == 1) { + compare_long_ext(Z_R0, even_reg, 0); + } else { + compare_long_uni(Z_R0, even_reg, 0); + } + z_bre(L_Found); + + if (n_csize == 2) { z_llgh(Z_R0, Address(needle)); } else { z_llgc(Z_R0, Address(needle)); } // Reload. + z_aghi(result, h_csize); // This is the new address we want to use for comparing. + z_bru(L_OuterLoop); + } + } + + if (needlecnt != noreg || needlecntval == 1) { + bind(L_needle1); + + // Single needle character version. + if (h_csize == 1) { + MacroAssembler::search_string(haycnt, result); + } else { + MacroAssembler::search_string_uni(haycnt, result); + } + z_lgr(result, haycnt); + z_brc(Assembler::bcondFound, L_Found); + } + + bind(L_NotFound); + add2reg(result, -1, haystack); // Return -1. + + bind(L_Found); // Return index (or -1 in fallthrough case). 
+ z_sgr(result, haystack); + if (h_csize == 2) { z_srag(result, result, exact_log2(sizeof(jchar))); } + } + BLOCK_COMMENT("} string_indexof"); + + return offset() - block_start; +} + +// early clobber: result +unsigned int MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt, + Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte) { + int block_start = offset(); + + BLOCK_COMMENT("string_indexof_char {"); + + if (needle == haystack) { + z_lhi(result, 0); + } else { + + Label Ldone; + + z_llgfr(odd_reg, haycnt); // Preset loop ctr/searchrange end. + if (needle == noreg) { + load_const_optimized(Z_R0, (unsigned long)needleChar); + } else { + if (is_byte) { + z_llgcr(Z_R0, needle); // First (and only) needle char. + } else { + z_llghr(Z_R0, needle); // First (and only) needle char. + } + } + + if (!is_byte) { + z_agr(odd_reg, odd_reg); // Calc #bytes to be processed with SRSTU. + } + + z_lgr(even_reg, haystack); // haystack addr + z_agr(odd_reg, haystack); // First char after range end. + z_lghi(result, -1); + + if (is_byte) { + MacroAssembler::search_string(odd_reg, even_reg); + } else { + MacroAssembler::search_string_uni(odd_reg, even_reg); + } + z_brc(Assembler::bcondNotFound, Ldone); + if (is_byte) { + if (VM_Version::has_DistinctOpnds()) { + z_sgrk(result, odd_reg, haystack); + } else { + z_sgr(odd_reg, haystack); + z_lgr(result, odd_reg); + } + } else { + z_slgr(odd_reg, haystack); + z_srlg(result, odd_reg, exact_log2(sizeof(jchar))); + } + + bind(Ldone); + } + BLOCK_COMMENT("} string_indexof_char"); + + return offset() - block_start; +} + + +//------------------------------------------------- +// Constants (scalar and oop) in constant pool +//------------------------------------------------- + +// Add a non-relocated constant to the CP. 
+// Hands the value of 'val' to long_constant() so that it becomes a
+// (non-relocated) constant pool entry.
+// Returns the entry's offset relative to code()->consts()->start(),
+// or -1 if long_constant() returned NULL.
+int MacroAssembler::store_const_in_toc(AddressLiteral& val) {
+  const long literal = val.value();
+  const address entry = long_constant(literal);
+
+  if (entry == NULL) {
+    // long_constant() returned NULL, so no constant entry has been created.
+    // Return a "fatal" offset, just in case subsequently generated
+    // access code is executed.
+    return -1;
+  }
+  return (int)(entry - code()->consts()->start());
+}
+
+// Add a relocated constant to the CP.
+// Returns the TOC offset where the address is stored, or -1 if
+// address_constant() returned NULL.
+int MacroAssembler::store_oop_in_toc(AddressLiteral& oop) {
+  // The constant pool entry itself is created with RelocationHolder::none.
+  // Otherwise we will end up with a failing NativeCall::verify(x),
+  // where x is the address of the constant pool entry.
+  const address entry = address_constant((address)oop.value(), RelocationHolder::none);
+
+  if (entry == NULL) {
+    // address_constant() returned NULL, so no constant entry has been created.
+    // Return a "fatal" offset, just in case subsequently generated
+    // access code is executed.
+    return -1;
+  }
+
+  const int toc_offset = (int)(entry - code()->consts()->start());
+  RelocationHolder holder = oop.rspec();
+  Relocation* reloc = holder.reloc();
+
+  // Record the TOC offset in the relocation; it is used by call_far_patchable.
+  if ((relocInfo::relocType)reloc->type() == relocInfo::runtime_call_w_cp_type) {
+    ((runtime_call_w_cp_Relocation *)reloc)->set_constant_pool_offset(toc_offset);
+  }
+  // Relocate at the load's pc.
+  relocate(holder);
+
+  return toc_offset;
+}
+
+// Stores 'a' in the TOC via store_const_in_toc() and then calls
+// load_long_pcrelative() to load that entry into dst.
+// Returns false if no TOC entry was created, true otherwise.
+// Rtoc is not referenced by this implementation.
+bool MacroAssembler::load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
+  const int toc_offset = store_const_in_toc(a);
+  if (toc_offset == -1) {
+    return false;
+  }
+
+  address entry = toc_offset + code()->consts()->start();
+  assert((address)code()->consts()->start() != NULL, "Please add CP address");
+
+  load_long_pcrelative(dst, entry);
+  return true;
+}
+
+// Stores 'a' in the TOC via store_oop_in_toc() and then calls
+// load_addr_pcrelative() for that entry with dst as target register.
+// Returns false if no TOC entry was created, true otherwise.
+// Rtoc is not referenced by this implementation.
+bool MacroAssembler::load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc) {
+  const int toc_offset = store_oop_in_toc(a);
+  if (toc_offset == -1) {
+    return false;
+  }
+
+  address entry = toc_offset + code()->consts()->start();
+  assert((address)code()->consts()->start() != NULL, "Please add CP address");
+
+  load_addr_pcrelative(dst, entry);
+  return true;
+}
+
+// If the instruction sequence at the given pc is a load_const_from_toc
+// sequence, return the value currently stored at the referenced position
+// in the TOC.
+intptr_t MacroAssembler::get_const_from_toc(address pc) {
+  assert(is_load_const_from_toc(pc), "must be load_const_from_pool");
+
+  const long disp = get_load_const_from_toc_offset(pc);
+  address slot = NULL;
+
+  if (is_load_const_from_toc_pcrelative(pc)) {
+    // The offset is relative to the load instruction itself.
+    slot = pc + disp;
+  } else {
+    // The offset is relative to the constant table of the enclosing nmethod.
+    CodeBlob* blob = CodeCache::find_blob_unsafe(pc);   // Else we get assertion if nmethod is zombie.
+    assert(blob && blob->is_nmethod(), "sanity");
+    slot = ((nmethod*)blob)->ctable_begin() + disp;
+  }
+  return *(intptr_t *)slot;
+}
+
+// If the instruction sequence at the given pc is a load_const_from_toc
+// sequence, copy the passed-in new_data value into the referenced
+// position in the TOC.
+void MacroAssembler::set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb) { + assert(is_load_const_from_toc(pc), "must be load_const_from_pool"); + + long offset = MacroAssembler::get_load_const_from_toc_offset(pc); + address dataLoc = NULL; + if (is_load_const_from_toc_pcrelative(pc)) { + dataLoc = pc+offset; + } else { + nmethod* nm = CodeCache::find_nmethod(pc); + assert((cb == NULL) || (nm == (nmethod*)cb), "instruction address should be in CodeBlob"); + dataLoc = nm->ctable_begin() + offset; + } + if (*(unsigned long *)dataLoc != new_data) { // Prevent cache invalidation: update only if necessary. + *(unsigned long *)dataLoc = new_data; + } +} + +// Dynamic TOC. Getter must only be called if "a" is a load_const_from_toc +// site. Verify by calling is_load_const_from_toc() before!! +// Offset is +/- 2**32 -> use long. +long MacroAssembler::get_load_const_from_toc_offset(address a) { + assert(is_load_const_from_toc_pcrelative(a), "expected pc relative load"); + // expected code sequence: + // z_lgrl(t, simm32); len = 6 + unsigned long inst; + unsigned int len = get_instruction(a, &inst); + return get_pcrel_offset(inst); +} + +//********************************************************************************** +// inspection of generated instruction sequences for a particular pattern +//********************************************************************************** + +bool MacroAssembler::is_load_const_from_toc_pcrelative(address a) { +#ifdef ASSERT + unsigned long inst; + unsigned int len = get_instruction(a+2, &inst); + if ((len == 6) && is_load_pcrelative_long(a) && is_call_pcrelative_long(inst)) { + const int range = 128; + Assembler::dump_code_range(tty, a, range, "instr(a) == z_lgrl && instr(a+2) == z_brasl"); + VM_Version::z_SIGSEGV(); + } +#endif + // expected code sequence: + // z_lgrl(t, relAddr32); len = 6 + //TODO: verify accessed data is in CP, if possible. + return is_load_pcrelative_long(a); // TODO: might be too general. 
Currently, only lgrl is used. +} + +bool MacroAssembler::is_load_const_from_toc_call(address a) { + return is_load_const_from_toc(a) && is_call_byregister(a + load_const_from_toc_size()); +} + +bool MacroAssembler::is_load_const_call(address a) { + return is_load_const(a) && is_call_byregister(a + load_const_size()); +} + +//------------------------------------------------- +// Emitters for some really CICS instructions +//------------------------------------------------- + +void MacroAssembler::move_long_ext(Register dst, Register src, unsigned int pad) { + assert(dst->encoding()%2==0, "must be an even/odd register pair"); + assert(src->encoding()%2==0, "must be an even/odd register pair"); + assert(pad<256, "must be a padding BYTE"); + + Label retry; + bind(retry); + Assembler::z_mvcle(dst, src, pad); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::compare_long_ext(Register left, Register right, unsigned int pad) { + assert(left->encoding() % 2 == 0, "must be an even/odd register pair"); + assert(right->encoding() % 2 == 0, "must be an even/odd register pair"); + assert(pad<256, "must be a padding BYTE"); + + Label retry; + bind(retry); + Assembler::z_clcle(left, right, pad, Z_R0); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::compare_long_uni(Register left, Register right, unsigned int pad) { + assert(left->encoding() % 2 == 0, "must be an even/odd register pair"); + assert(right->encoding() % 2 == 0, "must be an even/odd register pair"); + assert(pad<=0xfff, "must be a padding HALFWORD"); + assert(VM_Version::has_ETF2(), "instruction must be available"); + + Label retry; + bind(retry); + Assembler::z_clclu(left, right, pad, Z_R0); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::search_string(Register end, Register start) { + assert(end->encoding() != 0, "end address must not be in R0"); + 
assert(start->encoding() != 0, "start address must not be in R0"); + + Label retry; + bind(retry); + Assembler::z_srst(end, start); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::search_string_uni(Register end, Register start) { + assert(end->encoding() != 0, "end address must not be in R0"); + assert(start->encoding() != 0, "start address must not be in R0"); + assert(VM_Version::has_ETF3(), "instruction must be available"); + + Label retry; + bind(retry); + Assembler::z_srstu(end, start); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::kmac(Register srcBuff) { + assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); + assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); + + Label retry; + bind(retry); + Assembler::z_kmac(Z_R0, srcBuff); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::kimd(Register srcBuff) { + assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); + assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); + + Label retry; + bind(retry); + Assembler::z_kimd(Z_R0, srcBuff); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::klmd(Register srcBuff) { + assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); + assert(srcBuff->encoding() % 2 == 0, "src buffer/len must be an even/odd register pair"); + + Label retry; + bind(retry); + Assembler::z_klmd(Z_R0, srcBuff); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::km(Register dstBuff, Register srcBuff) { + // DstBuff and srcBuff are allowed to be the same register (encryption in-place). + // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. 
+ assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); + assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); + assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); + + Label retry; + bind(retry); + Assembler::z_km(dstBuff, srcBuff); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::kmc(Register dstBuff, Register srcBuff) { + // DstBuff and srcBuff are allowed to be the same register (encryption in-place). + // DstBuff and srcBuff storage must not overlap destructively, and neither must overlap the parameter block. + assert(srcBuff->encoding() != 0, "src buffer address can't be in Z_R0"); + assert(dstBuff->encoding() % 2 == 0, "dst buffer addr must be an even register"); + assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); + + Label retry; + bind(retry); + Assembler::z_kmc(dstBuff, srcBuff); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::cksm(Register crcBuff, Register srcBuff) { + assert(srcBuff->encoding() % 2 == 0, "src buffer addr/len must be an even/odd register pair"); + + Label retry; + bind(retry); + Assembler::z_cksm(crcBuff, srcBuff); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::translate_oo(Register r1, Register r2, uint m3) { + assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); + assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); + + Label retry; + bind(retry); + Assembler::z_troo(r1, r2, m3); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::translate_ot(Register r1, Register r2, uint m3) { + assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); + assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); + + Label retry; + 
bind(retry); + Assembler::z_trot(r1, r2, m3); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::translate_to(Register r1, Register r2, uint m3) { + assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); + assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); + + Label retry; + bind(retry); + Assembler::z_trto(r1, r2, m3); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::translate_tt(Register r1, Register r2, uint m3) { + assert(r1->encoding() % 2 == 0, "dst addr/src len must be an even/odd register pair"); + assert((m3 & 0b1110) == 0, "Unused mask bits must be zero"); + + Label retry; + bind(retry); + Assembler::z_trtt(r1, r2, m3); + Assembler::z_brc(Assembler::bcondOverflow /* CC==3 (iterate) */, retry); +} + +void MacroAssembler::generate_safepoint_check(Label& slow_path, Register scratch, bool may_relocate) { + if (scratch == noreg) scratch = Z_R1; + address Astate = SafepointSynchronize::address_of_state(); + BLOCK_COMMENT("safepoint check:"); + + if (may_relocate) { + ptrdiff_t total_distance = Astate - this->pc(); + if (RelAddr::is_in_range_of_RelAddr32(total_distance)) { + RelocationHolder rspec = external_word_Relocation::spec(Astate); + (this)->relocate(rspec, relocInfo::pcrel_addr_format); + load_absolute_address(scratch, Astate); + } else { + load_const_optimized(scratch, Astate); + } + } else { + load_absolute_address(scratch, Astate); + } + z_cli(/*SafepointSynchronize::sz_state()*/4-1, scratch, SafepointSynchronize::_not_synchronized); + z_brne(slow_path); +} + + +void MacroAssembler::generate_type_profiling(const Register Rdata, + const Register Rreceiver_klass, + const Register Rwanted_receiver_klass, + const Register Rmatching_row, + bool is_virtual_call) { + const int row_size = in_bytes(ReceiverTypeData::receiver_offset(1)) - + in_bytes(ReceiverTypeData::receiver_offset(0)); + const int num_rows = 
ReceiverTypeData::row_limit(); + NearLabel found_free_row; + NearLabel do_increment; + NearLabel found_no_slot; + + BLOCK_COMMENT("type profiling {"); + + // search for: + // a) The type given in Rwanted_receiver_klass. + // b) The *first* empty row. + + // First search for a) only, just running over b) with no regard. + // This is possible because + // wanted_receiver_class == receiver_class && wanted_receiver_class == 0 + // is never true (receiver_class can't be zero). + for (int row_num = 0; row_num < num_rows; row_num++) { + // Row_offset should be a well-behaved positive number. The generated code relies + // on that wrt constant code size. Add2reg can handle all row_offset values, but + // will have to vary generated code size. + int row_offset = in_bytes(ReceiverTypeData::receiver_offset(row_num)); + assert(Displacement::is_shortDisp(row_offset), "Limitation of generated code"); + + // Is Rwanted_receiver_klass in this row? + if (VM_Version::has_CompareBranch()) { + z_lg(Rwanted_receiver_klass, row_offset, Z_R0, Rdata); + // Rmatching_row = Rdata + row_offset; + add2reg(Rmatching_row, row_offset, Rdata); + // if (*row_recv == (intptr_t) receiver_klass) goto fill_existing_slot; + compare64_and_branch(Rwanted_receiver_klass, Rreceiver_klass, Assembler::bcondEqual, do_increment); + } else { + add2reg(Rmatching_row, row_offset, Rdata); + z_cg(Rreceiver_klass, row_offset, Z_R0, Rdata); + z_bre(do_increment); + } + } + + // Now that we did not find a match, let's search for b). + + // We could save the first calculation of Rmatching_row if we woud search for a) in reverse order. + // We would then end up here with Rmatching_row containing the value for row_num == 0. + // We would not see much benefit, if any at all, because the CPU can schedule + // two instructions together with a branch anyway. 
+ for (int row_num = 0; row_num < num_rows; row_num++) { + int row_offset = in_bytes(ReceiverTypeData::receiver_offset(row_num)); + + // Has this row a zero receiver_klass, i.e. is it empty? + if (VM_Version::has_CompareBranch()) { + z_lg(Rwanted_receiver_klass, row_offset, Z_R0, Rdata); + // Rmatching_row = Rdata + row_offset + add2reg(Rmatching_row, row_offset, Rdata); + // if (*row_recv == (intptr_t) 0) goto found_free_row + compare64_and_branch(Rwanted_receiver_klass, (intptr_t)0, Assembler::bcondEqual, found_free_row); + } else { + add2reg(Rmatching_row, row_offset, Rdata); + load_and_test_long(Rwanted_receiver_klass, Address(Rdata, row_offset)); + z_bre(found_free_row); // zero -> Found a free row. + } + } + + // No match, no empty row found. + // Increment total counter to indicate polymorphic case. + if (is_virtual_call) { + add2mem_64(Address(Rdata, CounterData::count_offset()), 1, Rmatching_row); + } + z_bru(found_no_slot); + + // Here we found an empty row, but we have not found Rwanted_receiver_klass. + // Rmatching_row holds the address to the first empty row. + bind(found_free_row); + // Store receiver_klass into empty slot. + z_stg(Rreceiver_klass, 0, Z_R0, Rmatching_row); + + // Increment the counter of Rmatching_row. + bind(do_increment); + ByteSize counter_offset = ReceiverTypeData::receiver_count_offset(0) - ReceiverTypeData::receiver_offset(0); + add2mem_64(Address(Rmatching_row, counter_offset), 1, Rdata); + + bind(found_no_slot); + + BLOCK_COMMENT("} type profiling"); +} + +//--------------------------------------- +// Helpers for Intrinsic Emitters +//--------------------------------------- + +/** + * uint32_t crc; + * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); + */ +void MacroAssembler::fold_byte_crc32(Register crc, Register val, Register table, Register tmp) { + assert_different_registers(crc, table, tmp); + assert_different_registers(val, table); + if (crc == val) { // Must rotate first to use the unmodified value. 
+ rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. + z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. + } else { + z_srl(crc, 8); // Unsigned shift, clear leftmost 8 bits. + rotate_then_insert(tmp, val, 56-2, 63-2, 2, true); // Insert byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. + } + z_x(crc, Address(table, tmp, 0)); +} + +/** + * uint32_t crc; + * timesXtoThe32[crc & 0xFF] ^ (crc >> 8); + */ +void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) { + fold_byte_crc32(crc, crc, table, tmp); +} + +/** + * Emits code to update CRC-32 with a byte value according to constants in table. + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. + * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); + */ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + z_xr(val, crc); + fold_byte_crc32(crc, val, table, val); +} + + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register pointing to CRC table + */ +void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, + Register data, bool invertCRC) { + assert_different_registers(crc, buf, len, table, data); + + Label L_mainLoop, L_done; + const int mainLoop_stepping = 1; + + // Process all bytes in a single-byte loop. + z_ltr(len, len); + z_brnh(L_done); + + if (invertCRC) { + not_(crc, noreg, false); // ~c + } + + bind(L_mainLoop); + z_llgc(data, Address(buf, (intptr_t)0));// Current byte of input buffer (zero extended). Avoids garbage in upper half of register. 
+ add2reg(buf, mainLoop_stepping); // Advance buffer position. + update_byte_crc32(crc, data, table); + z_brct(len, L_mainLoop); // Iterate. + + if (invertCRC) { + not_(crc, noreg, false); // ~c + } + + bind(L_done); +} + +/** + * Emits code to update CRC-32 with a 4-byte value according to constants in table. + * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c. + * + */ +void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, + Register t0, Register t1, Register t2, Register t3) { + // This is what we implement (the DOBIG4 part): + // + // #define DOBIG4 c ^= *++buf4; \ + // c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ + // crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] + // #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 + const int ix0 = 4*(4*CRC32_COLUMN_SIZE); + const int ix1 = 5*(4*CRC32_COLUMN_SIZE); + const int ix2 = 6*(4*CRC32_COLUMN_SIZE); + const int ix3 = 7*(4*CRC32_COLUMN_SIZE); + + // XOR crc with next four bytes of buffer. + lgr_if_needed(t0, crc); + z_x(t0, Address(buf, bufDisp)); + if (bufInc != 0) { + add2reg(buf, bufInc); + } + + // Chop crc into 4 single-byte pieces, shifted left 2 bits, to form the table indices. + rotate_then_insert(t3, t0, 56-2, 63-2, 2, true); // ((c >> 0) & 0xff) << 2 + rotate_then_insert(t2, t0, 56-2, 63-2, 2-8, true); // ((c >> 8) & 0xff) << 2 + rotate_then_insert(t1, t0, 56-2, 63-2, 2-16, true); // ((c >> 16) & 0xff) << 2 + rotate_then_insert(t0, t0, 56-2, 63-2, 2-24, true); // ((c >> 24) & 0xff) << 2 + + // Load pre-calculated table values. + // Use columns 4..7 for big-endian. + z_ly(t3, Address(table, t3, (intptr_t)ix0)); + z_ly(t2, Address(table, t2, (intptr_t)ix1)); + z_ly(t1, Address(table, t1, (intptr_t)ix2)); + z_ly(t0, Address(table, t0, (intptr_t)ix3)); + + // Calculate new crc from table values. 
+ z_xr(t2, t3); + z_xr(t0, t1); + z_xr(t0, t2); // Now crc contains the final checksum value. + lgr_if_needed(crc, t0); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register pointing to CRC table + * + * uses Z_R10..Z_R13 as work register. Must be saved/restored by caller! + */ +void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3) { + assert_different_registers(crc, buf, len, table); + + Label L_mainLoop, L_tail; + Register data = t0; + Register ctr = Z_R0; + const int mainLoop_stepping = 8; + const int tailLoop_stepping = 1; + const int log_stepping = exact_log2(mainLoop_stepping); + + // Don't test for len <= 0 here. This pathological case should not occur anyway. + // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles. + // The situation itself is detected and handled correctly by the conditional branches + // following aghi(len, -stepping) and aghi(len, +stepping). + + not_(crc, noreg, false); // 1s complement of crc + +#if 0 + { + // Pre-mainLoop alignment did not show any positive effect on performance. + // We leave the code in for reference. Maybe the vector instructions in z13 depend on alignment. + + z_cghi(len, mainLoop_stepping); // Alignment is useless for short data streams. + z_brnh(L_tail); + + // Align buf to word (4-byte) boundary. + z_lcr(ctr, buf); + rotate_then_insert(ctr, ctr, 62, 63, 0, true); // TODO: should set cc + z_sgfr(len, ctr); // Remaining len after alignment. 
+ + update_byteLoop_crc32(crc, buf, ctr, table, data, false); + } +#endif + + // Check for short (= 0; idx--, kdx--) { + // huge_128 product = y[idx] * x[xstart] + carry; + // z[kdx] = (jlong)product; + // carry = (jlong)(product >>> 64); + // } + // z[xstart] = carry; + + Label L_first_loop, L_first_loop_exit; + Label L_one_x, L_one_y, L_multiply; + + z_aghi(xstart, -1); + z_brl(L_one_x); // Special case: length of x is 1. + + // Load next two integers of x. + z_sllg(Z_R1_scratch, xstart, LogBytesPerInt); + mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0)); + + + bind(L_first_loop); + + z_aghi(idx, -1); + z_brl(L_first_loop_exit); + z_aghi(idx, -1); + z_brl(L_one_y); + + // Load next two integers of y. + z_sllg(Z_R1_scratch, idx, LogBytesPerInt); + mem2reg_opt(y_idx, Address(y, Z_R1_scratch, 0)); + + + bind(L_multiply); + + Register multiplicand = product->successor(); + Register product_low = multiplicand; + + lgr_if_needed(multiplicand, x_xstart); + z_mlgr(product, y_idx); // multiplicand * y_idx -> product::multiplicand + clear_reg(Z_R7); + z_algr(product_low, carry); // Add carry to result. + z_alcgr(product, Z_R7); // Add carry of the last addition. + add2reg(kdx, -2); + + // Store result. + z_sllg(Z_R7, kdx, LogBytesPerInt); + reg2mem_opt(product_low, Address(z, Z_R7, 0)); + lgr_if_needed(carry, product); + z_bru(L_first_loop); + + + bind(L_one_y); // Load one 32 bit portion of y as (0,value). + + clear_reg(y_idx); + mem2reg_opt(y_idx, Address(y, (intptr_t) 0), false); + z_bru(L_multiply); + + + bind(L_one_x); // Load one 32 bit portion of x as (0,value). + + clear_reg(x_xstart); + mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false); + z_bru(L_first_loop); + + bind(L_first_loop_exit); +} + +// Multiply 64 bit by 64 bit and add 128 bit. 
+void MacroAssembler::multiply_add_128_x_128(Register x_xstart, Register y,
+                                            Register z,
+                                            Register yz_idx, Register idx,
+                                            Register carry, Register product,
+                                            int offset) {
+  // huge_128 product = (y[idx] * x_xstart) + z[kdx] + carry;
+  // z[kdx] = (jlong)product;
+  //
+  // Emits one multiply-accumulate step.
+  //   x_xstart - multiplier value (copied into the low register of the product pair).
+  //   y, z     - base addresses; both are indexed with (idx << LogBytesPerInt) + offset.
+  //   yz_idx   - work register: first receives the value loaded from y, then the one from z.
+  //   carry    - value handed to add2_with_carry together with the loaded z value.
+  //   product  - high register of the result pair; product->successor() is the low register.
+  // Z_R7 is overwritten with the scaled index. The low register is stored back to z;
+  // the high half stays in product.
+
+  Register multiplicand = product->successor();
+  Register product_low = multiplicand;
+
+  z_sllg(Z_R7, idx, LogBytesPerInt);
+  mem2reg_opt(yz_idx, Address(y, Z_R7, offset));
+
+  lgr_if_needed(multiplicand, x_xstart);
+  z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
+  mem2reg_opt(yz_idx, Address(z, Z_R7, offset));
+
+  // Presumably adds carry and the loaded z value to the 128 bit value in product::product_low
+  // (helper is defined elsewhere - confirm).
+  add2_with_carry(product, product_low, carry, yz_idx);
+
+  z_sllg(Z_R7, idx, LogBytesPerInt);
+  reg2mem_opt(product_low, Address(z, Z_R7, offset));
+
+}
+
+// Multiply 128 bit by 128 bit. Unrolled inner loop.
+void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
+                                             Register y, Register z,
+                                             Register yz_idx, Register idx,
+                                             Register jdx,
+                                             Register carry, Register product,
+                                             Register carry2) {
+  // jlong carry, x[], y[], z[];
+  // int kdx = ystart+1;
+  // for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
+  //   huge_128 product = (y[idx+1] * x_xstart) + z[kdx+idx+1] + carry;
+  //   z[kdx+idx+1] = (jlong)product;
+  //   jlong carry2 = (jlong)(product >>> 64);
+  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry2;
+  //   z[kdx+idx] = (jlong)product;
+  //   carry = (jlong)(product >>> 64);
+  // }
+  // idx += 2;
+  // if (idx > 0) {
+  //   product = (y[idx] * x_xstart) + z[kdx+idx] + carry;
+  //   z[kdx+idx] = (jlong)product;
+  //   carry = (jlong)(product >>> 64);
+  // }
+  //
+  // In the emitted code idx is scaled by LogBytesPerInt, so one loop iteration
+  // (two multiply-add steps) consumes four index units. idx and jdx are modified,
+  // Z_R7 is used as scratch.
+
+  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
+
+  // scale the index: jdx = (idx & ~3) >> 2 is the iteration count of the unrolled loop.
+  lgr_if_needed(jdx, idx);
+  and_imm(jdx, 0xfffffffffffffffcL);
+  rshift(jdx, 2);
+
+
+  bind(L_third_loop);
+
+  z_aghi(jdx, -1);
+  z_brl(L_third_loop_exit);
+  add2reg(idx, -4);
+
+  // Step at displacement 8 first; its high half becomes the carry of the step at displacement 0.
+  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 8);
+  lgr_if_needed(carry2, product);
+
+  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry2, product, 0);
+  lgr_if_needed(carry, product);
+  z_bru(L_third_loop);
+
+
+  bind(L_third_loop_exit); // Handle any left-over operand parts.
+
+  // and_imm sets the condition code: nothing is left if the low two bits of idx are zero.
+  and_imm(idx, 0x3);
+  z_brz(L_post_third_loop_done);
+
+  Label L_check_1;
+
+  // At least two index units left: do one more full step.
+  z_aghi(idx, -2);
+  z_brl(L_check_1);
+
+  multiply_add_128_x_128(x_xstart, y, z, yz_idx, idx, carry, product, 0);
+  lgr_if_needed(carry, product);
+
+
+  bind(L_check_1);
+
+  // Undo the subtraction above and test the lowest bit: one single unit may remain.
+  add2reg(idx, 0x2);
+  and_imm(idx, 0x1);
+  z_aghi(idx, -1);
+  z_brl(L_post_third_loop_done);
+
+  Register multiplicand = product->successor();
+  Register product_low = multiplicand;
+
+  // Last unit: the operands are accessed with is_double == false, i.e. not as 64 bit values;
+  // the target register is cleared before each such load.
+  z_sllg(Z_R7, idx, LogBytesPerInt);
+  clear_reg(yz_idx);
+  mem2reg_opt(yz_idx, Address(y, Z_R7, 0), false);
+  lgr_if_needed(multiplicand, x_xstart);
+  z_mlgr(product, yz_idx); // multiplicand * yz_idx -> product::multiplicand
+  clear_reg(yz_idx);
+  mem2reg_opt(yz_idx, Address(z, Z_R7, 0), false);
+
+  add2_with_carry(product, product_low, yz_idx, carry);
+
+  z_sllg(Z_R7, idx, LogBytesPerInt);
+  reg2mem_opt(product_low, Address(z, Z_R7, 0), false);
+  rshift(product_low, 32);
+
+  // carry = (product_low >> 32) | (product << 32)
+  lshift(product, 32);
+  z_ogr(product_low, product);
+  lgr_if_needed(carry, product_low);
+
+  bind(L_post_third_loop_done);
+}
+
+// Emits the multiplication of the int arrays x (xlen elements) and y (ylen elements) into z,
+// following the Java pseudo code quoted in the comments below.
+// Z_R7..Z_R13 are saved on entry and restored at L_done. Z_R0_scratch, Z_R1_scratch, Z_R7
+// and Z_R8 are used as work registers in addition to tmp1..tmp5. The length of z is read
+// from the stack slot at _z_abi(remaining_cargs).
+void MacroAssembler::multiply_to_len(Register x, Register xlen,
+                                     Register y, Register ylen,
+                                     Register z,
+                                     Register tmp1, Register tmp2,
+                                     Register tmp3, Register tmp4,
+                                     Register tmp5) {
+  ShortBranchVerifier sbv(this);
+
+  assert_different_registers(x, xlen, y, ylen, z,
+                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R1_scratch, Z_R7);
+  assert_different_registers(x, xlen, y, ylen, z,
+                             tmp1, tmp2, tmp3, tmp4, tmp5, Z_R8);
+
+  z_stmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
+
+  // In openJdk, we store the argument as 32-bit value to slot.
+  Address zlen(Z_SP, _z_abi(remaining_cargs)); // Int in long on big endian.
+
+  const Register idx = tmp1;
+  const Register kdx = tmp2;
+  const Register xstart = tmp3;
+
+  const Register y_idx = tmp4;
+  const Register carry = tmp5;
+  const Register product = Z_R0_scratch;
+  const Register x_xstart = Z_R8;
+
+  // First Loop.
+  //
+  // final static long LONG_MASK = 0xffffffffL;
+  // int xstart = xlen - 1;
+  // int ystart = ylen - 1;
+  // long carry = 0;
+  // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+  //   long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
+  //   z[kdx] = (int)product;
+  //   carry = product >>> 32;
+  // }
+  // z[xstart] = (int)carry;
+  //
+
+  lgr_if_needed(idx, ylen); // idx = ylen
+  z_llgf(kdx, zlen); // C2 does not respect int to long conversion for stub calls, thus load zero-extended.
+  clear_reg(carry); // carry = 0
+
+  Label L_done;
+
+  // Nothing to do if xlen - 1 is negative.
+  lgr_if_needed(xstart, xlen);
+  z_aghi(xstart, -1);
+  z_brl(L_done);
+
+  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
+
+  NearLabel L_second_loop;
+  compare64_and_branch(kdx, RegisterOrConstant((intptr_t) 0), bcondEqual, L_second_loop);
+
+  NearLabel L_carry;
+  z_aghi(kdx, -1);
+  z_brz(L_carry);
+
+  // Store lower 32 bits of carry.
+  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
+  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+  rshift(carry, 32);
+  z_aghi(kdx, -1);
+
+
+  bind(L_carry);
+
+  // Store upper 32 bits of carry.
+  z_sllg(Z_R1_scratch, kdx, LogBytesPerInt);
+  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+
+  // Second and third (nested) loops.
+  //
+  // for (int i = xstart-1; i >= 0; i--) { // Second loop
+  //   carry = 0;
+  //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
+  //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
+  //                    (z[k] & LONG_MASK) + carry;
+  //     z[k] = (int)product;
+  //     carry = product >>> 32;
+  //   }
+  //   z[i] = (int)carry;
+  // }
+  //
+  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = rdx
+  // NOTE(review): "rdx" is not a register name used in this function; here the current
+  // x value is kept in x_xstart (Z_R8). Looks like a leftover from another port - confirm.
+
+  const Register jdx = tmp1;
+
+  bind(L_second_loop);
+
+  clear_reg(carry); // carry = 0;
+  lgr_if_needed(jdx, ylen); // j = ystart+1
+
+  z_aghi(xstart, -1); // i = xstart-1;
+  z_brl(L_done);
+
+  // Use free slots in the current stackframe instead of push/pop.
+  Address zsave(Z_SP, _z_abi(carg_1));
+  reg2mem_opt(z, zsave);
+
+
+  Label L_last_x;
+
+  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
+  load_address(z, Address(z, Z_R1_scratch, 4)); // z = z + k - j
+  z_aghi(xstart, -1); // i = xstart-1;
+  z_brl(L_last_x);
+
+  z_sllg(Z_R1_scratch, xstart, LogBytesPerInt);
+  mem2reg_opt(x_xstart, Address(x, Z_R1_scratch, 0));
+
+
+  Label L_third_loop_prologue;
+
+  bind(L_third_loop_prologue);
+
+  // x, xstart and ylen are spilled because their registers are handed to the inner loop
+  // below (ylen as its jdx argument, x as its carry2 argument).
+  Address xsave(Z_SP, _z_abi(carg_2));
+  Address xlensave(Z_SP, _z_abi(carg_3));
+  Address ylensave(Z_SP, _z_abi(carg_4));
+
+  reg2mem_opt(x, xsave);
+  reg2mem_opt(xstart, xlensave);
+  reg2mem_opt(ylen, ylensave);
+
+
+  multiply_128_x_128_loop(x_xstart, y, z, y_idx, jdx, ylen, carry, product, x);
+
+  mem2reg_opt(z, zsave);
+  mem2reg_opt(x, xsave);
+  mem2reg_opt(xlen, xlensave); // This is the decrement of the loop counter!
+  mem2reg_opt(ylen, ylensave);
+
+  // tmp3 is the same register as xstart; it is set to xlen + 1 here.
+  add2reg(tmp3, 1, xlen);
+  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
+  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+  z_aghi(tmp3, -1);
+  z_brl(L_done);
+
+  rshift(carry, 32);
+  z_sllg(Z_R1_scratch, tmp3, LogBytesPerInt);
+  reg2mem_opt(carry, Address(z, Z_R1_scratch, 0), false);
+  z_bru(L_second_loop);
+
+  // Next infrequent code is moved outside loops.
+  bind(L_last_x);
+
+  // Only one element of x is left: load it (is_double == false) into the cleared register.
+  clear_reg(x_xstart);
+  mem2reg_opt(x_xstart, Address(x, (intptr_t) 0), false);
+  z_bru(L_third_loop_prologue);
+
+  bind(L_done);
+
+  z_lmg(Z_R7, Z_R13, _z_abi(gpr7), Z_SP);
+}
+
+#ifndef PRODUCT
+// Emit an assertion on the current condition code.
+// The emitted stop(msg, id) is reached, i.e. the assertion fails, if CC indicates
+// "not equal" (check_equal==true) or "equal" (check_equal==false). Otherwise it is skipped.
+void MacroAssembler::asm_assert(bool check_equal, const char *msg, int id) {
+  Label ok;
+  if (check_equal) {
+    z_bre(ok);
+  } else {
+    z_brne(ok);
+  }
+  stop(msg, id);
+  bind(ok);
+}
+
+// The emitted stop(msg, id) is reached if CC indicates "low"; it is skipped otherwise.
+void MacroAssembler::asm_assert_low(const char *msg, int id) {
+  Label ok;
+  z_brnl(ok);
+  stop(msg, id);
+  bind(ok);
+}
+
+// The emitted stop(msg, id) is reached if CC indicates "high"; it is skipped otherwise.
+void MacroAssembler::asm_assert_high(const char *msg, int id) {
+  Label ok;
+  z_brnh(ok);
+  stop(msg, id);
+  bind(ok);
+}
+
+// Same condition as asm_assert(): the stop is reached if CC indicates "not equal"
+// (check_equal==true) or "equal" (check_equal==false).
+// Generates non-relocatable code (uses stop_static() instead of stop()).
+void MacroAssembler::asm_assert_static(bool check_equal, const char *msg, int id) {
+  Label ok;
+  if (check_equal) { z_bre(ok); }
+  else { z_brne(ok); }
+  stop_static(msg, id);
+  bind(ok);
+}
+
+// Load-and-test the 4 or 8 byte value (selected by size) at mem_base + mem_offset into Z_R0,
+// then assert on the resulting condition code via asm_assert() (allow_relocation == true)
+// or asm_assert_static() (allow_relocation == false).
+// Z_R0 is overwritten. Any other size runs into ShouldNotReachHere() in the generator.
+void MacroAssembler::asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset,
+                                          Register mem_base, const char* msg, int id) {
+  switch (size) {
+    case 4:
+      load_and_test_int(Z_R0, Address(mem_base, mem_offset));
+      break;
+    case 8:
+      load_and_test_long(Z_R0, Address(mem_base, mem_offset));
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  if (allow_relocation) { asm_assert(check_equal, msg, id); }
+  else { asm_assert_static(check_equal, msg, id); }
+}
+
+// Check the condition
+//   expected_size == FP - SP
+// after transformation:
+//   expected_size - FP + SP == 0
+// Destroys Register expected_size if no tmp register is passed.
+void MacroAssembler::asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) {
+  // Select the work register. Without a separate tmp the computation is done
+  // in place, which destroys expected_size.
+  if (tmp == noreg) {
+    tmp = expected_size;
+  } else if (tmp != expected_size) {
+    z_lgr(tmp, expected_size);
+  }
+  // The check has to be emitted for both cases. It used to sit inside the else branch,
+  // so no code at all was generated when tmp == noreg.
+  z_algr(tmp, Z_SP);          // tmp = expected_size + SP
+  z_slg(tmp, 0, Z_R0, Z_SP);  // tmp -= value stored at 0(Z_SP) (presumably the caller's SP, i.e. FP - confirm)
+  // NOTE(review): z_slg is a logical subtract. Confirm that asm_assert_eq() tests the
+  // condition code such an instruction sets for a zero result.
+  asm_assert_eq(msg, id);
+}
+#endif // !PRODUCT
+
+// Thread verification is not implemented: if VerifyThread is set,
+// unimplemented("", 117) is invoked. Otherwise nothing is emitted.
+void MacroAssembler::verify_thread() {
+  if (VerifyThread) {
+    unimplemented("", 117);
+  }
+}
+
+// Plausibility check for oops.
+// Emits nothing unless VerifyOops is set. Otherwise emits a call to the routine whose
+// address is stored at StubRoutines::verify_oop_subroutine_entry_address(), passing
+// msg in Z_ARG1 and oop in Z_ARG2. Z_R0..Z_R5 are saved and restored around the call.
+void MacroAssembler::verify_oop(Register oop, const char* msg) {
+  if (!VerifyOops) return;
+
+  BLOCK_COMMENT("verify_oop {");
+  unsigned int nbytes_save = 6 *8;  // Room for Z_R0..Z_R5, 8 bytes each.
+  address entry = StubRoutines::verify_oop_subroutine_entry_address();
+  save_return_pc();
+  push_frame_abi160(nbytes_save);
+  z_stmg(Z_R0, Z_R5, 160, Z_SP);
+
+  z_lgr(Z_ARG2, oop);
+  load_const(Z_ARG1, (address) msg);
+  load_const(Z_R1, entry);
+  z_lg(Z_R1, 0, Z_R1);              // entry is the address of the entry point, load the entry point itself.
+  call_c(Z_R1);
+
+  z_lmg(Z_R0, Z_R5, 160, Z_SP);
+  pop_frame();
+
+  restore_return_pc();
+  BLOCK_COMMENT("} verify_oop ");
+}
+
+// Names of the stop types; indexed with (type % stop_end) by stop() and stop_chain().
+const char* MacroAssembler::stop_types[] = {
+  "stop",
+  "untested",
+  "unimplemented",
+  "shouldnotreachhere"
+};
+
+// Runtime target of the call sequences generated by stop() and stop_chain():
+// prints the stop type and message, then fails a guarantee.
+static void stop_on_request(const char* tp, const char* msg) {
+  tty->print("Z assembly code requires stop: (%s) %s\n", tp, msg);
+  guarantee(false, "Z assembly code requires stop: %s", msg);
+}
+
+// Emit code that calls stop_on_request(stop_types[type % stop_end], msg).
+// The id parameter is not used by the generated code.
+void MacroAssembler::stop(int type, const char* msg, int id) {
+  BLOCK_COMMENT(err_msg("stop: %s {", msg));
+
+  // Setup arguments.
+  load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
+  load_const(Z_ARG2, (void*) msg);
+  get_PC(Z_R14); // Following code pushes a frame without entering a new function. Use current pc as return address.
+  save_return_pc(); // Saves return pc Z_R14.
+  push_frame_abi160(0);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
+  // The plain disassembler does not recognize illtrap. It instead displays
+  // a 32-bit value. Issuing two illtraps assures the disassembler finds
+  // the proper beginning of the next instruction.
+  z_illtrap(); // Illegal instruction.
+  z_illtrap(); // Illegal instruction.
+
+  BLOCK_COMMENT(" } stop");
+}
+
+// Special version of stop() for code size reduction.
+// Reuses the previously generated call sequence, if any.
+// Generates the call sequence on its own, if necessary.
+// Note: This code will work only in non-relocatable code!
+//       The relative address of the data elements (arg1, arg2) must not change.
+//       The reentry point must not move relative to its users. This prerequisite
+//       should be given for "hand-written" code, if all chain calls are in the same code blob.
+//       Generated code must not undergo any transformation, e.g. ShortenBranches, to be safe.
+// Returns the reentry point to pass to subsequent calls: the reentry argument unchanged if a
+// branch to it was emitted, the start of the newly generated call sequence otherwise, or
+// NULL if a new sequence was generated with allow_relocation == true.
+address MacroAssembler::stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation) {
+  BLOCK_COMMENT(err_msg("stop_chain(%s,%s): %s {", reentry==NULL?"init":"cont", allow_relocation?"reloc ":"static", msg));
+
+  // Setup arguments.
+  if (allow_relocation) {
+    // Relocatable version (for comparison purposes). Remove after some time.
+    load_const(Z_ARG1, (void*) stop_types[type%stop_end]);
+    load_const(Z_ARG2, (void*) msg);
+  } else {
+    load_absolute_address(Z_ARG1, (address)stop_types[type%stop_end]);
+    load_absolute_address(Z_ARG2, (address)msg);
+  }
+  if ((reentry != NULL) && RelAddr::is_in_range_of_RelAddr16(reentry, pc())) {
+    BLOCK_COMMENT("branch to reentry point:");
+    z_brc(bcondAlways, reentry);
+  } else {
+    BLOCK_COMMENT("reentry point:");
+    reentry = pc(); // Re-entry point for subsequent stop calls.
+    save_return_pc(); // Saves return pc Z_R14.
+    push_frame_abi160(0);
+    if (allow_relocation) {
+      reentry = NULL; // Prevent reentry if code relocation is allowed.
+      call_VM_leaf(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
+    } else {
+      call_VM_leaf_static(CAST_FROM_FN_PTR(address, stop_on_request), Z_ARG1, Z_ARG2);
+    }
+    z_illtrap(); // Illegal instruction as emergency stop, should the above call return.
+  }
+  BLOCK_COMMENT(" } stop_chain");
+
+  return reentry;
+}
+
+// Special version of stop() for code size reduction.
+// Assumes constant relative addresses for data and runtime call.
+// Always generates a new call sequence (no reentry point is passed or kept).
+void MacroAssembler::stop_static(int type, const char* msg, int id) {
+  stop_chain(NULL, type, msg, id, false);
+}
+
+// Not implemented: invokes unimplemented("stop_subroutine", 710).
+void MacroAssembler::stop_subroutine() {
+  unimplemented("stop_subroutine", 710);
+}
+
+// Prints msg to stdout from within generated code.
+// Emits a call to warning(msg); the live registers are saved before and restored after the call.
+void MacroAssembler::warn(const char* msg) {
+  RegisterSaver::save_live_registers(this, RegisterSaver::all_registers, Z_R14);
+  load_absolute_address(Z_R1, (address) warning);
+  load_absolute_address(Z_ARG1, (address) msg);
+  (void) call(Z_R1);
+  RegisterSaver::restore_live_registers(this, RegisterSaver::all_registers);
+}
+
+#ifndef PRODUCT
+
+// Write pattern 0x0101010101010101 in region [low-before, high+after].
+void MacroAssembler::zap_from_to(Register low, Register high, Register val, Register addr, int before, int after) {
+  // Emits nothing unless ZapEmptyStackFields is set.
+  //   low, high     - registers holding the bounds of the region.
+  //   val           - work register, receives the zap pattern.
+  //   addr          - work register, running store address of the loop variant.
+  //   before, after - extension of the region below low / above high, in words.
+  if (!ZapEmptyStackFields) return;
+  BLOCK_COMMENT("zap memory region {");
+  load_const_optimized(val, 0x0101010101010101);
+  int size = before + after;
+  if (low == high && size < 5 && size > 0) {
+    // Small region around a single register: emit up to four individual stores.
+    // NOTE(review): this path stores (before + after) words, whereas the loop below also
+    // stores the word at high + after. Confirm which region is intended.
+    int offset = -before*BytesPerWord;
+    for (int i = 0; i < size; ++i) {
+      z_stg(val, Address(low, offset));
+      offset +=(1*BytesPerWord);
+    }
+  } else {
+    add2reg(addr, -before*BytesPerWord, low);
+    if (after) {
+#ifdef ASSERT
+      jlong check = after * BytesPerWord;
+      assert(Immediate::is_simm32(check) && Immediate::is_simm32(-check), "value not encodable !");
+#endif
+      // high is temporarily advanced to the end of the region and restored after the loop.
+      add2reg(high, after * BytesPerWord);
+    }
+    NearLabel loop;
+    bind(loop);
+    z_stg(val, Address(addr));
+    add2reg(addr, 8);
+    compare64_and_branch(addr, high, bcondNotHigh, loop);
+    if (after) {
+      add2reg(high, -after * BytesPerWord);
+    }
+  }
+  BLOCK_COMMENT("} zap memory region");
+}
+#endif // !PRODUCT
+
+// Emits a test of the flag at flag_addr and a branch to _label that is taken if the flag
+// equals value. The destructor binds _label, so the code emitted between construction and
+// destruction of this object is skipped in that case. _rscratch is overwritten.
+SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value, Register _rscratch) {
+  _masm = masm;
+  _masm->load_absolute_address(_rscratch, (address)flag_addr);
+  // The flag is a bool: test exactly one byte. A 4-byte load would also
+  // pick up the three bytes that follow the flag in memory.
+  _masm->load_and_test_byte(_rscratch, Address(_rscratch));
+  if (value) {
+    _masm->z_brne(_label); // Skip if true, i.e. != 0.
+  } else {
+    _masm->z_bre(_label); // Skip if false, i.e. == 0.
+  }
+}
+
+// Binds the branch target of the skip emitted by the constructor.
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+}
diff --git a/hotspot/src/cpu/s390/vm/macroAssembler_s390.hpp b/hotspot/src/cpu/s390/vm/macroAssembler_s390.hpp
new file mode 100644
index 00000000000..73a981863bb
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.hpp
@@ -0,0 +1,1073 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_MACROASSEMBLER_S390_HPP +#define CPU_S390_VM_MACROASSEMBLER_S390_HPP + +#include "asm/assembler.hpp" + +#define MODERN_IFUN(name) ((void (MacroAssembler::*)(Register, int64_t, Register, Register))&MacroAssembler::name) +#define CLASSIC_IFUN(name) ((void (MacroAssembler::*)(Register, int64_t, Register, Register))&MacroAssembler::name) +#define MODERN_FFUN(name) ((void (MacroAssembler::*)(FloatRegister, int64_t, Register, Register))&MacroAssembler::name) +#define CLASSIC_FFUN(name) ((void (MacroAssembler::*)(FloatRegister, int64_t, Register, Register))&MacroAssembler::name) + +class MacroAssembler: public Assembler { + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // + // Optimized instruction emitters + // + + // Move register if destination register and target register are different. 
+ void lr_if_needed(Register rd, Register rs); + void lgr_if_needed(Register rd, Register rs); + void llgfr_if_needed(Register rd, Register rs); + void ldr_if_needed(FloatRegister rd, FloatRegister rs); + + void move_reg_if_needed(Register dest, BasicType dest_type, Register src, BasicType src_type); + void move_freg_if_needed(FloatRegister dest, BasicType dest_type, FloatRegister src, BasicType src_type); + + void freg2mem_opt(FloatRegister reg, + int64_t disp, + Register index, + Register base, + void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), + void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), + Register scratch = Z_R0); + void freg2mem_opt(FloatRegister reg, + const Address &a, bool is_double = true); + + void mem2freg_opt(FloatRegister reg, + int64_t disp, + Register index, + Register base, + void (MacroAssembler::*modern) (FloatRegister, int64_t, Register, Register), + void (MacroAssembler::*classic)(FloatRegister, int64_t, Register, Register), + Register scratch = Z_R0); + void mem2freg_opt(FloatRegister reg, + const Address &a, bool is_double = true); + + void reg2mem_opt(Register reg, + int64_t disp, + Register index, + Register base, + void (MacroAssembler::*modern) (Register, int64_t, Register, Register), + void (MacroAssembler::*classic)(Register, int64_t, Register, Register), + Register scratch = Z_R0); + // returns offset of the store instruction + int reg2mem_opt(Register reg, const Address &a, bool is_double = true); + + void mem2reg_opt(Register reg, + int64_t disp, + Register index, + Register base, + void (MacroAssembler::*modern) (Register, int64_t, Register, Register), + void (MacroAssembler::*classic)(Register, int64_t, Register, Register)); + void mem2reg_opt(Register reg, const Address &a, bool is_double = true); + void mem2reg_signed_opt(Register reg, const Address &a); + + // AND immediate and set condition code, works for 64 bit immediates/operation as well. 
+ void and_imm(Register r, long mask, Register tmp = Z_R0, bool wide = false); + + // 1's complement, 32bit or 64bit. Optimized to exploit distinct operands facility. + // Note: The condition code is neither preserved nor correctly set by this code!!! + // Note: (wide == false) does not protect the high order half of the target register + // from alternation. It only serves as optimization hint for 32-bit results. + void not_(Register r1, Register r2 = noreg, bool wide = false); // r1 = ~r2 + + // Expanded support of all "rotate_then_" instructions. + // + // Generalize and centralize rotate_then_ emitter. + // Functional description. For details, see Principles of Operation, Chapter 7, "Rotate Then Insert..." + // - Bits in a register are numbered left (most significant) to right (least significant), i.e. [0..63]. + // - Bytes in a register are numbered left (most significant) to right (least significant), i.e. [0..7]. + // - Register src is rotated to the left by (nRotate&0x3f) positions. + // - Negative values for nRotate result in a rotation to the right by abs(nRotate) positions. + // - The bits in positions [lBitPos..rBitPos] of the _ROTATED_ src operand take part in the + // logical operation performed on the contents (in those positions) of the dst operand. + // - The logical operation that is performed on the dst operand is one of + // o insert the selected bits (replacing the original contents of those bit positions) + // o and the selected bits with the corresponding bits of the dst operand + // o or the selected bits with the corresponding bits of the dst operand + // o xor the selected bits with the corresponding bits of the dst operand + // - For clear_dst == true, the destination register is cleared before the bits are inserted. + // For clear_dst == false, only the bit positions that get data inserted from src + // are changed. All other bit positions remain unchanged. 
+ // - For test_only == true, the result of the logicalOP is only used to set the condition code, dst remains unchanged. + // For test_only == false, the result of the logicalOP replaces the selected bits of dst. + // - src32bit and dst32bit indicate the respective register is used as 32bit value only. + // Knowledge can simplify code generation. + // + // Here is an important performance note, valid for all s except "insert": + // Due to the too complex nature of the operation, it cannot be done in a single cycle. + // Timing constraints require the instructions to be cracked into two micro-ops, taking + // one or two cycles each to execute. In some cases, an additional pipeline bubble might get added. + // Macroscopically, that makes up for a three- or four-cycle instruction where you would + // expect just a single cycle. + // It is thus not beneficial from a performance point of view to exploit those instructions. + // Other reasons (code compactness, register pressure, ...) might outweigh this penalty. + // + unsigned long create_mask(int lBitPos, int rBitPos); + void rotate_then_mask(Register dst, Register src, int lBitPos, int rBitPos, + int nRotate, bool src32bit, bool dst32bit, bool oneBits); + void rotate_then_insert(Register dst, Register src, int lBitPos, int rBitPos, int nRotate, + bool clear_dst); + void rotate_then_and(Register dst, Register src, int lBitPos, int rBitPos, int nRotate, + bool test_only); + void rotate_then_or(Register dst, Register src, int lBitPos, int rBitPos, int nRotate, + bool test_onlyt); + void rotate_then_xor(Register dst, Register src, int lBitPos, int rBitPos, int nRotate, + bool test_only); + + void add64(Register r1, RegisterOrConstant inc); + + // Helper function to multiply the 64bit contents of a register by a 16bit constant. + // The optimization tries to avoid the mghi instruction, since it uses the FPU for + // calculation and is thus rather slow. + // + // There is no handling for special cases, e.g. 
cval==0 or cval==1. + // + // Returns len of generated code block. + unsigned int mul_reg64_const16(Register rval, Register work, int cval); + + // Generic operation r1 := r2 + imm. + void add2reg(Register r1, int64_t imm, Register r2 = noreg); + // Generic operation r := b + x + d. + void add2reg_with_index(Register r, int64_t d, Register x, Register b = noreg); + + // Add2mem* methods for direct memory increment. + void add2mem_32(const Address &a, int64_t imm, Register tmp); + void add2mem_64(const Address &a, int64_t imm, Register tmp); + + // *((int8_t*)(dst)) |= imm8 + inline void or2mem_8(Address& dst, int64_t imm8); + + // Load values by size and signedness. + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed); + void store_sized_value(Register src, Address dst, size_t size_in_bytes); + + // Load values with large offsets to base address. + private: + int split_largeoffset(int64_t si20_offset, Register tmp, bool fixed_codelen, bool accumulate); + public: + void load_long_largeoffset(Register t, int64_t si20, Register a, Register tmp); + void load_float_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp); + void load_double_largeoffset(FloatRegister t, int64_t si20, Register a, Register tmp); + + private: + long toc_distance(); + public: + void load_toc(Register Rtoc); + void load_long_pcrelative(Register Rdst, address dataLocation); + static int load_long_pcrelative_size() { return 6; } + void load_addr_pcrelative(Register Rdst, address dataLocation); + static int load_addr_pcrel_size() { return 6; } // Just a LARL. + + // Load a value from memory and test (set CC). + void load_and_test_byte (Register dst, const Address &a); + void load_and_test_short (Register dst, const Address &a); + void load_and_test_int (Register dst, const Address &a); + void load_and_test_int2long(Register dst, const Address &a); + void load_and_test_long (Register dst, const Address &a); + + // Test a bit in memory. 
Result is reflected in CC. + void testbit(const Address &a, unsigned int bit); + // Test a bit in a register. Result is reflected in CC. + void testbit(Register r, unsigned int bitPos); + + // Clear a register, i.e. load const zero into reg. Return len (in bytes) of + // generated instruction(s). + // whole_reg: Clear 64 bits if true, 32 bits otherwise. + // set_cc: Use instruction that sets the condition code, if true. + int clear_reg(Register r, bool whole_reg = true, bool set_cc = true); + +#ifdef ASSERT + int preset_reg(Register r, unsigned long pattern, int pattern_len); +#endif + + // Clear (store zeros) a small piece of memory. + // CAUTION: Do not use this for atomic memory clearing. Use store_const() instead. + // addr: Address descriptor of memory to clear. + // Index register will not be used! + // size: Number of bytes to clear. + void clear_mem(const Address& addr, unsigned size); + + // Move immediate values to memory. Currently supports 32 and 64 bit stores, + // but may be extended to 16 bit store operation, if needed. + // For details, see implementation in *.cpp file. + int store_const(const Address &dest, long imm, + unsigned int lm, unsigned int lc, + Register scratch = Z_R0); + inline int store_const(const Address &dest, long imm, + Register scratch = Z_R0, bool is_long = true); + + // Move/initialize arbitrarily large memory area. No check for destructive overlap. + // Being interruptible, these instructions need a retry-loop. + void move_long_ext(Register dst, Register src, unsigned int pad); + + void compare_long_ext(Register left, Register right, unsigned int pad); + void compare_long_uni(Register left, Register right, unsigned int pad); + + void search_string(Register end, Register start); + void search_string_uni(Register end, Register start); + + // Translate instructions + // Being interruptible, these instructions need a retry-loop. 
+ void translate_oo(Register dst, Register src, uint mask); + void translate_ot(Register dst, Register src, uint mask); + void translate_to(Register dst, Register src, uint mask); + void translate_tt(Register dst, Register src, uint mask); + + // Crypto instructions. + // Being interruptible, these instructions need a retry-loop. + void cksm(Register crcBuff, Register srcBuff); + void km( Register dstBuff, Register srcBuff); + void kmc(Register dstBuff, Register srcBuff); + void kimd(Register srcBuff); + void klmd(Register srcBuff); + void kmac(Register srcBuff); + + // nop padding + void align(int modulus); + void align_address(int modulus); + + // + // Constants, loading constants, TOC support + // + // Safepoint check factored out. + void generate_safepoint_check(Label& slow_path, Register scratch = noreg, bool may_relocate = true); + + // Load generic address: d <- base(a) + index(a) + disp(a). + inline void load_address(Register d, const Address &a); + // Load absolute address (and try to optimize). + void load_absolute_address(Register d, address addr); + + // Address of Z_ARG1 and argument_offset. + // If temp_reg == arg_slot, arg_slot will be overwritten. + Address argument_address(RegisterOrConstant arg_slot, + Register temp_reg = noreg, + int64_t extra_slot_offset = 0); + + // Load a narrow ptr constant (oop or klass ptr). 
+ void load_narrow_oop( Register t, narrowOop a); + void load_narrow_klass(Register t, Klass* k); + + static bool is_load_const_32to64(address pos); + static bool is_load_narrow_oop(address pos) { return is_load_const_32to64(pos); } + static bool is_load_narrow_klass(address pos) { return is_load_const_32to64(pos); } + + static int load_const_32to64_size() { return 6; } + static bool load_narrow_oop_size() { return load_const_32to64_size(); } + static bool load_narrow_klass_size() { return load_const_32to64_size(); } + + static int patch_load_const_32to64(address pos, int64_t a); + static int patch_load_narrow_oop(address pos, oop o); + static int patch_load_narrow_klass(address pos, Klass* k); + + // cOops. CLFI exploit. + void compare_immediate_narrow_oop(Register oop1, narrowOop oop2); + void compare_immediate_narrow_klass(Register op1, Klass* op2); + static bool is_compare_immediate32(address pos); + static bool is_compare_immediate_narrow_oop(address pos); + static bool is_compare_immediate_narrow_klass(address pos); + static int compare_immediate_narrow_size() { return 6; } + static int compare_immediate_narrow_oop_size() { return compare_immediate_narrow_size(); } + static int compare_immediate_narrow_klass_size() { return compare_immediate_narrow_size(); } + static int patch_compare_immediate_32(address pos, int64_t a); + static int patch_compare_immediate_narrow_oop(address pos, oop o); + static int patch_compare_immediate_narrow_klass(address pos, Klass* k); + + // Load a 32bit constant into a 64bit register. + void load_const_32to64(Register t, int64_t x, bool sign_extend=true); + // Load a 64 bit constant. + void load_const(Register t, long a); + inline void load_const(Register t, void* a); + inline void load_const(Register t, Label& L); + inline void load_const(Register t, const AddressLiteral& a); + // Get the 64 bit constant from a `load_const' sequence. 
+ static long get_const(address load_const); + // Patch the 64 bit constant of a `load_const' sequence. This is a low level + // procedure. It neither flushes the instruction cache nor is it atomic. + static void patch_const(address load_const, long x); + static int load_const_size() { return 12; } + + // Turn a char into boolean. NOTE: destroys r. + void c2bool(Register r, Register t = Z_R0); + + // Optimized version of load_const for constants that do not need to be + // loaded by a sequence of instructions of fixed length and that do not + // need to be patched. + int load_const_optimized_rtn_len(Register t, long x, bool emit); + inline void load_const_optimized(Register t, long x); + inline void load_const_optimized(Register t, void* a); + inline void load_const_optimized(Register t, Label& L); + inline void load_const_optimized(Register t, const AddressLiteral& a); + + public: + + //---------------------------------------------------------- + // oops in code ------------- + // including compressed oops support ------------- + //---------------------------------------------------------- + + // Metadata in code that we have to keep track of. + AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index + AddressLiteral constant_metadata_address(Metadata* obj); // find_index + + // allocate_index + AddressLiteral allocate_oop_address(jobject obj); + // find_index + AddressLiteral constant_oop_address(jobject obj); + // Uses allocate_oop_address. + inline void set_oop (jobject obj, Register d); + // Uses constant_oop_address. + inline void set_oop_constant(jobject obj, Register d); + // Uses constant_metadata_address. + inline bool set_metadata_constant(Metadata* md, Register d); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + // + // branch, jump + // + + // Use one generic function for all branch patches. 
+ static unsigned long patched_branch(address dest_pos, unsigned long inst, address inst_pos); + + void pd_patch_instruction(address branch, address target); + + // Extract relative address from "relative" instructions. + static long get_pcrel_offset(unsigned long inst); + static long get_pcrel_offset(address pc); + static address get_target_addr_pcrel(address pc); + + static inline bool is_call_pcrelative_short(unsigned long inst); + static inline bool is_call_pcrelative_long(unsigned long inst); + static inline bool is_branch_pcrelative_short(unsigned long inst); + static inline bool is_branch_pcrelative_long(unsigned long inst); + static inline bool is_compareandbranch_pcrelative_short(unsigned long inst); + static inline bool is_branchoncount_pcrelative_short(unsigned long inst); + static inline bool is_branchonindex32_pcrelative_short(unsigned long inst); + static inline bool is_branchonindex64_pcrelative_short(unsigned long inst); + static inline bool is_branchonindex_pcrelative_short(unsigned long inst); + static inline bool is_branch_pcrelative16(unsigned long inst); + static inline bool is_branch_pcrelative32(unsigned long inst); + static inline bool is_branch_pcrelative(unsigned long inst); + static inline bool is_load_pcrelative_long(unsigned long inst); + static inline bool is_misc_pcrelative_long(unsigned long inst); + static inline bool is_pcrelative_short(unsigned long inst); + static inline bool is_pcrelative_long(unsigned long inst); + // PCrelative TOC access. Variants with address argument. + static inline bool is_load_pcrelative_long(address iLoc); + static inline bool is_pcrelative_short(address iLoc); + static inline bool is_pcrelative_long(address iLoc); + + static inline bool is_pcrelative_instruction(address iloc); + static inline bool is_load_addr_pcrel(address a); + + static void patch_target_addr_pcrel(address pc, address con); + static void patch_addr_pcrel(address pc, address con) { + patch_target_addr_pcrel(pc, con); // Just delegate. 
This is only for nativeInst_s390.cpp. + } + + //--------------------------------------------------------- + // Some macros for more comfortable assembler programming. + //--------------------------------------------------------- + + // NOTE: pass NearLabel T to signal that the branch target T will be bound to a near address. + + void compare32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target); + void compareU32_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target); + void compare64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target); + void compareU64_and_branch(Register r1, RegisterOrConstant x2, branch_condition cond, Label& target); + + void branch_optimized(Assembler::branch_condition cond, address branch_target); + void branch_optimized(Assembler::branch_condition cond, Label& branch_target); + void compare_and_branch_optimized(Register r1, + Register r2, + Assembler::branch_condition cond, + address branch_addr, + bool len64, + bool has_sign); + void compare_and_branch_optimized(Register r1, + jlong x2, + Assembler::branch_condition cond, + Label& branch_target, + bool len64, + bool has_sign); + void compare_and_branch_optimized(Register r1, + Register r2, + Assembler::branch_condition cond, + Label& branch_target, + bool len64, + bool has_sign); + + // + // Support for frame handling + // + // Specify the register that should be stored as the return pc in the + // current frame (default is R14). + inline void save_return_pc(Register pc = Z_R14); + inline void restore_return_pc(); + + // Get current PC. + address get_PC(Register result); + + // Get current PC + offset. Offset given in bytes, must be even! + address get_PC(Register result, int64_t offset); + + // Resize current frame either relatively wrt to current SP or absolute. 
+ void resize_frame_sub(Register offset, Register fp, bool load_fp=true); + void resize_frame_absolute(Register addr, Register fp, bool load_fp=true); + void resize_frame(RegisterOrConstant offset, Register fp, bool load_fp=true); + + // Push a frame of size bytes, if copy_sp is false, old_sp must already + // contain a copy of Z_SP. + void push_frame(Register bytes, Register old_sp, bool copy_sp = true, bool bytes_with_inverted_sign = false); + + // Push a frame of size `bytes'. no abi space provided. + // Don't rely on register locking, instead pass a scratch register + // (Z_R0 by default). + // CAUTION! passing registers >= Z_R2 may produce bad results on + // old CPUs! + unsigned int push_frame(unsigned int bytes, Register scratch = Z_R0); + + // Push a frame of size `bytes' with abi160 on top. + unsigned int push_frame_abi160(unsigned int bytes); + + // Pop current C frame. + void pop_frame(); + + // + // Calls + // + + private: + address _last_calls_return_pc; + + public: + // Support for VM calls. This is the base routine called by the + // different versions of call_VM_leaf. The interpreter may customize + // this version by overriding it for its purposes (e.g., to + // save/restore additional registers when doing a VM call). + void call_VM_leaf_base(address entry_point); + void call_VM_leaf_base(address entry_point, bool allow_relocation); + + // It is imperative that all calls into the VM are handled via the + // call_VM macros. They make sure that the stack linkage is setup + // correctly. Call_VM's correspond to ENTRY/ENTRY_X entry points + // while call_VM_leaf's correspond to LEAF entry points. + // + // This is the base routine called by the different versions of + // call_VM. The interpreter may customize this version by overriding + // it for its purposes (e.g., to save/restore additional registers + // when doing a VM call). + + // If no last_java_sp is specified (noreg) then SP will be used instead. 
+ + virtual void call_VM_base( + Register oop_result, // Where an oop-result ends up if any; use noreg otherwise. + Register last_java_sp, // To set up last_Java_frame in stubs; use noreg otherwise. + address entry_point, // The entry point. + bool check_exception); // Flag which indicates if exception should be checked. + virtual void call_VM_base( + Register oop_result, // Where an oop-result ends up if any; use noreg otherwise. + Register last_java_sp, // To set up last_Java_frame in stubs; use noreg otherwise. + address entry_point, // The entry point. + bool allow_relocation, // Flag to request generation of relocatable code. + bool check_exception); // Flag which indicates if exception should be checked. + + // Call into the VM. + // Passes the thread pointer (in Z_ARG1) as a prepended argument. + // Makes sure oop return values are visible to the GC. + void call_VM(Register oop_result, address entry_point, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, + Register arg_3, bool check_exceptions = true); + + void call_VM_static(Register oop_result, address entry_point, bool check_exceptions = true); + void call_VM_static(Register oop_result, address entry_point, Register arg_1, Register arg_2, + Register arg_3, bool check_exceptions = true); + + // Overloaded with last_java_sp. 
+ void call_VM(Register oop_result, Register last_java_sp, address entry_point, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, + Register arg_1, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, + Register arg_1, Register arg_2, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, + Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + + void call_VM_leaf(address entry_point); + void call_VM_leaf(address entry_point, Register arg_1); + void call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + + // Really static VM leaf call (never patched). + void call_VM_leaf_static(address entry_point); + void call_VM_leaf_static(address entry_point, Register arg_1); + void call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2); + void call_VM_leaf_static(address entry_point, Register arg_1, Register arg_2, Register arg_3); + + // Call a C function via its function entry. Updates and returns _last_calls_return_pc. + inline address call(Register function_entry); + inline address call_c(Register function_entry); + address call_c(address function_entry); + // Variant for really static (non-relocatable) calls which are never patched. + address call_c_static(address function_entry); + // TOC or pc-relative call + emits a runtime_call relocation. + address call_c_opt(address function_entry); + + inline address call_stub(Register function_entry); + inline address call_stub(address function_entry); + + // Get the pc where the last call will return to. Returns _last_calls_return_pc. + inline address last_calls_return_pc(); + + private: + static bool is_call_far_patchable_variant0_at(address instruction_addr); // Dynamic TOC: load target addr from CP and call. 
+  static bool is_call_far_patchable_variant2_at(address instruction_addr); // PC-relative call, prefixed with NOPs.
+
+
+ public:
+  bool call_far_patchable(address target, int64_t toc_offset);
+  static bool is_call_far_patchable_at(address inst_start); // All supported forms of patchable calls.
+  static bool is_call_far_patchable_pcrelative_at(address inst_start); // Pc-relative call with leading nops.
+  static bool is_call_far_pcrelative(address instruction_addr); // Pure far pc-relative call, with one leading size adjustment nop.
+  static void set_dest_of_call_far_patchable_at(address inst_start, address target, int64_t toc_offset);
+  static address get_dest_of_call_far_patchable_at(address inst_start, address toc_start);
+
+  void align_call_far_patchable(address pc);
+
+  // PCrelative TOC access.
+
+  // This value is independent of code position - constant for the lifetime of the VM.
+  // It is the sum of the load-from-TOC size and the call-by-register size.
+  static int call_far_patchable_size() {
+    return load_const_from_toc_size() + call_byregister_size();
+  }
+
+  // The return address offset equals the size of the whole patchable call sequence.
+  static int call_far_patchable_ret_addr_offset() { return call_far_patchable_size(); }
+
+  // Returns true if the address directly behind a patchable far call emitted
+  // at pc (i.e. pc + call_far_patchable_size()) is not a multiple of 4.
+  static bool call_far_patchable_requires_alignment_nop(address pc) {
+    int size = call_far_patchable_size();
+    return ((intptr_t)(pc + size) & 0x03L) != 0;
+  }
+
+  // END OF PCrelative TOC access.
+
+  static int jump_byregister_size() { return 2; }
+  static int jump_pcrelative_size() { return 4; }
+  static int jump_far_pcrelative_size() { return 6; }
+  static int call_byregister_size() { return 2; }
+  static int call_pcrelative_size() { return 4; }
+  static int call_far_pcrelative_size() { return 2 + 6; } // Prepend each BRASL with a nop.
+  static int call_far_pcrelative_size_raw() { return 6; } // Raw size: presumably the BRASL alone, without the prepended nop (compare call_far_pcrelative_size()).
+
+  //
+  // Java utilities
+  //
+
+  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
+ // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // Polling page support. + enum poll_mask { + mask_stackbang = 0xde, // 222 (dec) + mask_safepoint = 0x6f, // 111 (dec) + mask_profiling = 0xba // 186 (dec) + }; + + // Read from the polling page. + void load_from_polling_page(Register polling_page_address, int64_t offset = 0); + + // Check if given instruction is a read from the polling page + // as emitted by load_from_polling_page. + static bool is_load_from_polling_page(address instr_loc); + // Extract poll address from instruction and ucontext. + static address get_poll_address(address instr_loc, void* ucontext); + // Extract poll register from instruction. + static uint get_poll_register(address instr_loc); + + // Check if instruction is a write access to the memory serialization page + // realized by one of the instructions stw, stwu, stwx, or stwux. + static bool is_memory_serialization(int instruction, JavaThread* thread, void* ucontext); + + // Support for serializing memory accesses between threads. + void serialize_memory(Register thread, Register tmp1, Register tmp2); + + // Stack overflow checking + void bang_stack_with_offset(int offset); + + // Atomics + // -- none? + + void tlab_allocate(Register obj, // Result: pointer to object after successful allocation + Register var_size_in_bytes, // Object size in bytes if unknown at compile time; invalid otherwise. + int con_size_in_bytes, // Object size in bytes if known at compile time. + Register t1, // temp register + Label& slow_case); // Continuation point if fast allocation fails. + + // Emitter for interface method lookup. 
+ // input: recv_klass, intf_klass, itable_index + // output: method_result + // kills: itable_index, temp1_reg, Z_R0, Z_R1 + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register temp1_reg, + Register temp2_reg, + Label& no_such_interface); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Factor out code to call ic_miss_handler. + unsigned int call_ic_miss_handler(Label& ICM, int trapMarker, int requiredSize, Register scratch); + void nmethod_UEP(Label& ic_miss); + + // Emitters for "partial subtype" checks. + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg and temp2_reg. + // If super_check_offset is not -1, temp1_reg is not used and can be noreg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp1_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg can be noreg, if no temps are available. + // It can also be sub_klass or super_klass, meaning it's OK to kill that one. + // Updates the sub's secondary super cache as necessary. + void check_klass_subtype_slow_path(Register Rsubklass, + Register Rsuperklas, + Register Rarray_ptr, // tmp + Register Rlength, // tmp + Label* L_success, + Label* L_failure); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. 
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp1_reg,
+                           Register temp2_reg,
+                           Label& L_success);
+
+  // Increment a counter at counter_address when the eq condition code is set.
+  // Kills registers tmp1_reg and tmp2_reg and preserves the condition code.
+  void increment_counter_eq(address counter_address, Register tmp1_reg, Register tmp2_reg);
+  // Biased locking support
+  // Upon entry, obj_reg must contain the target object, and mark_reg
+  // must contain the target object's header.
+  // Destroys mark_reg if an attempt is made to bias an anonymously
+  // biased lock. In this case a failure will go either to the slow
+  // case or fall through with the notEqual condition code set with
+  // the expectation that the slow case in the runtime will be called.
+  // In the fall-through case where the CAS-based lock is done,
+  // mark_reg is not destroyed.
+  void biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg,
+                            Register temp2_reg, Label& done, Label* slow_case = NULL);
+  // Upon entry, the base register of mark_addr must contain the oop.
+  // Destroys temp_reg.
+  // If allow_delay_slot_filling is set to true, the next instruction
+  // emitted after this one will go in an annulled delay slot if the
+  // biased locking exit case failed.
+  // NOTE(review): this declaration has no allow_delay_slot_filling parameter,
+  // so the preceding sentence looks like a leftover from another platform's
+  // version of this comment - confirm and drop it.
+  void biased_locking_exit(Register mark_addr, Register temp_reg, Label& done);
+
+  // try_bias defaults to the UseBiasedLocking flag for both lock and unlock.
+  void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking);
+  void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2, bool try_bias = UseBiasedLocking);
+
+  // Write to card table for modification at store_addr - register is destroyed afterwards.
+  void card_write_barrier_post(Register store_addr, Register tmp);
+
+#if INCLUDE_ALL_GCS
+  // General G1 pre-barrier generator.
+  // Purpose: record the previous value if it is not null.
+  // All non-tmps are preserved.
+ void g1_write_barrier_pre(Register Robj, + RegisterOrConstant offset, + Register Rpre_val, // Ideally, this is a non-volatile register. + Register Rval, // Will be preserved. + Register Rtmp1, // If Rpre_val is volatile, either Rtmp1 + Register Rtmp2, // or Rtmp2 has to be non-volatile. + bool pre_val_needed); // Save Rpre_val across runtime call, caller uses it. + + // General G1 post-barrier generator. + // Purpose: Store cross-region card. + void g1_write_barrier_post(Register Rstore_addr, + Register Rnew_val, + Register Rtmp1, + Register Rtmp2, + Register Rtmp3); +#endif // INCLUDE_ALL_GCS + + // Support for last Java frame (but use call_VM instead where possible). + private: + void set_last_Java_frame(Register last_Java_sp, Register last_Java_pc, bool allow_relocation); + void reset_last_Java_frame(bool allow_relocation); + void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1, bool allow_relocation); + public: + inline void set_last_Java_frame(Register last_java_sp, Register last_Java_pc); + inline void set_last_Java_frame_static(Register last_java_sp, Register last_Java_pc); + inline void reset_last_Java_frame(void); + inline void reset_last_Java_frame_static(void); + inline void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1); + inline void set_top_ijava_frame_at_SP_as_last_Java_frame_static(Register sp, Register tmp1); + + void set_thread_state(JavaThreadState new_state); + + // Read vm result from thread. + void get_vm_result (Register oop_result); + void get_vm_result_2(Register result); + + // Vm result is currently getting hijacked to for oop preservation. + void set_vm_result(Register oop_result); + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. 
No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + // + // %%%%%% Currently not done for z/Architecture + + void null_check(Register reg, Register tmp = Z_R0, int64_t offset = -1); + static bool needs_explicit_null_check(intptr_t offset); // Implemented in shared file ?! + + // Klass oop manipulations if compressed. + void encode_klass_not_null(Register dst, Register src = noreg); + void decode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst); + void load_klass(Register klass, Address mem); + void load_klass(Register klass, Register src_oop); + void load_prototype_header(Register Rheader, Register Rsrc_oop); + void store_klass(Register klass, Register dst_oop, Register ck = noreg); // Klass will get compressed if ck not provided. + void store_klass_gap(Register s, Register dst_oop); + + // This function calculates the size of the code generated by + // decode_klass_not_null(register dst) + // when (Universe::heap() != NULL). Hence, if the instructions + // it generates change, then this method needs to be updated. 
+  static int instr_size_for_decode_klass_not_null();
+
+  void encode_heap_oop(Register oop);
+  void encode_heap_oop_not_null(Register oop);
+
+  static int get_oop_base_pow2_offset(uint64_t oop_base);
+  int get_oop_base(Register Rbase, uint64_t oop_base);
+  int get_oop_base_complement(Register Rbase, uint64_t oop_base);
+  void compare_heap_oop(Register Rop1, Address mem, bool maybeNULL);
+  void compare_klass_ptr(Register Rop1, int64_t disp, Register Rbase, bool maybeNULL);
+  void load_heap_oop(Register dest, const Address &a);
+  void load_heap_oop(Register d, int64_t si16, Register s1);
+  void load_heap_oop_not_null(Register d, int64_t si16, Register s1);
+  void store_heap_oop(Register Roop, RegisterOrConstant offset, Register base);
+  void store_heap_oop_not_null(Register Roop, RegisterOrConstant offset, Register base);
+  void store_heap_oop_null(Register zero, RegisterOrConstant offset, Register base);
+  void oop_encoder(Register Rdst, Register Rsrc, bool maybeNULL,
+                   Register Rbase = Z_R1, int pow2_offset = -1, bool only32bitValid = false);
+  void oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL,
+                   Register Rbase = Z_R1, int pow2_offset = -1);
+
+  void load_mirror(Register mirror, Register method);
+
+  //--------------------------
+  //--- Operations on arrays.
+  //--------------------------
+  unsigned int Clear_Array(Register cnt_arg, Register base_pointer_arg, Register src_addr, Register src_len);
+  unsigned int Clear_Array_Const(long cnt, Register base);
+  unsigned int Clear_Array_Const_Big(long cnt, Register base_pointer_arg, Register src_addr, Register src_len);
+  unsigned int CopyRawMemory_AlignedDisjoint(Register src_reg, Register dst_reg,
+                                             Register cnt_reg,
+                                             Register tmp1_reg, Register tmp2_reg);
+
+  //-------------------------------------------
+  // Special String Intrinsics Implementation.
+  //-------------------------------------------
+  // Intrinsics for CompactStrings
+  // Compress char[] to byte[]. odd_reg contains cnt.
tmp3 is only needed for precise behavior in failure case. Kills dst. + unsigned int string_compress(Register result, Register src, Register dst, Register odd_reg, + Register even_reg, Register tmp, Register tmp2 = noreg); + + // Kills src. + unsigned int has_negatives(Register result, Register src, Register cnt, + Register odd_reg, Register even_reg, Register tmp); + + // Inflate byte[] to char[]. + unsigned int string_inflate_trot(Register src, Register dst, Register cnt, Register tmp); + // Odd_reg contains cnt. Kills src. + unsigned int string_inflate(Register src, Register dst, Register odd_reg, + Register even_reg, Register tmp); + + unsigned int string_compare(Register str1, Register str2, Register cnt1, Register cnt2, + Register odd_reg, Register even_reg, Register result, int ae); + + unsigned int array_equals(bool is_array_equ, Register ary1, Register ary2, Register limit, + Register odd_reg, Register even_reg, Register result, bool is_byte); + + unsigned int string_indexof(Register result, Register haystack, Register haycnt, + Register needle, Register needlecnt, int needlecntval, + Register odd_reg, Register even_reg, int ae); + + unsigned int string_indexof_char(Register result, Register haystack, Register haycnt, + Register needle, jchar needleChar, Register odd_reg, Register even_reg, bool is_byte); + + // Emit an oop const to the constant pool and set a relocation info + // with address current_pc. Return the TOC offset of the constant. + int store_const_in_toc(AddressLiteral& val); + int store_oop_in_toc(AddressLiteral& oop); + // Emit an oop const to the constant pool via store_oop_in_toc, or + // emit a scalar const to the constant pool via store_const_in_toc, + // and load the constant into register dst. + bool load_const_from_toc(Register dst, AddressLiteral& a, Register Rtoc = noreg); + // Get CPU version dependent size of load_const sequence. 
+ // The returned value is valid only for code sequences + // generated by load_const, not load_const_optimized. + static int load_const_from_toc_size() { + return load_long_pcrelative_size(); + } + bool load_oop_from_toc(Register dst, AddressLiteral& a, Register Rtoc = noreg); + static intptr_t get_const_from_toc(address pc); + static void set_const_in_toc(address pc, unsigned long new_data, CodeBlob *cb); + + // Dynamic TOC. + static bool is_load_const(address a); + static bool is_load_const_from_toc_pcrelative(address a); + static bool is_load_const_from_toc(address a) { return is_load_const_from_toc_pcrelative(a); } + + // PCrelative TOC access. + static bool is_call_byregister(address a) { return is_z_basr(*(short*)a); } + static bool is_load_const_from_toc_call(address a); + static bool is_load_const_call(address a); + static int load_const_call_size() { return load_const_size() + call_byregister_size(); } + static int load_const_from_toc_call_size() { return load_const_from_toc_size() + call_byregister_size(); } + // Offset is +/- 2**32 -> use long. + static long get_load_const_from_toc_offset(address a); + + + void generate_type_profiling(const Register Rdata, + const Register Rreceiver_klass, + const Register Rwanted_receiver_klass, + const Register Rmatching_row, + bool is_virtual_call); + + // Bit operations for single register operands. + inline void lshift(Register r, int places, bool doubl = true); // << + inline void rshift(Register r, int places, bool doubl = true); // >> + + // + // Debugging + // + + // Assert on CC (condition code in CPU state). 
+ void asm_assert(bool check_equal, const char* msg, int id) PRODUCT_RETURN; + void asm_assert_low(const char *msg, int id) PRODUCT_RETURN; + void asm_assert_high(const char *msg, int id) PRODUCT_RETURN; + void asm_assert_eq(const char* msg, int id) { asm_assert(true, msg, id); } + void asm_assert_ne(const char* msg, int id) { asm_assert(false, msg, id); } + + void asm_assert_static(bool check_equal, const char* msg, int id) PRODUCT_RETURN; + + private: + // Emit assertions. + void asm_assert_mems_zero(bool check_equal, bool allow_relocation, int size, int64_t mem_offset, + Register mem_base, const char* msg, int id) PRODUCT_RETURN; + + public: + inline void asm_assert_mem4_is_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(true, true, 4, mem_offset, mem_base, msg, id); + } + inline void asm_assert_mem8_is_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(true, true, 8, mem_offset, mem_base, msg, id); + } + inline void asm_assert_mem4_isnot_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(false, true, 4, mem_offset, mem_base, msg, id); + } + inline void asm_assert_mem8_isnot_zero(int64_t mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(false, true, 8, mem_offset, mem_base, msg, id); + } + + inline void asm_assert_mem4_is_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(true, false, 4, mem_offset, mem_base, msg, id); + } + inline void asm_assert_mem8_is_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(true, false, 8, mem_offset, mem_base, msg, id); + } + inline void asm_assert_mem4_isnot_zero_static(int64_t mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(false, false, 4, mem_offset, mem_base, msg, id); + } + inline void asm_assert_mem8_isnot_zero_static(int64_t 
mem_offset, Register mem_base, const char* msg, int id) { + asm_assert_mems_zero(false, false, 8, mem_offset, mem_base, msg, id); + } + void asm_assert_frame_size(Register expected_size, Register tmp, const char* msg, int id) PRODUCT_RETURN; + + // Verify Z_thread contents. + void verify_thread(); + + // Only if +VerifyOops. + void verify_oop(Register reg, const char* s = "broken oop"); + + // TODO: verify_method and klass metadata (compare against vptr?). + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {} + +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + private: + // Generate printout in stop(). + static const char* stop_types[]; + enum { + stop_stop = 0, + stop_untested = 1, + stop_unimplemented = 2, + stop_shouldnotreachhere = 3, + stop_end = 4 + }; + // Prints msg and stops execution. + void stop(int type, const char* msg, int id = 0); + address stop_chain(address reentry, int type, const char* msg, int id, bool allow_relocation); // Non-relocateable code only!! + void stop_static(int type, const char* msg, int id); // Non-relocateable code only!! + + public: + + // Prints msg and stops. 
+ address stop_chain( address reentry, const char* msg = "", int id = 0) { return stop_chain(reentry, stop_stop, msg, id, true); } + address stop_chain_static(address reentry, const char* msg = "", int id = 0) { return stop_chain(reentry, stop_stop, msg, id, false); } + void stop_static (const char* msg = "", int id = 0) { stop_static(stop_stop, msg, id); } + void stop (const char* msg = "", int id = 0) { stop(stop_stop, msg, id); } + void untested (const char* msg = "", int id = 0) { stop(stop_untested, msg, id); } + void unimplemented(const char* msg = "", int id = 0) { stop(stop_unimplemented, msg, id); } + void should_not_reach_here(const char* msg = "", int id = -1) { stop(stop_shouldnotreachhere, msg, id); } + + // Factor out part of stop into subroutine to save space. + void stop_subroutine(); + + // Prints msg, but don't stop. + void warn(const char* msg); + + //----------------------------- + //--- basic block tracing code + //----------------------------- + void trace_basic_block(uint i); + void init_basic_block_trace(); + // Number of bytes a basic block gets larger due to the tracing code macro (worst case). + // Currently, worst case is 48 bytes. 64 puts us securely on the safe side. + static int basic_blck_trace_blk_size_incr() { return 64; } + + // Write pattern 0x0101010101010101 in region [low-before, high+after]. + // Low and high may be the same registers. Before and after are + // the numbers of 8-byte words. + void zap_from_to(Register low, Register high, Register tmp1 = Z_R0, Register tmp2 = Z_R1, + int before = 0, int after = 0) PRODUCT_RETURN; + + // Emitters for CRC32 calculation. 
+ private: + void fold_byte_crc32(Register crc, Register table, Register val, Register tmp); + void fold_8bit_crc32(Register crc, Register table, Register tmp); + void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table, + Register data, bool invertCRC); + void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc, + Register t0, Register t1, Register t2, Register t3); + public: + void update_byte_crc32( Register crc, Register val, Register table); + void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp); + void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3); + void kernel_crc32_1word(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3); + void kernel_crc32_2word(Register crc, Register buf, Register len, Register table, + Register t0, Register t1, Register t2, Register t3); + + // Emitters for BigInteger.multiplyToLen intrinsic + // note: length of result array (zlen) is passed on the stack + private: + void add2_with_carry(Register dest_hi, Register dest_lo, + Register src1, Register src2); + void multiply_64_x_64_loop(Register x, Register xstart, + Register x_xstart, + Register y, Register y_idx, Register z, + Register carry, Register product, + Register idx, Register kdx); + void multiply_add_128_x_128(Register x_xstart, Register y, Register z, + Register yz_idx, Register idx, + Register carry, Register product, int offset); + void multiply_128_x_128_loop(Register x_xstart, + Register y, Register z, + Register yz_idx, Register idx, + Register jdx, + Register carry, Register product, + Register carry2); + public: + void multiply_to_len(Register x, Register xlen, + Register y, Register ylen, + Register z, + Register tmp1, Register tmp2, + Register tmp3, Register tmp4, Register tmp5); +}; + +/** + * class SkipIfEqual: + 
* + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and its + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value, Register _rscratch); + ~SkipIfEqual(); +}; + +#ifdef ASSERT +// Return false (e.g. important for our impl. of virtual calls). +inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } +#endif + +#endif // CPU_S390_VM_MACROASSEMBLER_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/macroAssembler_s390.inline.hpp b/hotspot/src/cpu/s390/vm/macroAssembler_s390.inline.hpp new file mode 100644 index 00000000000..4fca7129903 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/macroAssembler_s390.inline.hpp @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_MACROASSEMBLER_S390_INLINE_HPP
+#define CPU_S390_VM_MACROASSEMBLER_S390_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+#include "runtime/thread.hpp"
+
+// Simplified in-place shifts of a single register by a constant amount; is_DW picks the *G emitter.
+inline void MacroAssembler::lshift(Register r, int places, bool is_DW) {
+  if (is_DW) {               // NOTE(review): is_DW presumably means "double word" (64-bit) - confirm.
+    z_sllg(r, r, places);    // Three-operand form: r is both source and target.
+  } else {
+    z_sll(r, places);        // Two-operand form: shifts r in place.
+  }
+}
+
+inline void MacroAssembler::rshift(Register r, int places, bool is_DW) {  // Right-shift counterpart of lshift().
+  if (is_DW) {
+    z_srlg(r, r, places);    // Three-operand form: r is both source and target.
+  } else {
+    z_srl(r, places);        // Two-operand form: shifts r in place.
+  }
+}
+
+// *((int8_t*)(dst)) |= imm8
+inline void MacroAssembler::or2mem_8(Address& dst, int64_t imm8) {
+  if (Displacement::is_shortDisp(dst.disp())) {
+    z_oi(dst, imm8);         // Displacement is short: use z_oi.
+  } else {
+    z_oiy(dst, imm8);        // Otherwise use z_oiy.
+  }
+}
+
+inline int MacroAssembler::store_const(const Address &dest, long imm, Register scratch, bool is_long) {
+  unsigned int lm = is_long ? 8 : 4;  // Both lengths are 8 when is_long is set, else 4;
+  unsigned int lc = is_long ? 8 : 4;  // NOTE(review): presumably memory length / constant length - confirm.
+  return store_const(dest, imm, lm, lc, scratch);  // Result of the five-argument overload is returned unchanged.
+}
+
+// Do not rely on add2reg* emitter.
+// Depending on CmdLine switches and actual parameter values,
+// the generated code may alter the condition code, which is counter-intuitive
+// to the semantics of the "load address" (LA/LAY) instruction.
+// Generic address loading d <- base(a) + index(a) + disp(a)
+inline void MacroAssembler::load_address(Register d, const Address &a) {
+  if (Displacement::is_shortDisp(a.disp())) {
+    z_la(d, a.disp(), a.indexOrR0(), a.baseOrR0());   // Short displacement: LA.
+  } else if (Displacement::is_validDisp(a.disp())) {
+    z_lay(d, a.disp(), a.indexOrR0(), a.baseOrR0());  // Valid, but not short, displacement: LAY.
+  } else {
+    guarantee(false, "displacement = " SIZE_FORMAT_HEX ", out of range for LA/LAY", a.disp());
+  }
+}
+
+inline void MacroAssembler::load_const(Register t, void* x) {
+  load_const(t, (long)x);  // Pointer value is passed on as a long constant.
+}
+
+// Load a 64 bit constant encoded by a `Label'.
+// Works for bound as well as unbound labels. For unbound labels, the
+// code will become patched as soon as the label gets bound.
+inline void MacroAssembler::load_const(Register t, Label& L) {
+  load_const(t, target(L));
+}
+
+inline void MacroAssembler::load_const(Register t, const AddressLiteral& a) {
+  assert(t != Z_R0, "R0 not allowed");
+  // First relocate (we don't change the offset in the RelocationHolder,
+  // just pass a.rspec()), then delegate to load_const(Register, long).
+  relocate(a.rspec());
+  load_const(t, (long)a.value());
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, long x) {
+  (void) load_const_optimized_rtn_len(t, x, true);  // The returned length is deliberately discarded.
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, void* a) {
+  load_const_optimized(t, (long)a);  // Pointer value is passed on as a long constant.
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, Label& L) {
+  load_const_optimized(t, target(L));
+}
+
+inline void MacroAssembler::load_const_optimized(Register t, const AddressLiteral& a) {
+  assert(t != Z_R0, "R0 not allowed");
+  assert((relocInfo::relocType)a.rspec().reloc()->type() == relocInfo::none,
+          "cannot relocate optimized load_consts");  // Only literals without relocation are accepted here.
+  load_const_optimized(t, a.value());
+}
+
+inline void MacroAssembler::set_oop(jobject obj, Register d) {
+  load_const(d, allocate_oop_address(obj));  // Loads the AddressLiteral produced by allocate_oop_address().
+}
+
+inline void MacroAssembler::set_oop_constant(jobject obj, Register d) {
+  load_const(d, constant_oop_address(obj));  // Same as set_oop(), but via constant_oop_address().
+}
+
+// Adds MetaData constant md to TOC and loads it from there.
+// md is added to the oop_recorder, but no relocation is added.
+inline bool MacroAssembler::set_metadata_constant(Metadata* md, Register d) {
+  AddressLiteral a = constant_metadata_address(md);
+  return load_const_from_toc(d, a, d);  // Discards the relocation.
+}
+
+
+inline bool MacroAssembler::is_call_pcrelative_short(unsigned long inst) {
+  return is_equal(inst, BRAS_ZOPC);  // off 16, len 16
+}
+
+inline bool MacroAssembler::is_call_pcrelative_long(unsigned long inst) {
+  return is_equal(inst, BRASL_ZOPC);  // off 16, len 32
+}
+
+inline bool MacroAssembler::is_branch_pcrelative_short(unsigned long inst) {
+  // Branch relative, 16-bit offset.
+  return is_equal(inst, BRC_ZOPC);  // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branch_pcrelative_long(unsigned long inst) {
+  // Branch relative, 32-bit offset.
+  return is_equal(inst, BRCL_ZOPC);  // off 16, len 32
+}
+
+inline bool MacroAssembler::is_compareandbranch_pcrelative_short(unsigned long inst) {
+  // Compare and branch relative, 16-bit offset.
+  return is_equal(inst, CRJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CGRJ_ZOPC, CMPBRANCH_MASK) ||
+         is_equal(inst, CIJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CGIJ_ZOPC, CMPBRANCH_MASK) ||
+         is_equal(inst, CLRJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CLGRJ_ZOPC, CMPBRANCH_MASK) ||
+         is_equal(inst, CLIJ_ZOPC, CMPBRANCH_MASK) || is_equal(inst, CLGIJ_ZOPC, CMPBRANCH_MASK);
+}
+
+inline bool MacroAssembler::is_branchoncount_pcrelative_short(unsigned long inst) {
+  // Branch relative on count, 16-bit offset.
+  return is_equal(inst, BRCT_ZOPC) || is_equal(inst, BRCTG_ZOPC);  // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branchonindex32_pcrelative_short(unsigned long inst) {
+  // Branch relative on index (32bit), 16-bit offset.
+  return is_equal(inst, BRXH_ZOPC) || is_equal(inst, BRXLE_ZOPC);  // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branchonindex64_pcrelative_short(unsigned long inst) {
+  // Branch relative on index (64bit), 16-bit offset.
+  return is_equal(inst, BRXHG_ZOPC) || is_equal(inst, BRXLG_ZOPC);  // off 16, len 16
+}
+
+inline bool MacroAssembler::is_branchonindex_pcrelative_short(unsigned long inst) {
+  return is_branchonindex32_pcrelative_short(inst) ||
+         is_branchonindex64_pcrelative_short(inst);
+}
+
+inline bool MacroAssembler::is_branch_pcrelative16(unsigned long inst) {  // Any of the 16-bit-offset branch kinds.
+  return is_branch_pcrelative_short(inst) ||
+         is_compareandbranch_pcrelative_short(inst) ||
+         is_branchoncount_pcrelative_short(inst) ||
+         is_branchonindex_pcrelative_short(inst);
+}
+
+inline bool MacroAssembler::is_branch_pcrelative32(unsigned long inst) {
+  return is_branch_pcrelative_long(inst);
+}
+
+inline bool MacroAssembler::is_branch_pcrelative(unsigned long inst) {
+  return is_branch_pcrelative16(inst) ||
+         is_branch_pcrelative32(inst);
+}
+
+inline bool MacroAssembler::is_load_pcrelative_long(unsigned long inst) {
+  // Load relative, 32-bit offset.
+  return is_equal(inst, LRL_ZOPC, REL_LONG_MASK) || is_equal(inst, LGRL_ZOPC, REL_LONG_MASK);  // off 16, len 32
+}
+
+inline bool MacroAssembler::is_misc_pcrelative_long(unsigned long inst) {
+  // Load address, execute relative, 32-bit offset.
+  return is_equal(inst, LARL_ZOPC, REL_LONG_MASK) || is_equal(inst, EXRL_ZOPC, REL_LONG_MASK);  // off 16, len 32
+}
+
+inline bool MacroAssembler::is_pcrelative_short(unsigned long inst) {
+  return is_branch_pcrelative16(inst) ||
+         is_call_pcrelative_short(inst);
+}
+
+inline bool MacroAssembler::is_pcrelative_long(unsigned long inst) {
+  return is_branch_pcrelative32(inst) ||
+         is_call_pcrelative_long(inst) ||
+         is_load_pcrelative_long(inst) ||
+         is_misc_pcrelative_long(inst);
+}
+
+inline bool MacroAssembler::is_load_pcrelative_long(address iLoc) {
+  unsigned long inst;
+  unsigned int len = get_instruction(iLoc, &inst);  // Fetches the instruction at iLoc; len is its length.
+  return (len == 6) && is_load_pcrelative_long(inst);
+}
+
+inline bool MacroAssembler::is_pcrelative_short(address iLoc) {
+  unsigned long inst;
+  unsigned int len = get_instruction(iLoc, &inst);
+  return ((len == 4) || (len == 6)) && is_pcrelative_short(inst);  // Both 4- and 6-byte instructions qualify.
+}
+
+inline bool MacroAssembler::is_pcrelative_long(address iLoc) {
+  unsigned long inst;
+  unsigned int len = get_instruction(iLoc, &inst);
+  return (len == 6) && is_pcrelative_long(inst);
+}
+
+// Dynamic TOC. Test for any pc-relative instruction.
+inline bool MacroAssembler::is_pcrelative_instruction(address iloc) {
+  unsigned long inst;
+  get_instruction(iloc, &inst);  // Unlike the overloads above, the instruction length is not checked.
+  return is_pcrelative_short(inst) ||
+         is_pcrelative_long(inst);
+}
+
+inline bool MacroAssembler::is_load_addr_pcrel(address a) {
+  return is_equal(a, LARL_ZOPC, LARL_MASK);
+}
+
+// Store the given register (default is R14) into the return_pc slot
+// (_z_abi16(return_pc)) addressed off Z_SP, i.e. in the current frame.
+inline void MacroAssembler::save_return_pc(Register pc) {
+  z_stg(pc, _z_abi16(return_pc), Z_SP);
+}
+
+inline void MacroAssembler::restore_return_pc() {
+  z_lg(Z_R14, _z_abi16(return_pc), Z_SP);  // Reloads Z_R14 from the slot written by save_return_pc().
+}
+
+// Call a function with given entry.
+inline address MacroAssembler::call(Register function_entry) {
+  assert(function_entry != Z_R0, "function_entry cannot be Z_R0");
+
+  Assembler::z_basr(Z_R14, function_entry);  // Branch to function_entry with Z_R14 as the link register.
+  _last_calls_return_pc = pc();              // pc() right after the call is where the callee returns to.
+
+  return _last_calls_return_pc;
+}
+
+// Call a C function via a function entry.
+inline address MacroAssembler::call_c(Register function_entry) {
+  return call(function_entry);
+}
+
+// Call a stub function via a function descriptor, but don't save TOC before
+// call, don't setup TOC and ENV for call, and don't restore TOC after call
+inline address MacroAssembler::call_stub(Register function_entry) {
+  return call_c(function_entry);
+}
+
+inline address MacroAssembler::call_stub(address function_entry) {
+  return call_c(function_entry);  // Forwards to the call_c overload that takes an address.
+}
+
+// Get the pc where the last emitted call will return to.
+inline address MacroAssembler::last_calls_return_pc() {
+  return _last_calls_return_pc;
+}
+
+inline void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc) {
+  set_last_Java_frame(last_Java_sp, last_Java_pc, true);   // Plain wrappers pass true as the third argument ...
+}
+
+inline void MacroAssembler::set_last_Java_frame_static(Register last_Java_sp, Register last_Java_pc) {
+  set_last_Java_frame(last_Java_sp, last_Java_pc, false);  // ... and the *_static wrappers pass false.
+}
+
+inline void MacroAssembler::reset_last_Java_frame(void) {
+  reset_last_Java_frame(true);
+}
+
+inline void MacroAssembler::reset_last_Java_frame_static(void) {
+  reset_last_Java_frame(false);
+}
+
+inline void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1) {
+  set_top_ijava_frame_at_SP_as_last_Java_frame(sp, tmp1, true);
+}
+
+inline void MacroAssembler::set_top_ijava_frame_at_SP_as_last_Java_frame_static(Register sp, Register tmp1) {
+  set_top_ijava_frame_at_SP_as_last_Java_frame(sp, tmp1, false);  // Was 'true', i.e. identical to the non-static wrapper; now matches the other *_static wrappers.
+}
+
+#endif // CPU_S390_VM_MACROASSEMBLER_S390_INLINE_HPP
diff --git a/hotspot/src/cpu/s390/vm/metaspaceShared_s390.cpp b/hotspot/src/cpu/s390/vm/metaspaceShared_s390.cpp
new file mode 100644
index 
00000000000..8e7feacf527 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/metaspaceShared_s390.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/codeBuffer.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/metaspaceShared.hpp" + +// Generate the self-patching vtable method: +// +// This method will be called (as any other Klass virtual method) with +// the Klass itself as the first argument. Example: +// +// oop obj; +// int size = obj->klass()->klass_part()->oop_size(this); +// +// for which the virtual method call is Klass::oop_size();. +// +// The dummy method is called with the Klass object as the first +// operand, and an object as the second argument. 
+//
+
+//=====================================================================
+
+// All of the dummy methods in the vtable are essentially identical,
+// differing only by an ordinal constant, and they bear no relationship
+// to the original method which the caller intended. Also, there needs
+// to be 'vtbl_list_size' instances of the vtable in order to
+// differentiate between the 'vtbl_list_size' original Klass objects.
+
+#undef __
+#define __ masm->
+
+void MetaspaceShared::generate_vtable_methods(void** vtbl_list,  // Not referenced by this implementation.
+                                              void** vtable,     // Out: receives the start of the dummy vtable.
+                                              char** md_top,     // In/out: advanced past the size word and the table.
+                                              char* md_end,      // Not referenced by this implementation.
+                                              char** mc_top,     // Start of the buffer handed to the CodeBuffer.
+                                              char* mc_end) {    // End of that buffer.
+
+  intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*);  // One pointer per virtual per vtable instance.
+  *(intptr_t *)(*md_top) = vtable_bytes;   // Record the table size in front of the table ...
+  *md_top += sizeof(intptr_t);             // ... and step over that size word.
+  void** dummy_vtable = (void**)*md_top;
+  *vtable = dummy_vtable;                  // Hand the table start back to the caller.
+  *md_top += vtable_bytes;                 // Reserve the space for the table itself.
+
+  // Get ready to generate dummy methods.
+
+  CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top);
+  MacroAssembler* masm = new MacroAssembler(&cb);
+
+  __ unimplemented();  // No dummy methods are emitted here; only an unimplemented() marker. *mc_top is not advanced.
+}
diff --git a/hotspot/src/cpu/s390/vm/methodHandles_s390.cpp b/hotspot/src/cpu/s390/vm/methodHandles_s390.cpp
new file mode 100644
index 00000000000..e2f0d32cc8a
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/methodHandles_s390.cpp
@@ -0,0 +1,635 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" + +#ifdef PRODUCT +#define __ _masm-> +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)-> +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant. 
+static RegisterOrConstant constant(int value) { + return RegisterOrConstant(value); +} + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, + Register temp_reg, Register temp2_reg) { + if (VerifyMethodHandles) { + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + temp_reg, temp2_reg, "MH argument is a Class"); + } + __ z_lg(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, "%s should be nonzero", xname); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else +#define NONZERO(x) (x) +#endif + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj_reg, SystemDictionary::WKID klass_id, + Register temp_reg, Register temp2_reg, + const char* error_message) { + + InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); + KlassHandle klass = SystemDictionary::well_known_klass(klass_id); + + assert(temp_reg != Z_R0 && // Is used as base register! + temp_reg != noreg && temp2_reg != noreg, "need valid registers!"); + + NearLabel L_ok, L_bad; + + BLOCK_COMMENT("verify_klass {"); + + __ verify_oop(obj_reg); + __ compareU64_and_branch(obj_reg, (intptr_t)0L, Assembler::bcondEqual, L_bad); + __ load_klass(temp_reg, obj_reg); + // klass_addr is a klass in allstatic SystemDictionaryHandles. Can't get GCed. 
+ __ load_const_optimized(temp2_reg, (address)klass_addr); + __ z_lg(temp2_reg, Address(temp2_reg)); + __ compareU64_and_branch(temp_reg, temp2_reg, Assembler::bcondEqual, L_ok); + + intptr_t super_check_offset = klass->super_check_offset(); + __ z_lg(temp_reg, Address(temp_reg, super_check_offset)); + __ compareU64_and_branch(temp_reg, temp2_reg, Assembler::bcondEqual, L_ok); + __ BIND(L_bad); + __ stop(error_message); + __ BIND(L_ok); + + BLOCK_COMMENT("} verify_klass"); +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, + Register member_reg, Register temp ) { + NearLabel L; + BLOCK_COMMENT("verify_ref_kind {"); + + __ z_llgf(temp, + Address(member_reg, + NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); + __ z_srl(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); + __ z_nilf(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); + __ compare32_and_branch(temp, constant(ref_kind), Assembler::bcondEqual, L); + + { + char *buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || ref_kind == JVM_REF_invokeSpecial) { + // Could do this for all ref_kinds, but would explode assembly code size. + trace_method_handle(_masm, buf); + } + __ stop(buf); + } + + BLOCK_COMMENT("} verify_ref_kind"); + + __ bind(L); +} +#endif // ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, + Register temp, bool for_compiler_entry) { + assert(method == Z_method, "interpreter calling convention"); + __ verify_method_ptr(method); + + assert(target != method, "don 't you kill the method reg!"); + + Label L_no_such_method; + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + // JVMTI events, such as single-stepping, are implemented partly + // by avoiding running compiled code in threads for which the + // event is enabled. 
Check here for interp_only_mode if these + // events CAN be enabled. + __ verify_thread(); + + Label run_compiled_code; + + __ load_and_test_int(temp, Address(Z_thread, JavaThread::interp_only_mode_offset())); + __ z_bre(run_compiled_code); + + // Null method test is replicated below in compiled case, + // it might be able to address across the verify_thread(). + __ z_ltgr(temp, method); + __ z_bre(L_no_such_method); + + __ z_lg(target, Address(method, Method::interpreter_entry_offset())); + __ z_br(target); + + __ bind(run_compiled_code); + } + + // Compiled case, either static or fall-through from runtime conditional. + __ z_ltgr(temp, method); + __ z_bre(L_no_such_method); + + ByteSize offset = for_compiler_entry ? + Method::from_compiled_offset() : Method::from_interpreted_offset(); + Address method_from(method, offset); + + __ z_lg(target, method_from); + __ z_br(target); + + __ bind(L_no_such_method); + assert(StubRoutines::throw_AbstractMethodError_entry() != NULL, "not yet generated!"); + __ load_const_optimized(target, StubRoutines::throw_AbstractMethodError_entry()); + __ z_br(target); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, Register temp3, + bool for_compiler_entry) { + + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. 
+ assert_different_registers(recv, method_temp, temp2, temp3); + assert(method_temp == Z_method, "required register for loading method"); + + BLOCK_COMMENT("jump_to_lambda_form {"); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, + Address(recv, + NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, + Address(method_temp, + NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); + __ verify_oop(method_temp); + // The following assumes that a method is normally compressed in the vmtarget field. + __ z_lg(method_temp, + Address(method_temp, + NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); + + if (VerifyMethodHandles && !for_compiler_entry) { + // Make sure recv is already on stack. + NearLabel L; + Address paramSize(temp2, ConstMethod::size_of_parameters_offset()); + + __ z_lg(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, paramSize, sizeof(u2), /*is_signed*/ false); + // if (temp2 != recv) stop + __ z_lg(temp2, __ argument_address(temp2, temp2, 0)); + __ compare64_and_branch(temp2, recv, Assembler::bcondEqual, L); + __ stop("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, Z_R0, for_compiler_entry); + + BLOCK_COMMENT("} jump_to_lambda_form"); +} + +// code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // This is the interpreter entry. + assert(is_signature_polymorphic(iid), "expected invoke iid"); + + if (iid == vmIntrinsics::_invokeGeneric || iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java + // are not directly used. They are linked to Java-generated + // adapters via MethodHandleNatives.linkMethod. 
They all allow an + // appendix argument. + __ should_not_reach_here(); // Empty stubs make SG sick. + return NULL; + } + + // Z_R10: sender SP (must preserve; see prepare_to_jump_from_interpreted) + // Z_method: method + // Z_ARG1 (Gargs): incoming argument list (must preserve) + Register Z_R4_param_size = Z_R4; // size of parameters + address code_start = __ pc(); + + // Here is where control starts out: + __ align(CodeEntryAlignment); + + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + + // Supplement to 8139891: _intrinsic_id exceeded 1-byte size limit. + if (Method::intrinsic_id_size_in_bytes() == 1) { + __ z_cli(Address(Z_method, Method::intrinsic_id_offset_in_bytes()), (int)iid); + } else { + assert(Method::intrinsic_id_size_in_bytes() == 2, "size error: check Method::_intrinsic_id"); + __ z_lh(Z_R0_scratch, Address(Z_method, Method::intrinsic_id_offset_in_bytes())); + __ z_chi(Z_R0_scratch, (int)iid); + } + __ z_bre(L); + + if (iid == vmIntrinsics::_linkToVirtual || iid == vmIntrinsics::_linkToSpecial) { + // Could do this for all kinds, but would explode assembly code size. + trace_method_handle(_masm, "bad Method::intrinsic_id"); + } + + __ stop("bad Method::intrinsic_id"); + __ bind(L); + + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. 
+ Address Z_R4_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, + "must be _invokeBasic or a linkTo intrinsic"); + + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + Address paramSize(Z_R1_scratch, ConstMethod::size_of_parameters_offset()); + + __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset())); + __ load_sized_value(Z_R4_param_size, paramSize, sizeof(u2), /*is_signed*/ false); + Z_R4_first_arg_addr = __ argument_address(Z_R4_param_size, Z_R4_param_size, 0); + } else { + DEBUG_ONLY(Z_R4_param_size = noreg); + } + + Register Z_mh = noreg; + if (!is_signature_polymorphic_static(iid)) { + Z_mh = Z_ARG4; + __ z_lg(Z_mh, Z_R4_first_arg_addr); + DEBUG_ONLY(Z_R4_param_size = noreg); + } + + // Z_R4_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + + if (iid == vmIntrinsics::_invokeBasic) { + __ pc(); // just for the block comment + generate_method_handle_dispatch(_masm, iid, Z_mh, noreg, not_for_compiler_entry); + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register Z_recv = noreg; + + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) + // up from the interpreter stack. 
+ __ z_lg(Z_recv = Z_R5, Z_R4_first_arg_addr); + DEBUG_ONLY(Z_R4_param_size = noreg); + } + + Register Z_member = Z_method; // MemberName ptr; incoming method ptr is dead now + + __ z_lg(Z_member, __ argument_address(constant(1))); + __ add2reg(Z_esp, Interpreter::stackElementSize); + generate_method_handle_dispatch(_masm, iid, Z_recv, Z_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + + Register temp1 = for_compiler_entry ? Z_R10 : Z_R6; + Register temp2 = Z_R12; + Register temp3 = Z_R11; + Register temp4 = Z_R13; + + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : Z_ARG1), + "only valid assignment"); + } + if (receiver_reg != noreg) { + assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg); + } + if (member_reg != noreg) { + assert_different_registers(temp1, temp2, temp3, temp4, member_reg); + } + if (!for_compiler_entry) { // Don't trash last SP. + assert_different_registers(temp1, temp2, temp3, temp4, Z_R10); + } + + if (iid == vmIntrinsics::_invokeBasic) { + __ pc(); // Just for the block comment. + // Indirect through MH.form.vmentry.vmtarget. + jump_to_lambda_form(_masm, receiver_reg, Z_method, Z_R1, temp3, for_compiler_entry); + return; + } + + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // Make sure the trailing argument really is a MemberName (caller responsibility). 
+ verify_klass(_masm, member_reg, + SystemDictionary::WK_KLASS_ENUM_NAME(MemberName_klass), + temp1, temp2, + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget(member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); + Register temp1_recv_klass = temp1; + + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // Load receiver klass itself. + __ null_check(receiver_reg, Z_R0, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz. + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + NearLabel L_ok; + Register temp2_defc = temp2; + + __ load_heap_oop(temp2_defc, member_clazz); + load_klass_from_Class(_masm, temp2_defc, temp3, temp4); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok); + // If we get here, the type check failed! + __ stop("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // These guys didn't load the recv_klass. 
+ } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument. + // temp1_recv_klass - Klass of stacked receiver, if needed. + // Z_R10 - Interpreter linkage if interpreted. + + bool method_is_live = false; + + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ z_lg(Z_method, member_vmtarget); + method_is_live = true; + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ z_lg(Z_method, member_vmtarget); + method_is_live = true; + break; + + case vmIntrinsics::_linkToVirtual: { + // Same as TemplateTable::invokevirtual, minus the CP setup and profiling. + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // Pick out the vtable index from the MemberName, and then we can discard it. + Register temp2_index = temp2; + __ z_lg(temp2_index, member_vmindex); + + if (VerifyMethodHandles) { + // if (member_vmindex < 0) stop + NearLabel L_index_ok; + __ compare32_and_branch(temp2_index, constant(0), Assembler::bcondNotLow, L_index_ok); + __ stop("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. + + // Get target method and entry point. + __ lookup_virtual_method(temp1_recv_klass, temp2_index, Z_method); + method_is_live = true; + break; + } + + case vmIntrinsics::_linkToInterface: { + // Same as TemplateTable::invokeinterface, minus the CP setup + // and profiling, with different argument motion. 
+ if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf, temp2, temp4); + + Register Z_index = Z_method; + + __ z_lg(Z_index, member_vmindex); + + if (VerifyMethodHandles) { + NearLabel L; + // if (member_vmindex < 0) stop + __ compare32_and_branch(Z_index, constant(0), Assembler::bcondNotLow, L); + __ stop("invalid vtable index for MH.invokeInterface"); + __ bind(L); + } + + // Given interface, index, and recv klass, dispatch to the implementation method. + Label L_no_such_interface; + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // Note: next two args must be the same: + Z_index, Z_method, temp2, noreg, + L_no_such_interface); + jump_from_method_handle(_masm, Z_method, temp2, Z_R0, for_compiler_entry); + + __ bind(L_no_such_interface); + + // Throw exception. + __ load_const_optimized(Z_R1, StubRoutines::throw_IncompatibleClassChangeError_entry()); + __ z_br(Z_R1); + break; + } + + default: + fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); + break; + } + + if (method_is_live) { + // Live at this point: Z_method, O5_savedSP (if interpreted). + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that rcx_recv be shifted out. + jump_from_method_handle(_masm, Z_method, temp1, Z_R0, for_compiler_entry); + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oopDesc* mh, + intptr_t* sender_sp, + intptr_t* args, + intptr_t* tracing_fp) { + bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // Static linkers don't have MH. + const char* mh_reg_name = has_mh ? 
"Z_R4_mh" : "Z_R4"; + tty->print_cr("MH %s %s=" INTPTR_FORMAT " sender_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT, + adaptername, mh_reg_name, + p2i(mh), p2i(sender_sp), p2i(args)); + + if (Verbose) { + // Dumping last frame with frame::describe. + + JavaThread* p = JavaThread::active(); + + ResourceMark rm; + PRESERVE_EXCEPTION_MARK; // May not be needed by safer and unexpensive here. + FrameValues values; + + // Note: We want to allow trace_method_handle from any call site. + // While trace_method_handle creates a frame, it may be entered + // without a valid return PC in Z_R14 (e.g. not just after a call). + // Walking that frame could lead to failures due to that invalid PC. + // => carefully detect that frame when doing the stack walking. + + // Walk up to the right frame using the "tracing_fp" argument. + frame cur_frame = os::current_frame(); // Current C frame. + + while (cur_frame.fp() != tracing_fp) { + cur_frame = os::get_sender_for_C_frame(&cur_frame); + } + + // Safely create a frame and call frame::describe. + intptr_t *dump_sp = cur_frame.sender_sp(); + intptr_t *dump_fp = cur_frame.link(); + + bool walkable = has_mh; // Whether the traced frame shoud be walkable. + + // The sender for cur_frame is the caller of trace_method_handle. + if (walkable) { + // The previous definition of walkable may have to be refined + // if new call sites cause the next frame constructor to start + // failing. Alternatively, frame constructors could be + // modified to support the current or future non walkable + // frames (but this is more intrusive and is not considered as + // part of this RFE, which will instead use a simpler output). + frame dump_frame = frame(dump_sp); + dump_frame.describe(values, 1); + } else { + // Robust dump for frames which cannot be constructed from sp/younger_sp + // Add descriptions without building a Java frame to avoid issues. 
+ values.describe(-1, dump_fp, "fp for #1 "); + values.describe(-1, dump_sp, "sp"); + } + + bool has_args = has_mh; // Whether Z_esp is meaningful. + + // Mark args, if seems valid (may not be valid for some adapters). + if (has_args) { + if ((args >= dump_sp) && (args < dump_fp)) { + values.describe(-1, args, "*Z_esp"); + } + } + + // Note: the unextended_sp may not be correct. + tty->print_cr(" stack layout:"); + values.print(p); + if (has_mh && mh->is_oop()) { + mh->print(); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) { + java_lang_invoke_MethodHandle::form(mh)->print(); + } + } + } + } +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { + if (!TraceMethodHandles) { return; } + + BLOCK_COMMENT("trace_method_handle {"); + + // Save argument registers (they are used in raise exception stub). + __ z_stg(Z_ARG1, Address(Z_SP, 16)); + __ z_stg(Z_ARG2, Address(Z_SP, 24)); + __ z_stg(Z_ARG3, Address(Z_SP, 32)); + __ z_stg(Z_ARG4, Address(Z_SP, 40)); + __ z_stg(Z_ARG5, Address(Z_SP, 48)); + + // Setup arguments. 
+ __ z_lgr(Z_ARG2, Z_ARG4); // mh, see generate_method_handle_interpreter_entry() + __ z_lgr(Z_ARG3, Z_R10); // sender_sp + __ z_lgr(Z_ARG4, Z_esp); + __ load_const_optimized(Z_ARG1, (void *)adaptername); + __ z_lgr(Z_ARG5, Z_SP); // tracing_fp + __ save_return_pc(); // saves Z_R14 + __ push_frame_abi160(0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub)); + __ pop_frame(); + __ restore_return_pc(); // restores to Z_R14 + __ z_lg(Z_ARG1, Address(Z_SP, 16)); // Reload the argument registers from the slots they were saved to. + __ z_lg(Z_ARG2, Address(Z_SP, 24)); + __ z_lg(Z_ARG3, Address(Z_SP, 32)); + __ z_lg(Z_ARG4, Address(Z_SP, 40)); + __ z_lg(Z_ARG5, Address(Z_SP, 48)); // Offset must match the z_stg of Z_ARG5 (slot 48). + __ zap_from_to(Z_SP, Z_SP, Z_R0, Z_R1, 50, -1); + __ zap_from_to(Z_SP, Z_SP, Z_R0, Z_R1, -1, 5); + + BLOCK_COMMENT("} trace_method_handle"); +} +#endif // !PRODUCT diff --git a/hotspot/src/cpu/s390/vm/methodHandles_s390.hpp b/hotspot/src/cpu/s390/vm/methodHandles_s390.hpp new file mode 100644 index 00000000000..806fed842a9 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/methodHandles_s390.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. + + // Adapters + enum /* platform_dependent_constants */ { + adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000)) + }; + + // Additional helper methods for MethodHandles code generation: + public: + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj_reg, SystemDictionary::WKID klass_id, + Register temp_reg, Register temp2_reg, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg, + Register temp_reg, Register temp2_reg) { + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + temp_reg, temp2_reg, + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, + Register temp, Register temp2, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, Register temp3, + bool for_compiler_entry); diff --git a/hotspot/src/cpu/s390/vm/nativeInst_s390.cpp b/hotspot/src/cpu/s390/vm/nativeInst_s390.cpp new file mode 100644 index 00000000000..d84785b0793 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/nativeInst_s390.cpp @@ -0,0 +1,690 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Major contributions by JL, LS + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_s390.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +#define LUCY_DBG + +//------------------------------------- +// N a t i v e I n s t r u c t i o n +//------------------------------------- + +// Define this switch to prevent identity updates. +// In high-concurrency scenarios, it is beneficial to prevent +// identity updates. It has a positive effect on cache line steals. +// and invalidations. +// Test runs of JVM98, JVM2008, and JBB2005 show a very low frequency +// of identity updates. Detection is therefore disabled. 
+#undef SUPPRESS_IDENTITY_UPDATE + +void NativeInstruction::verify() { + // Make sure code pattern is actually an instruction address. + // Do not allow: + // - NULL + // - any address in first page (0x0000 .. 0x0fff) + // - odd address (will cause a "specification exception") + address addr = addr_at(0); + if ((addr == 0) || (((unsigned long)addr & ~0x0fff) == 0) || ((intptr_t)addr & 1) != 0) { + tty->print_cr(INTPTR_FORMAT ": bad instruction address", p2i(addr)); + fatal("not an instruction address"); + } +} + +// Print location and value (hex representation) of current NativeInstruction +void NativeInstruction::print(const char* msg) const { + int len = Assembler::instr_len(addr_at(0)); + if (msg == NULL) { // Output line without trailing blanks. + switch (len) { + case 2: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x", p2i(addr_at(0)), len, halfword_at(0)); break; + case 4: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2)); break; + case 6: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x %4.4x", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2), halfword_at(4)); break; + default: // Never reached. instr_len() always returns one of the above values. Keep the compiler happy. + ShouldNotReachHere(); + break; + } + } else { // Output line with filler blanks to have msg aligned. + switch (len) { + case 2: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %s", p2i(addr_at(0)), len, halfword_at(0), msg); break; + case 4: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x %s", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2), msg); break; + case 6: tty->print_cr(INTPTR_FORMAT "(len=%d): %4.4x %4.4x %4.4x %s", p2i(addr_at(0)), len, halfword_at(0), halfword_at(2), halfword_at(4), msg); break; + default: // Never reached. instr_len() always returns one of the above values. Keep the compiler happy. 
+ ShouldNotReachHere(); + break; + } + } +} +void NativeInstruction::print() const { + print(NULL); +} + +// Hex-Dump of storage around current NativeInstruction. Also try disassembly. +void NativeInstruction::dump(const unsigned int range, const char* msg) const { + Assembler::dump_code_range(tty, addr_at(0), range, (msg == NULL) ? "":msg); +} + +void NativeInstruction::dump(const unsigned int range) const { + dump(range, NULL); +} + +void NativeInstruction::dump() const { + dump(32, NULL); +} + +void NativeInstruction::set_halfword_at(int offset, short i) { + address addr = addr_at(offset); +#ifndef SUPPRESS_IDENTITY_UPDATE + *(short*)addr = i; +#else + if (*(short*)addr != i) { + *(short*)addr = i; + } +#endif + ICache::invalidate_word(addr); +} + +void NativeInstruction::set_word_at(int offset, int i) { + address addr = addr_at(offset); +#ifndef SUPPRESS_IDENTITY_UPDATE + *(int*)addr = i; +#else + if (*(int*)addr != i) { + *(int*)addr = i; + } +#endif + ICache::invalidate_word(addr); +} + +void NativeInstruction::set_jlong_at(int offset, jlong i) { + address addr = addr_at(offset); +#ifndef SUPPRESS_IDENTITY_UPDATE + *(jlong*)addr = i; +#else + if (*(jlong*)addr != i) { + *(jlong*)addr = i; + } +#endif + // Don't need to invalidate 2 words here, because + // the flush instruction operates on doublewords. + ICache::invalidate_word(addr); +} + +#undef SUPPRESS_IDENTITY_UPDATE + +//------------------------------------------------------------ + +int NativeInstruction::illegal_instruction() { + return 0; +} + +bool NativeInstruction::is_illegal() { + // An instruction with main opcode 0x00 (leftmost byte) is not a valid instruction + // (and will never be) and causes a SIGILL where the pc points to the next instruction. + // The caller of this method wants to know if such a situation exists at the current pc. 
+ // + // The result of this method is unsharp with respect to the following facts: + // - Stepping backwards in the instruction stream is not possible on z/Architecture. + // - z/Architecture instructions are 2, 4, or 6 bytes in length. + // - The instruction length is coded in the leftmost two bits of the main opcode. + // - The result is exact if the caller knows by some other means that the + // instruction is of length 2. + // + // If this method returns false, then the 2-byte instruction at *-2 is not a 0x00 opcode. + // If this method returns true, then the 2-byte instruction at *-2 is a 0x00 opcode. + return halfword_at(-2) == illegal_instruction(); +} + +// We use an illtrap for marking a method as not_entrant or zombie. +bool NativeInstruction::is_sigill_zombie_not_entrant() { + if (!is_illegal()) return false; // Just a quick path. + + // One-sided error of is_illegal tolerable here + // (see implementation of is_illegal() for details). + + CodeBlob* cb = CodeCache::find_blob_unsafe(addr_at(0)); + if (cb == NULL || !cb->is_nmethod()) { + return false; + } + + nmethod *nm = (nmethod *)cb; + // This method is not_entrant or zombie if the illtrap instruction + // is located at the verified entry point. + // BE AWARE: the current pc (this) points to the instruction after the + // "illtrap" location. 
+ address sig_addr = ((address) this) - 2; + return nm->verified_entry_point() == sig_addr; +} + +bool NativeInstruction::is_jump() { + unsigned long inst; + Assembler::get_instruction((address)this, &inst); + return MacroAssembler::is_branch_pcrelative_long(inst); +} + +//--------------------------------------------------- +// N a t i v e I l l e g a l I n s t r u c t i o n +//--------------------------------------------------- + +void NativeIllegalInstruction::insert(address code_pos) { + NativeIllegalInstruction* nii = (NativeIllegalInstruction*) nativeInstruction_at(code_pos); + nii->set_halfword_at(0, illegal_instruction()); +} + +//----------------------- +// N a t i v e C a l l +//----------------------- + +void NativeCall::verify() { + if (NativeCall::is_call_at(addr_at(0))) return; + + fatal("this is not a `NativeCall' site"); +} + +address NativeCall::destination() const { + if (MacroAssembler::is_call_far_pcrelative(instruction_address())) { + address here = addr_at(MacroAssembler::nop_size()); + return MacroAssembler::get_target_addr_pcrel(here); + } + + return (address)((NativeMovConstReg *)this)->data(); +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. Thus, the displacement field must be +// 4-byte-aligned. We enforce this on z/Architecture by inserting a nop +// instruction in front of 'brasl' when needed. +// +// Used in the runtime linkage of calls; see class CompiledIC. +void NativeCall::set_destination_mt_safe(address dest) { + if (MacroAssembler::is_call_far_pcrelative(instruction_address())) { + address iaddr = addr_at(MacroAssembler::nop_size()); + // Ensure that patching is atomic hence mt safe. 
+ assert(((long)addr_at(MacroAssembler::call_far_pcrelative_size()) & (call_far_pcrelative_displacement_alignment-1)) == 0, + "constant must be 4-byte aligned"); + set_word_at(MacroAssembler::call_far_pcrelative_size() - 4, Assembler::z_pcrel_off(dest, iaddr)); + } else { + assert(MacroAssembler::is_load_const_from_toc(instruction_address()), "unsupported instruction"); + nativeMovConstReg_at(instruction_address())->set_data(((intptr_t)dest)); + } +} + +//----------------------------- +// N a t i v e F a r C a l l +//----------------------------- + +void NativeFarCall::verify() { + NativeInstruction::verify(); + if (NativeFarCall::is_far_call_at(addr_at(0))) return; + fatal("not a NativeFarCall"); +} + +address NativeFarCall::destination() { + assert(MacroAssembler::is_call_far_patchable_at((address)this), "unexpected call type"); + address ctable = NULL; + if (MacroAssembler::call_far_patchable_requires_alignment_nop((address)this)) { + return MacroAssembler::get_dest_of_call_far_patchable_at(((address)this)+MacroAssembler::nop_size(), ctable); + } else { + return MacroAssembler::get_dest_of_call_far_patchable_at((address)this, ctable); + } +} + + +// Handles both patterns of patchable far calls. +void NativeFarCall::set_destination(address dest, int toc_offset) { + address inst_addr = (address)this; + + // Set new destination (implementation of call may change here). + assert(MacroAssembler::is_call_far_patchable_at(inst_addr), "unexpected call type"); + + if (!MacroAssembler::is_call_far_patchable_pcrelative_at(inst_addr)) { + address ctable = CodeCache::find_blob(inst_addr)->ctable_begin(); + // Need distance of TOC entry from current instruction. + toc_offset = (ctable + toc_offset) - inst_addr; + // Call is via constant table entry. + MacroAssembler::set_dest_of_call_far_patchable_at(inst_addr, dest, toc_offset); + } else { + // Here, we have a pc-relative call (brasl). 
+ // Be aware: dest may have moved in this case, so really patch the displacement, + // when necessary! + // This while loop will also consume the nop which always precedes a call_far_pcrelative. + // We need to revert this after the loop. Pc-relative calls are always assumed to have a leading nop. + unsigned int nop_sz = MacroAssembler::nop_size(); + unsigned int nop_bytes = 0; + while(MacroAssembler::is_z_nop(inst_addr+nop_bytes)) { + nop_bytes += nop_sz; + } + if (nop_bytes > 0) { + inst_addr += nop_bytes - nop_sz; + } + + assert(MacroAssembler::is_call_far_pcrelative(inst_addr), "not a pc-relative call"); + address target = MacroAssembler::get_target_addr_pcrel(inst_addr + nop_sz); + if (target != dest) { + NativeCall *call = nativeCall_at(inst_addr); + call->set_destination_mt_safe(dest); + } + } +} + +//------------------------------------- +// N a t i v e M o v C o n s t R e g +//------------------------------------- + +// Do not use an assertion here. Let clients decide whether they only +// want this when assertions are enabled. +void NativeMovConstReg::verify() { + address loc = addr_at(0); + + // This while loop will also consume the nop which always precedes a + // call_far_pcrelative. We need to revert this after the + // loop. Pc-relative calls are always assumed to have a leading nop. + unsigned int nop_sz = MacroAssembler::nop_size(); + unsigned int nop_bytes = 0; + while(MacroAssembler::is_z_nop(loc+nop_bytes)) { + nop_bytes += nop_sz; + } + + if (nop_bytes > 0) { + if (MacroAssembler::is_call_far_pcrelative(loc+nop_bytes-nop_sz)) return; + loc += nop_bytes; + } + + if (!MacroAssembler::is_load_const_from_toc(loc) && // Load const from TOC. + !MacroAssembler::is_load_const(loc) && // Load const inline. + !MacroAssembler::is_load_narrow_oop(loc) && // Load narrow oop. + !MacroAssembler::is_load_narrow_klass(loc) && // Load narrow Klass ptr. + !MacroAssembler::is_compare_immediate_narrow_oop(loc) && // Compare immediate narrow. 
+ !MacroAssembler::is_compare_immediate_narrow_klass(loc) && // Compare immediate narrow. + !MacroAssembler::is_pcrelative_instruction(loc)) { // Just to make it run. + tty->cr(); + tty->print_cr("NativeMovConstReg::verify(): verifying addr %p(0x%x), %d leading nops", loc, *(uint*)loc, nop_bytes/nop_sz); + tty->cr(); + ((NativeMovConstReg*)loc)->dump(64, "NativeMovConstReg::verify()"); +#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#endif + fatal("this is not a `NativeMovConstReg' site"); + } +} + +address NativeMovConstReg::next_instruction_address(int offset) const { + address inst_addr = addr_at(offset); + + // Load address (which is a constant) pc-relative. + if (MacroAssembler::is_load_addr_pcrel(inst_addr)) { return addr_at(offset+MacroAssembler::load_addr_pcrel_size()); } + + // Load constant from TOC. + if (MacroAssembler::is_load_const_from_toc(inst_addr)) { return addr_at(offset+MacroAssembler::load_const_from_toc_size()); } + + // Load constant inline. + if (MacroAssembler::is_load_const(inst_addr)) { return addr_at(offset+MacroAssembler::load_const_size()); } + + // Load constant narrow inline. + if (MacroAssembler::is_load_narrow_oop(inst_addr)) { return addr_at(offset+MacroAssembler::load_narrow_oop_size()); } + if (MacroAssembler::is_load_narrow_klass(inst_addr)) { return addr_at(offset+MacroAssembler::load_narrow_klass_size()); } + + // Compare constant narrow inline. 
+ if (MacroAssembler::is_compare_immediate_narrow_oop(inst_addr)) { return addr_at(offset+MacroAssembler::compare_immediate_narrow_oop_size()); } + if (MacroAssembler::is_compare_immediate_narrow_klass(inst_addr)) { return addr_at(offset+MacroAssembler::compare_immediate_narrow_klass_size()); } + + if (MacroAssembler::is_call_far_patchable_pcrelative_at(inst_addr)) { return addr_at(offset+MacroAssembler::call_far_patchable_size()); } + + if (MacroAssembler::is_pcrelative_instruction(inst_addr)) { return addr_at(offset+Assembler::instr_len(inst_addr)); } + + ((NativeMovConstReg*)inst_addr)->dump(64, "NativeMovConstReg site is not recognized as such"); +#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#else + guarantee(false, "Not a NativeMovConstReg site"); +#endif + return NULL; +} + +intptr_t NativeMovConstReg::data() const { + address loc = addr_at(0); + if (MacroAssembler::is_load_const(loc)) { + return MacroAssembler::get_const(loc); + } else if (MacroAssembler::is_load_narrow_oop(loc) || + MacroAssembler::is_compare_immediate_narrow_oop(loc) || + MacroAssembler::is_load_narrow_klass(loc) || + MacroAssembler::is_compare_immediate_narrow_klass(loc)) { + ((NativeMovConstReg*)loc)->dump(32, "NativeMovConstReg::data(): cannot extract data from narrow ptr (oop or klass)"); +#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#else + ShouldNotReachHere(); +#endif + return *(intptr_t *)NULL; + } else { + // Otherwise, assume data resides in TOC. Is asserted in called method. + return MacroAssembler::get_const_from_toc(loc); + } +} + + +// Patch in a new constant. +// +// There are situations where we have multiple (hopefully two at most) +// relocations connected to one instruction. Loading an oop from CP +// using pcrelative addressing would one such example. Here we have an +// oop relocation, modifying the oop itself, and an internal word relocation, +// modifying the relative address. +// +// NativeMovConstReg::set_data is then called once for each relocation. 
To be +// able to distinguish between the relocations, we use a rather dirty hack: +// +// All calls that deal with an internal word relocation to fix their relative +// address are on a faked, odd instruction address. The instruction can be +// found on the next lower, even address. +// +// All other calls are "normal", i.e. on even addresses. +address NativeMovConstReg::set_data_plain(intptr_t src, CodeBlob *cb) { + unsigned long x = (unsigned long)src; + address loc = instruction_address(); + address next_address; + + if (MacroAssembler::is_load_addr_pcrel(loc)) { + MacroAssembler::patch_target_addr_pcrel(loc, (address)src); + ICache::invalidate_range(loc, MacroAssembler::load_addr_pcrel_size()); + next_address = next_instruction_address(); + } else if (MacroAssembler::is_load_const_from_toc(loc)) { // Load constant from TOC. + MacroAssembler::set_const_in_toc(loc, src, cb); + next_address = next_instruction_address(); + } else if (MacroAssembler::is_load_const(loc)) { + // Not mt safe, ok in methods like CodeBuffer::copy_code(). 
+ MacroAssembler::patch_const(loc, x); + ICache::invalidate_range(loc, MacroAssembler::load_const_size()); + next_address = next_instruction_address(); + } + // cOops + else if (MacroAssembler::is_load_narrow_oop(loc)) { + MacroAssembler::patch_load_narrow_oop(loc, (oop) (void*) x); + ICache::invalidate_range(loc, MacroAssembler::load_narrow_oop_size()); + next_address = next_instruction_address(); + } + // compressed klass ptrs + else if (MacroAssembler::is_load_narrow_klass(loc)) { + MacroAssembler::patch_load_narrow_klass(loc, (Klass*)x); + ICache::invalidate_range(loc, MacroAssembler::load_narrow_klass_size()); + next_address = next_instruction_address(); + } + // cOops + else if (MacroAssembler::is_compare_immediate_narrow_oop(loc)) { + MacroAssembler::patch_compare_immediate_narrow_oop(loc, (oop) (void*) x); + ICache::invalidate_range(loc, MacroAssembler::compare_immediate_narrow_oop_size()); + next_address = next_instruction_address(); + } + // compressed klass ptrs + else if (MacroAssembler::is_compare_immediate_narrow_klass(loc)) { + MacroAssembler::patch_compare_immediate_narrow_klass(loc, (Klass*)x); + ICache::invalidate_range(loc, MacroAssembler::compare_immediate_narrow_klass_size()); + next_address = next_instruction_address(); + } + else if (MacroAssembler::is_call_far_patchable_pcrelative_at(loc)) { + assert(ShortenBranches, "Wait a minute! A pc-relative call w/o ShortenBranches?"); + // This NativeMovConstReg site does not need to be patched. It was + // patched when it was converted to a call_pcrelative site + // before. The value of the src argument is not related to the + // branch target. + next_address = next_instruction_address(); + } + + else { + tty->print_cr("WARNING: detected an unrecognized code pattern at loc = %p -> 0x%8.8x %8.8x", + loc, *((unsigned int*)loc), *((unsigned int*)(loc+4))); + next_address = next_instruction_address(); // Failure should be handled in next_instruction_address(). 
+#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#endif + } + + return next_address; +} + +// Divided up in set_data_plain() which patches the instruction in the +// code stream and set_data() which additionally patches the oop pool +// if necessary. +void NativeMovConstReg::set_data(intptr_t src) { + // Also store the value into an oop_Relocation cell, if any. + CodeBlob *cb = CodeCache::find_blob(instruction_address()); + address next_address = set_data_plain(src, cb); + + relocInfo::update_oop_pool(instruction_address(), next_address, (address)src, cb); +} + +void NativeMovConstReg::set_narrow_oop(intptr_t data) { + const address start = addr_at(0); + int range = 0; + if (MacroAssembler::is_load_narrow_oop(start)) { + range = MacroAssembler::patch_load_narrow_oop(start, cast_to_oop (data)); + } else if (MacroAssembler::is_compare_immediate_narrow_oop(start)) { + range = MacroAssembler::patch_compare_immediate_narrow_oop(start, cast_to_oop (data)); + } else { + fatal("this is not a `NativeMovConstReg::narrow_oop' site"); + } + ICache::invalidate_range(start, range); +} + +// Compressed klass ptrs. patch narrow klass constant. 
+void NativeMovConstReg::set_narrow_klass(intptr_t data) { + const address start = addr_at(0); + int range = 0; + if (MacroAssembler::is_load_narrow_klass(start)) { + range = MacroAssembler::patch_load_narrow_klass(start, (Klass*)data); + } else if (MacroAssembler::is_compare_immediate_narrow_klass(start)) { + range = MacroAssembler::patch_compare_immediate_narrow_klass(start, (Klass*)data); + } else { + fatal("this is not a `NativeMovConstReg::narrow_klass' site"); + } + ICache::invalidate_range(start, range); +} + +void NativeMovConstReg::set_pcrel_addr(intptr_t newTarget, CompiledMethod *passed_nm /* = NULL */, bool copy_back_to_oop_pool) { + address next_address; + address loc = addr_at(0); + + if (MacroAssembler::is_load_addr_pcrel(loc)) { + address oldTarget = MacroAssembler::get_target_addr_pcrel(loc); + MacroAssembler::patch_target_addr_pcrel(loc, (address)newTarget); + + ICache::invalidate_range(loc, MacroAssembler::load_addr_pcrel_size()); + next_address = loc + MacroAssembler::load_addr_pcrel_size(); + } else if (MacroAssembler::is_load_const_from_toc_pcrelative(loc) ) { // Load constant from TOC. + address oldTarget = MacroAssembler::get_target_addr_pcrel(loc); + MacroAssembler::patch_target_addr_pcrel(loc, (address)newTarget); + + ICache::invalidate_range(loc, MacroAssembler::load_const_from_toc_size()); + next_address = loc + MacroAssembler::load_const_from_toc_size(); + } else if (MacroAssembler::is_call_far_patchable_pcrelative_at(loc)) { + assert(ShortenBranches, "Wait a minute! A pc-relative call w/o ShortenBranches?"); + next_address = next_instruction_address(); + } else { + assert(false, "Not a NativeMovConstReg site for set_pcrel_addr"); + next_address = next_instruction_address(); // Failure should be handled in next_instruction_address(). 
+ } + + if (copy_back_to_oop_pool) { + if (relocInfo::update_oop_pool(instruction_address(), next_address, (address)newTarget, NULL)) { + ((NativeMovConstReg*)instruction_address())->dump(64, "NativeMovConstReg::set_pcrel_addr(): found oop reloc for pcrel_addr"); +#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#else + assert(false, "Ooooops: found oop reloc for pcrel_addr"); +#endif + } + } +} + +void NativeMovConstReg::set_pcrel_data(intptr_t newData, CompiledMethod *passed_nm /* = NULL */, bool copy_back_to_oop_pool) { + address next_address; + address loc = addr_at(0); + + if (MacroAssembler::is_load_const_from_toc(loc) ) { // Load constant from TOC. + // Offset is +/- 2**32 -> use long. + long offset = MacroAssembler::get_load_const_from_toc_offset(loc); + address target = MacroAssembler::get_target_addr_pcrel(loc); + intptr_t oldData = *(intptr_t*)target; + if (oldData != newData) { // Update only if data changes. Prevents cache invalidation. + *(intptr_t *)(target) = newData; + } + + // ICache::invalidate_range(target, sizeof(unsigned long)); // No ICache invalidate for CP data. + next_address = loc + MacroAssembler::load_const_from_toc_size(); + } else if (MacroAssembler::is_call_far_pcrelative(loc)) { + ((NativeMovConstReg*)loc)->dump(64, "NativeMovConstReg::set_pcrel_data() has a problem: setting data for a pc-relative call?"); +#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#else + assert(false, "Ooooops: setting data for a pc-relative call"); +#endif + next_address = next_instruction_address(); + } else { + assert(false, "Not a NativeMovConstReg site for set_pcrel_data"); + next_address = next_instruction_address(); // Failure should be handled in next_instruction_address(). 
+ } + + if (copy_back_to_oop_pool) { + if (relocInfo::update_oop_pool(instruction_address(), next_address, (address)newData, NULL)) { + ((NativeMovConstReg*)instruction_address())->dump(64, "NativeMovConstReg::set_pcrel_data(): found oop reloc for pcrel_data"); +#ifdef LUCY_DBG + VM_Version::z_SIGSEGV(); +#else + assert(false, "Ooooops: found oop reloc for pcrel_data"); +#endif + } + } +} + +#ifdef COMPILER1 +//-------------------------------- +// N a t i v e M o v R e g M e m +//-------------------------------- + +void NativeMovRegMem::verify() { + address l1 = addr_at(0); + address l2 = addr_at(MacroAssembler::load_const_size()); + + if (!MacroAssembler::is_load_const(l1)) { + tty->cr(); + tty->print_cr("NativeMovRegMem::verify(): verifying addr " PTR_FORMAT, p2i(l1)); + tty->cr(); + ((NativeMovRegMem*)l1)->dump(64, "NativeMovConstReg::verify()"); + fatal("this is not a `NativeMovRegMem' site"); + } + + unsigned long inst1; + Assembler::get_instruction(l2, &inst1); + + if (!Assembler::is_z_lb(inst1) && + !Assembler::is_z_llgh(inst1) && + !Assembler::is_z_lh(inst1) && + !Assembler::is_z_l(inst1) && + !Assembler::is_z_llgf(inst1) && + !Assembler::is_z_lg(inst1) && + !Assembler::is_z_le(inst1) && + !Assembler::is_z_ld(inst1) && + !Assembler::is_z_stc(inst1) && + !Assembler::is_z_sth(inst1) && + !Assembler::is_z_st(inst1) && + !(Assembler::is_z_lgr(inst1) && UseCompressedOops) && + !Assembler::is_z_stg(inst1) && + !Assembler::is_z_ste(inst1) && + !Assembler::is_z_std(inst1)) { + tty->cr(); + tty->print_cr("NativeMovRegMem::verify(): verifying addr " PTR_FORMAT + ": wrong or missing load or store at " PTR_FORMAT, p2i(l1), p2i(l2)); + tty->cr(); + ((NativeMovRegMem*)l1)->dump(64, "NativeMovConstReg::verify()"); + fatal("this is not a `NativeMovRegMem' site"); + } +} +#endif // COMPILER1 + +//----------------------- +// N a t i v e J u m p +//----------------------- + +void NativeJump::verify() { + if (NativeJump::is_jump_at(addr_at(0))) return; + fatal("this is not a 
`NativeJump' site"); +} + +// Patch atomically with an illtrap. +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + ResourceMark rm; + int code_size = 2; + CodeBuffer cb(verified_entry, code_size + 1); + MacroAssembler* a = new MacroAssembler(&cb); +#ifdef COMPILER2 + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); +#endif + a->z_illtrap(); + ICache::invalidate_range(verified_entry, code_size); +} + +#undef LUCY_DBG + +//------------------------------------- +// N a t i v e G e n e r a l J u m p +//------------------------------------- + +#ifndef PRODUCT +void NativeGeneralJump::verify() { + unsigned long inst; + Assembler::get_instruction((address)this, &inst); + assert(MacroAssembler::is_branch_pcrelative_long(inst), "not a general jump instruction"); +} +#endif + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + uint64_t instr = BRCL_ZOPC | + Assembler::uimm4(Assembler::bcondAlways, 8, 48) | + Assembler::simm32(RelAddr::pcrel_off32(entry, code_pos), 16, 48); + *(uint64_t*) code_pos = (instr << 16); // Must shift into big end, then the brcl will be written to code_pos. + ICache::invalidate_range(code_pos, instruction_size); +} + +void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + assert(((intptr_t)instr_addr & (BytesPerWord-1)) == 0, "requirement for mt safe patching"); + // Bytes_after_jump cannot change, because we own the Patching_lock. + assert(Patching_lock->owned_by_self(), "must hold lock to patch instruction"); + intptr_t bytes_after_jump = (*(intptr_t*)instr_addr) & 0x000000000000ffffL; // 2 bytes after jump. 
+ intptr_t load_const_bytes = (*(intptr_t*)code_buffer) & 0xffffffffffff0000L; + *(intptr_t*)instr_addr = load_const_bytes | bytes_after_jump; + ICache::invalidate_range(instr_addr, 6); +} diff --git a/hotspot/src/cpu/s390/vm/nativeInst_s390.hpp b/hotspot/src/cpu/s390/vm/nativeInst_s390.hpp new file mode 100644 index 00000000000..dcf147082b3 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/nativeInst_s390.hpp @@ -0,0 +1,673 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +// Major contributions by AHa, JL, LS + +#ifndef CPU_S390_VM_NATIVEINST_S390_HPP +#define CPU_S390_VM_NATIVEINST_S390_HPP + +#include "asm/macroAssembler.hpp" +#include "memory/allocation.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" + +class NativeCall; +class NativeFarCall; +class NativeMovConstReg; +class NativeJump; +#ifndef COMPILER2 +class NativeGeneralJump; +class NativeMovRegMem; +#endif +class NativeInstruction; + +NativeCall* nativeCall_before(address return_address); +NativeCall* nativeCall_at(address instr); +NativeFarCall* nativeFarCall_before(address return_address); +NativeFarCall* nativeFarCall_at(address instr); +NativeMovConstReg* nativeMovConstReg_at(address address); +NativeMovConstReg* nativeMovConstReg_before(address address); +NativeJump* nativeJump_at(address address); +#ifndef COMPILER2 +NativeMovRegMem* nativeMovRegMem_at (address address); +NativeGeneralJump* nativeGeneralJump_at(address address); +#endif +NativeInstruction* nativeInstruction_at(address address); + +// We have interface for the following instructions: +// - NativeInstruction +// - NativeCall +// - NativeFarCall +// - NativeMovConstReg +// - NativeMovRegMem +// - NativeJump +// - NativeGeneralJump +// - NativeIllegalInstruction +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. + +//------------------------------------- +// N a t i v e I n s t r u c t i o n +//------------------------------------- + +class NativeInstruction VALUE_OBJ_CLASS_SPEC { + friend class Relocation; + + public: + + enum z_specific_constants { + nop_instruction_size = 2 + }; + + bool is_illegal(); + + // Bcrl is currently the only accepted instruction here. + bool is_jump(); + + // We use an illtrap for marking a method as not_entrant or zombie. 
+ bool is_sigill_zombie_not_entrant(); + + bool is_safepoint_poll() { + // Is the current instruction a POTENTIAL read access to the polling page? + // The instruction's current arguments are not checked! + return MacroAssembler::is_load_from_polling_page(addr_at(0)); + } + + address get_poll_address(void *ucontext) { + // Extract poll address from instruction and ucontext. + return MacroAssembler::get_poll_address(addr_at(0), ucontext); + } + + uint get_poll_register() { + // Extract poll register from instruction. + return MacroAssembler::get_poll_register(addr_at(0)); + } + + bool is_memory_serialization(JavaThread *thread, void *ucontext) { + // Is the current instruction a write access of thread to the + // memory serialization page? + return MacroAssembler::is_memory_serialization(long_at(0), thread, ucontext); + } + + public: + + // The output of __ breakpoint_trap(). + static int illegal_instruction(); + + // The address of the currently processed instruction. + address instruction_address() const { return addr_at(0); } + + protected: + address addr_at(int offset) const { return address(this) + offset; } + + // z/Architecture terminology + // halfword = 2 bytes + // word = 4 bytes + // doubleword = 8 bytes + unsigned short halfword_at(int offset) const { return *(unsigned short*)addr_at(offset); } + int word_at(int offset) const { return *(jint*)addr_at(offset); } + long long_at(int offset) const { return *(jlong*)addr_at(offset); } + void set_halfword_at(int offset, short i); // Deals with I-cache. + void set_word_at(int offset, int i); // Deals with I-cache. + void set_jlong_at(int offset, jlong i); // Deals with I-cache. + void set_addr_at(int offset, address x); // Deals with I-cache. 
+ + void print() const; + void print(const char* msg) const; + void dump() const; + void dump(const unsigned int range) const; + void dump(const unsigned int range, const char* msg) const; + + public: + + void verify(); + + // unit test stuff + static void test() {} // Override for testing. + + friend NativeInstruction* nativeInstruction_at(address address) { + NativeInstruction* inst = (NativeInstruction*)address; + #ifdef ASSERT + inst->verify(); + #endif + return inst; + } +}; + +//--------------------------------------------------- +// N a t i v e I l l e g a l I n s t r u c t i o n +//--------------------------------------------------- + +class NativeIllegalInstruction: public NativeInstruction { + public: + enum z_specific_constants { + instruction_size = 2 + }; + + // Insert illegal opcode at specific address. + static void insert(address code_pos); +}; + +//----------------------- +// N a t i v e C a l l +//----------------------- + +// The NativeCall is an abstraction for accessing/manipulating call +// instructions. It is used to manipulate inline caches, primitive & +// dll calls, etc. + +// A native call, as defined by this abstraction layer, consists of +// all instructions required to set up for and actually make the call. +// +// On z/Architecture, there exist three different forms of native calls: +// 1) Call with pc-relative address, 1 instruction +// The location of the target function is encoded as relative address +// in the call instruction. The short form (BRAS) allows for a +// 16-bit signed relative address (in 2-byte units). The long form +// (BRASL) allows for a 32-bit signed relative address (in 2-byte units). +// 2) Call with immediate address, 3 or 5 instructions. +// The location of the target function is given by an immediate +// constant which is loaded into a (scratch) register. Depending on +// the hardware capabilities, this takes 2 or 4 instructions. +// The call itself is then a "call by register"(BASR) instruction. 
+// 3) Call with address from constant pool, 2(3) instructions (with dynamic TOC) +// The location of the target function is stored in the constant pool +// during compilation. From there it is loaded into a (scratch) register. +// The call itself is then a "call by register"(BASR) instruction. +// +// When initially generating a call, the compiler uses form 2) (not +// patchable, target address constant, e.g. runtime calls) or 3) (patchable, +// target address might eventually get relocated). Later in the process, +// a call could be transformed into form 1) (also patchable) during ShortenBranches. +// +// If a call is/has to be patchable, the instruction sequence generated for it +// has to be constant in length. Excessive space, created e.g. by ShortenBranches, +// is allocated to lower addresses and filled with nops. That is necessary to +// keep the return address constant, no matter what form the call has. +// Methods dealing with such calls have "patchable" as part of their name. + +class NativeCall: public NativeInstruction { + public: + + static int get_IC_pos_in_java_to_interp_stub() { + return 0; + } + + enum z_specific_constants { + instruction_size = 18, // Used in shared code for calls with reloc_info: + // value correct if !has_long_displacement_fast(). + call_far_pcrelative_displacement_offset = 4, // Includes 2 bytes for the nop. + call_far_pcrelative_displacement_alignment = 4 + }; + + + // Maximum size (in bytes) of a call to an absolute address. + // Used when emitting call to deopt handler blob, which is a + // "load_const_call". The code pattern is: + // tmpReg := load_const(address); (* depends on CPU ArchLvl, but is otherwise constant *) + // call(tmpReg); (* basr, 2 bytes *) + static unsigned int max_instruction_size() { + return MacroAssembler::load_const_size() + MacroAssembler::call_byregister_size(); + } + + // address instruction_address() const { return addr_at(0); } + + // For the ordering of the checks see note at nativeCall_before. 
+ address next_instruction_address() const { + address iaddr = instruction_address(); + + if (MacroAssembler::is_load_const_call(iaddr)) { + // Form 2): load_const, BASR + return addr_at(MacroAssembler::load_const_call_size()); + } + + if (MacroAssembler::is_load_const_from_toc_call(iaddr)) { + // Form 3): load_const_from_toc (LARL+LG/LGRL), BASR. + return addr_at(MacroAssembler::load_const_from_toc_call_size()); + } + + if (MacroAssembler::is_call_far_pcrelative(iaddr)) { + // Form 1): NOP, BRASL + // The BRASL (Branch Relative And Save Long) is patched into the space created + // by the load_const_from_toc_call sequence (typically (LARL-LG)/LGRL - BASR). + // The BRASL must be positioned such that its end is FW (4-byte) aligned (for atomic patching). + // It is achieved by aligning the end of the entire sequence on a 4byte boundary, by inserting + // a nop, if required, at the very beginning of the instruction sequence. The nop needs to + // be accounted for when calculating the next instruction address. The alignment takes place + // already when generating the original instruction sequence. The alignment requirement + // makes the size depend on location. + // The return address of the call must always be at the end of the instruction sequence. + // Inserting the extra alignment nop (or anything else) at the end is not an option. + // The patched-in brasl instruction is prepended with a nop to make it easier to + // distinguish from a load_const_from_toc_call sequence. + return addr_at(MacroAssembler::call_far_pcrelative_size()); + } + + ((NativeCall*)iaddr)->print(); + guarantee(false, "Not a NativeCall site"); + return NULL; + } + + address return_address() const { + return next_instruction_address(); + } + + address destination() const; + + void set_destination_mt_safe(address dest); + + void verify_alignment() {} // Yet another real do nothing guy :) + void verify(); + + // unit test stuff + static void test(); + + // Creation.
+ friend NativeCall* nativeCall_at(address instr) { + NativeCall* call; + + // Make sure not to return garbage. + if (NativeCall::is_call_at(instr)) { + call = (NativeCall*)instr; + } else { + call = (NativeCall*)instr; + call->print(); + guarantee(false, "Not a NativeCall site"); + } + +#ifdef ASSERT + call->verify(); +#endif + return call; + } + + // This is a very tricky function to implement. It involves stepping + // backwards in the instruction stream. On architectures with variable + // instruction length, this is a risky endeavor. From the return address, + // you do not know how far to step back to be at a location (your starting + // point) that will eventually bring you back to the return address. + // Furthermore, it may happen that there are multiple starting points. + // + // With only a few possible (allowed) code patterns, the risk is lower but + // does not diminish completely. Experience shows that there are code patterns + // which look like a load_const_from_toc_call @(return address-8), but in + // fact are a call_far_pcrelative @(return address-6). The other way around + // is possible as well, but was not knowingly observed so far. + // + // The unpredictability is caused by the pc-relative address field in both + // the call_far_pcrelative (BASR) and the load_const_from_toc (LGRL) + // instructions. This field can contain an arbitrary bit pattern. + // + // Here is a real-world example: + // Mnemonics: LGRL r10, BASR r14,r10 + // Hex code: eb01 9008 007a c498 ffff c4a8 c0e5 ffc1 0dea + // Mnemonics: AGSI ,I8 LGRL r9, BRASL r14, correct + // + // If you first check for a load_const_from_toc_call @(-8), you will find + // a false positive. In this example, it is obviously false, because the + // preceding bytes do not form a valid instruction pattern. If you first + // check for call_far_pcrelative @(-6), you get a true positive - in this + // case. 
+ // + // The following remedy has been implemented/enforced: + // 1) Everywhere, the permissible code patterns are checked in the same + // sequence: Form 2) - Form 3) - Form 1). + // 2) The call_far_pcrelative, which would ideally be just one BRASL + // instruction, is always prepended with a NOP. This measure avoids + // ambiguities with load_const_from_toc_call. + friend NativeCall* nativeCall_before(address return_address) { + NativeCall *call = NULL; + + // Make sure not to return garbage + address instp = return_address - MacroAssembler::load_const_call_size(); + if (MacroAssembler::is_load_const_call(instp)) { // Form 2) + call = (NativeCall*)(instp); // load_const + basr + } else { + instp = return_address - MacroAssembler::load_const_from_toc_call_size(); + if (MacroAssembler::is_load_const_from_toc_call(instp)) { // Form 3) + call = (NativeCall*)(instp); // load_const_from_toc + basr + } else { + instp = return_address - MacroAssembler::call_far_pcrelative_size(); + if (MacroAssembler::is_call_far_pcrelative(instp)) { // Form 1) + call = (NativeCall*)(instp); // brasl (or nop + brasl) + } else { + call = (NativeCall*)(instp); + call->print(); + guarantee(false, "Not a NativeCall site"); + } + } + } + +#ifdef ASSERT + call->verify(); +#endif + return call; + } + + // Ordering of checks 2) 3) 1) is relevant! + static bool is_call_at(address a) { + // Check plain instruction sequence. Do not care about filler or alignment nops. + bool b = MacroAssembler::is_load_const_call(a) || // load_const + basr + MacroAssembler::is_load_const_from_toc_call(a) || // load_const_from_toc + basr + MacroAssembler::is_call_far_pcrelative(a); // nop + brasl + return b; + } + + // Ordering of checks 2) 3) 1) is relevant! + static bool is_call_before(address a) { + // check plain instruction sequence. Do not care about filler or alignment nops. 
+ bool b = MacroAssembler::is_load_const_call( a - MacroAssembler::load_const_call_size()) || // load_const + basr + MacroAssembler::is_load_const_from_toc_call(a - MacroAssembler::load_const_from_toc_call_size()) || // load_const_from_toc + basr + MacroAssembler::is_call_far_pcrelative( a - MacroAssembler::call_far_pcrelative_size()); // nop+brasl + return b; + } + + static bool is_call_to(address instr, address target) { + // Check whether there is a `NativeCall' at the address `instr' + // calling to the address `target'. + return is_call_at(instr) && target == ((NativeCall *)instr)->destination(); + } + + bool is_pcrelative() { + return MacroAssembler::is_call_far_pcrelative((address)this); + } +}; + +//----------------------------- +// N a t i v e F a r C a l l +//----------------------------- + +// The NativeFarCall is an abstraction for accessing/manipulating native +// call-anywhere instructions. +// Used to call native methods which may be loaded anywhere in the address +// space, possibly out of reach of a call instruction. + +// Refer to NativeCall for a description of the supported call forms. + +class NativeFarCall: public NativeInstruction { + + public: + // We use MacroAssembler::call_far_patchable() for implementing a + // call-anywhere instruction. + + static int instruction_size() { return MacroAssembler::call_far_patchable_size(); } + static int return_address_offset() { return MacroAssembler::call_far_patchable_ret_addr_offset(); } + + // address instruction_address() const { return addr_at(0); } + + address next_instruction_address() const { + return addr_at(instruction_size()); + } + + address return_address() const { + return addr_at(return_address_offset()); + } + + // Returns the NativeFarCall's destination. + address destination(); + + // Sets the NativeFarCall's destination, not necessarily mt-safe. + // Used when relocating code.
+ void set_destination(address dest, int toc_offset); + + // Checks whether instr points at a NativeFarCall instruction. + static bool is_far_call_at(address instr) { + // Use compound inspection function which, in addition to instruction sequence, + // also checks for expected nops and for instruction alignment. + return MacroAssembler::is_call_far_patchable_at(instr); + } + + // Does the NativeFarCall implementation use a pc-relative encoding + // of the call destination? + // Used when relocating code. + bool is_pcrelative() { + address iaddr = (address)this; + assert(is_far_call_at(iaddr), "unexpected call type"); + return MacroAssembler::is_call_far_patchable_pcrelative_at(iaddr); + } + + void verify(); + + // Unit tests + static void test(); + + // Instantiates a NativeFarCall object starting at the given instruction + // address and returns the NativeFarCall object. + inline friend NativeFarCall* nativeFarCall_at(address instr) { + NativeFarCall* call = (NativeFarCall*)instr; +#ifdef ASSERT + call->verify(); +#endif + return call; + } +}; + + +//------------------------------------- +// N a t i v e M o v C o n s t R e g +//------------------------------------- + +// An interface for accessing/manipulating native set_oop imm, reg instructions. +// (Used to manipulate inlined data references, etc.) + +// A native move of a constant into a register, as defined by this abstraction layer, +// deals with instruction sequences that load "quasi constant" oops into registers +// for addressing. For multiple causes, those "quasi constant" oops eventually need +// to be changed (i.e. patched). The reason is quite simple: objects might get moved +// around in storage. Pc-relative oop addresses have to be patched also if the +// reference location is moved. That happens when executable code is relocated. 
+ +class NativeMovConstReg: public NativeInstruction { + public: + + enum z_specific_constants { + instruction_size = 10 // Used in shared code for calls with reloc_info. + }; + + // address instruction_address() const { return addr_at(0); } + + // The current instruction might be located at an offset. + address next_instruction_address(int offset = 0) const; + + // (The [set_]data accessor respects oop_type relocs also.) + intptr_t data() const; + + // Patch data in code stream. + address set_data_plain(intptr_t x, CodeBlob *code); + // Patch data in code stream and oop pool if necessary. + void set_data(intptr_t x); + + // Patch narrow oop constant in code stream. + void set_narrow_oop(intptr_t data); + void set_narrow_klass(intptr_t data); + void set_pcrel_addr(intptr_t addr, CompiledMethod *nm = NULL, bool copy_back_to_oop_pool=false); + void set_pcrel_data(intptr_t data, CompiledMethod *nm = NULL, bool copy_back_to_oop_pool=false); + + void verify(); + + // unit test stuff + static void test(); + + // Creation. + friend NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)address; + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + + +#ifdef COMPILER1 +//--------------------------------- +// N a t i v e M o v R e g M e m +//--------------------------------- + +// Interface to manipulate a code sequence that performs a memory access (load/store). +// The code is the patchable version of memory accesses generated by +// LIR_Assembler::reg2mem() and LIR_Assembler::mem2reg(). +// +// Loading the offset for the mem access is target of the manipulation. 
+// +// The instruction sequence looks like this: +// iihf %r1,$bits1 ; load offset for mem access +// iilf %r1,$bits2 +// [compress oop] ; optional, load only +// load/store %r2,0(%r1,%r2) ; memory access + +class NativeMovRegMem; +inline NativeMovRegMem* nativeMovRegMem_at (address address); +class NativeMovRegMem: public NativeInstruction { + public: + intptr_t offset() const { + return nativeMovConstReg_at(addr_at(0))->data(); + } + void set_offset(intptr_t x) { + nativeMovConstReg_at(addr_at(0))->set_data(x); + } + void add_offset_in_bytes(intptr_t radd_offset) { + set_offset(offset() + radd_offset); + } + void verify(); + + private: + friend inline NativeMovRegMem* nativeMovRegMem_at(address address) { + NativeMovRegMem* test = (NativeMovRegMem*)address; + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; +#endif // COMPILER1 + + +//----------------------- +// N a t i v e J u m p +//----------------------- + + +// An interface for accessing/manipulating native jumps +class NativeJump: public NativeInstruction { + public: + enum z_constants { + instruction_size = 2 // Size of z_illtrap(). + }; + + // Maximum size (in bytes) of a jump to an absolute address. + // Used when emitting branch to an exception handler which is a "load_const_optimized_branch". + // Thus, a pessimistic estimate is obtained when using load_const. 
+ // code pattern is: + // tmpReg := load_const(address); (* varying size *) + // jumpTo(tmpReg); (* bcr, 2 bytes *) + // + static unsigned int max_instruction_size() { + return MacroAssembler::load_const_size() + MacroAssembler::jump_byregister_size(); + } + + +// address instruction_address() const { return addr_at(0); } + + address jump_destination() const { + return (address)nativeMovConstReg_at(instruction_address())->data(); + } + + void set_jump_destination(address dest) { + nativeMovConstReg_at(instruction_address())->set_data(((intptr_t)dest)); + } + + // Creation + friend NativeJump* nativeJump_at(address address) { + NativeJump* jump = (NativeJump*)address; + #ifdef ASSERT + jump->verify(); + #endif + return jump; + } + + static bool is_jump_at(address a) { + int off = 0; + bool b = (MacroAssembler::is_load_const_from_toc(a+off) && + Assembler::is_z_br(*(short*)(a+off + MacroAssembler::load_const_from_toc_size()))); + b = b || (MacroAssembler::is_load_const(a+off) && + Assembler::is_z_br(*(short*)(a+off + MacroAssembler::load_const_size()))); + return b; + } + + void verify(); + + // Unit testing stuff + static void test(); + + // Insertion of native jump instruction. + static void insert(address code_pos, address entry); + + // MT-safe insertion of native jump at verified method entry. + static void check_verified_entry_alignment(address entry, address verified_entry) { } + + static void patch_verified_entry(address entry, address verified_entry, address dest); +}; + +//------------------------------------- +// N a t i v e G e n e r a l J u m p +//------------------------------------- + +// Despite the name, handles only simple branches. +// On ZARCH_64 BRCL only. 
+class NativeGeneralJump; +inline NativeGeneralJump* nativeGeneralJump_at(address address); +class NativeGeneralJump: public NativeInstruction { + public: + enum ZARCH_specific_constants { + instruction_size = 6 + }; + + address instruction_address() const { return addr_at(0); } + address jump_destination() const { return addr_at(0) + MacroAssembler::get_pcrel_offset(addr_at(0)); } + + // Creation + friend inline NativeGeneralJump* nativeGeneralJump_at(address addr) { + NativeGeneralJump* jump = (NativeGeneralJump*)(addr); +#ifdef ASSERT + jump->verify(); +#endif + return jump; + } + + // Insertion of native general jump instruction. + static void insert_unconditional(address code_pos, address entry); + + void set_jump_destination(address dest) { + Unimplemented(); + // set_word_at(MacroAssembler::call_far_pcrelative_size()-4, Assembler::z_pcrel_off(dest, addr_at(0))); + } + + static void replace_mt_safe(address instr_addr, address code_buffer); + + void verify() PRODUCT_RETURN; +}; + +#endif // CPU_S390_VM_NATIVEINST_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/registerMap_s390.hpp b/hotspot/src/cpu/s390/vm/registerMap_s390.hpp new file mode 100644 index 00000000000..8d717b5b19b --- /dev/null +++ b/hotspot/src/cpu/s390/vm/registerMap_s390.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_REGISTERMAP_S390_HPP +#define CPU_S390_VM_REGISTERMAP_S390_HPP + +// Machine-dependent implementation for register maps. + + friend class frame; + + private: + // This is the hook for finding a register in a "well-known" location, + // such as a register block of a predetermined format. + // Since there is none, we just return NULL. + address pd_location(VMReg reg) const {return NULL;} + + // No PD state to clear or copy. + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_S390_VM_REGISTERMAP_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/registerSaver_s390.hpp b/hotspot/src/cpu/s390/vm/registerSaver_s390.hpp new file mode 100644 index 00000000000..6b670007ea3 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/registerSaver_s390.hpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_REGISTERSAVER_S390_HPP +#define CPU_S390_VM_REGISTERSAVER_S390_HPP + +class RegisterSaver { + // Used for saving volatile registers. + + // Class declaration moved to separate file to make it available elsewhere. + // Implementation remains in sharedRuntime_s390.cpp + + public: + + // Set of registers to be saved. + typedef enum { + all_registers, + all_registers_except_r2, + all_integer_registers, + all_volatile_registers, // According to ABI calling convention. + arg_registers + } RegisterSet; + + // Boolean flags to force only argument registers to be saved. + static int live_reg_save_size(RegisterSet reg_set); + static int live_reg_frame_size(RegisterSet reg_set); + // Specify the register that should be stored as the return pc in the current frame. + static OopMap* save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc = Z_R14); + static void restore_live_registers(MacroAssembler* masm, RegisterSet reg_set); + + // Generate the OopMap (again, regs were saved before). + static OopMap* generate_oop_map(MacroAssembler* masm, RegisterSet reg_set); + + // During deoptimization only the result registers need to be restored + // all the other values have already been extracted. + static void restore_result_registers(MacroAssembler* masm); + + // Constants and data structures: + + typedef enum { + int_reg = 0, + float_reg = 1, + excluded_reg = 2, // Not saved/restored.
+ } RegisterType; + + typedef enum { + reg_size = 8, + half_reg_size = reg_size / 2, + } RegisterConstants; + + // Remember type, number, and VMReg. + typedef struct { + RegisterType reg_type; + int reg_num; + VMReg vmreg; + } LiveRegType; + +}; + +#endif // CPU_S390_VM_REGISTERSAVER_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/register_definitions_s390.cpp b/hotspot/src/cpu/s390/vm/register_definitions_s390.cpp new file mode 100644 index 00000000000..99116f5399b --- /dev/null +++ b/hotspot/src/cpu/s390/vm/register_definitions_s390.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Make sure the defines don't screw up the declarations later on in this file. 
+#define DONT_USE_REGISTER_DEFINES + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/register.hpp" +#include "register_s390.hpp" +#include "interp_masm_s390.hpp" + +REGISTER_DEFINITION(Register, noreg); + +REGISTER_DEFINITION(FloatRegister, fnoreg); diff --git a/hotspot/src/cpu/s390/vm/register_s390.cpp b/hotspot/src/cpu/s390/vm/register_s390.cpp new file mode 100644 index 00000000000..1746da9f150 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/register_s390.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "register_s390.hpp" + + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * 2; /* presumably 2 = register halves, as in ConcreteRegisterImpl::number_of_registers - confirm */ +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers * 2; + +const char* RegisterImpl::name() const { /* Returns the symbolic name of this register; the noreg string if it is not valid. */ + const char* names[number_of_registers] = { + "Z_R0", "Z_R1", "Z_R2", "Z_R3", "Z_R4", "Z_R5", "Z_R6", "Z_R7", + "Z_R8", "Z_R9", "Z_R10", "Z_R11", "Z_R12", "Z_R13", "Z_R14", "Z_R15" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { /* Returns the symbolic name of this float register; the fnoreg string if it is not valid. */ + const char* names[number_of_registers] = { + "Z_F0", "Z_F1", "Z_F2", "Z_F3", "Z_F4", "Z_F5", "Z_F6", "Z_F7", "Z_F8", "Z_F9", + "Z_F10", "Z_F11", "Z_F12", "Z_F13", "Z_F14", "Z_F15" + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} diff --git a/hotspot/src/cpu/s390/vm/register_s390.hpp b/hotspot/src/cpu/s390/vm/register_s390.hpp new file mode 100644 index 00000000000..4c61174a613 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/register_s390.hpp @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_REGISTER_S390_HPP +#define CPU_S390_VM_REGISTER_S390_HPP + +#include "asm/register.hpp" +#include "vm_version_s390.hpp" + +class Address; +class VMRegImpl; + +typedef VMRegImpl* VMReg; + +// Use Register as shortcut for RegisterImpl*. +class RegisterImpl; +typedef RegisterImpl* Register; + +// The implementation of integer registers for z/Architecture. + +// z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001 +// +// r0-r1 General purpose (volatile) +// r2 Parameter and return value (volatile) +// r3 TOC pointer (volatile) - NOTE(review): r3 is also listed as a parameter register on the next line; confirm. +// r3-r5 Parameters (volatile) +// r6 Parameter (nonvolatile) +// r7-r11 Locals (nonvolatile) +// r12 Local, often used as GOT pointer (nonvolatile) +// r13 Local, often used as toc (nonvolatile) +// r14 return address (volatile) +// r15 stack pointer (nonvolatile) +// +// f0,f2,f4,f6 Parameters (volatile) +// f1,f3,f5,f7 General purpose (volatile) +// f8-f15 General purpose (nonvolatile) + +inline Register as_Register(int encoding) { + return (Register)(long)encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 16, + number_of_arg_registers = 5 + }; + + // general construction + inline friend Register as_Register(int encoding); + + inline VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return value(); } + const char* name() const; + + // testers (is_valid/is_volatile mask the value with 0x7F before the range checks) + bool is_valid() const { return (0 <= (value()&0x7F) && (value()&0x7F) < number_of_registers); } + bool is_even() const { return (encoding() & 1) == 0; } + bool
is_volatile() const { return (0 <= (value()&0x7F) && (value()&0x7F) <= 5) || (value()&0x7F)==14; } + bool is_nonvolatile() const { return is_valid() && !is_volatile(); } + + public: + // derived registers: predecessor/successor wrap around modulo number_of_registers + Register predecessor() const { return as_Register((encoding()-1) & (number_of_registers-1)); } + Register successor() const { return as_Register((encoding() + 1) & (number_of_registers-1)); } +}; + +// The integer registers of the z/Architecture. + +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + +CONSTANT_REGISTER_DECLARATION(Register, Z_R0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15)); + +// Use ConditionRegister as shortcut for ConditionRegisterImpl*. +class ConditionRegisterImpl; +typedef ConditionRegisterImpl* ConditionRegister; + +// The implementation of condition register(s) for the z/Architecture.
+ +class ConditionRegisterImpl: public AbstractRegisterImpl { + public: + + enum { + number_of_registers = 1 + }; + + // accessors + int encoding() const { + assert(is_valid(), "invalid register"); return value(); + } + + // testers (the condition register always reports itself as volatile) + bool is_valid() const { + return (0 <= value() && value() < number_of_registers); + } + bool is_volatile() const { + return true; + } + bool is_nonvolatile() const { + return false; + } + + // construction. + inline friend ConditionRegister as_ConditionRegister(int encoding); + + inline VMReg as_VMReg(); +}; + +inline ConditionRegister as_ConditionRegister(int encoding) { + assert(encoding >= 0 && encoding < ConditionRegisterImpl::number_of_registers, "bad condition register encoding"); + return (ConditionRegister)(long)encoding; +} + +// The condition register of the z/Architecture. + +CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0)); + +// Because z/Architecture has so many registers, #define'ing values for them is +// beneficial in code size and is worth the cost of some of the +// dangers of defines. +// If a particular file has a problem with these defines then it's possible +// to turn them off in that file by defining +// DONT_USE_REGISTER_DEFINES. register_definitions_s390.cpp does that +// so that it's able to provide real definitions of these registers +// for use in debuggers and such.
+ +#ifndef DONT_USE_REGISTER_DEFINES +#define noreg ((Register)(noreg_RegisterEnumValue)) + +#define Z_R0 ((Register)(Z_R0_RegisterEnumValue)) +#define Z_R1 ((Register)(Z_R1_RegisterEnumValue)) +#define Z_R2 ((Register)(Z_R2_RegisterEnumValue)) +#define Z_R3 ((Register)(Z_R3_RegisterEnumValue)) +#define Z_R4 ((Register)(Z_R4_RegisterEnumValue)) +#define Z_R5 ((Register)(Z_R5_RegisterEnumValue)) +#define Z_R6 ((Register)(Z_R6_RegisterEnumValue)) +#define Z_R7 ((Register)(Z_R7_RegisterEnumValue)) +#define Z_R8 ((Register)(Z_R8_RegisterEnumValue)) +#define Z_R9 ((Register)(Z_R9_RegisterEnumValue)) +#define Z_R10 ((Register)(Z_R10_RegisterEnumValue)) +#define Z_R11 ((Register)(Z_R11_RegisterEnumValue)) +#define Z_R12 ((Register)(Z_R12_RegisterEnumValue)) +#define Z_R13 ((Register)(Z_R13_RegisterEnumValue)) +#define Z_R14 ((Register)(Z_R14_RegisterEnumValue)) +#define Z_R15 ((Register)(Z_R15_RegisterEnumValue)) + +#define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue)) +#endif // DONT_USE_REGISTER_DEFINES + +// Use FloatRegister as shortcut for FloatRegisterImpl*. +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +// The implementation of float registers for the z/Architecture.
+ +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(long)encoding; +} + +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 16, + number_of_arg_registers = 4 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + inline VMReg as_VMReg(); + + // accessors + int encoding() const { + assert(is_valid(), "invalid register"); return value(); + } + + bool is_valid() const { return 0 <= value() && value() < number_of_registers; } + bool is_volatile() const { return (0 <= (value()&0x7F) && (value()&0x7F) <= 7); } + bool is_nonvolatile() const { return (8 <= (value()&0x7F) && (value()&0x7F) <= 15); } + + const char* name() const; + + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } // NOTE(review): no wrap-around, unlike RegisterImpl::successor(); for Z_F15 this yields encoding 16 - confirm intended. +}; + +// The float registers of z/Architecture. + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F0, (0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F1, (1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F2, (2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F3, (3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F4, (4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F5, (5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F6, (6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F7, (7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F8, (8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F9, (9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F10, (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F11, (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F12, (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F13, (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F14, (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F15, (15)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define fnoreg ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) +#define Z_F0
((FloatRegister)( Z_F0_FloatRegisterEnumValue)) +#define Z_F1 ((FloatRegister)( Z_F1_FloatRegisterEnumValue)) +#define Z_F2 ((FloatRegister)( Z_F2_FloatRegisterEnumValue)) +#define Z_F3 ((FloatRegister)( Z_F3_FloatRegisterEnumValue)) +#define Z_F4 ((FloatRegister)( Z_F4_FloatRegisterEnumValue)) +#define Z_F5 ((FloatRegister)( Z_F5_FloatRegisterEnumValue)) +#define Z_F6 ((FloatRegister)( Z_F6_FloatRegisterEnumValue)) +#define Z_F7 ((FloatRegister)( Z_F7_FloatRegisterEnumValue)) +#define Z_F8 ((FloatRegister)( Z_F8_FloatRegisterEnumValue)) +#define Z_F9 ((FloatRegister)( Z_F9_FloatRegisterEnumValue)) +#define Z_F10 ((FloatRegister)( Z_F10_FloatRegisterEnumValue)) +#define Z_F11 ((FloatRegister)( Z_F11_FloatRegisterEnumValue)) +#define Z_F12 ((FloatRegister)( Z_F12_FloatRegisterEnumValue)) +#define Z_F13 ((FloatRegister)( Z_F13_FloatRegisterEnumValue)) +#define Z_F14 ((FloatRegister)( Z_F14_FloatRegisterEnumValue)) +#define Z_F15 ((FloatRegister)( Z_F15_FloatRegisterEnumValue)) +#endif // DONT_USE_REGISTER_DEFINES + +// Need to know the total number of registers of all sorts for SharedInfo. +// Define a class that exports it. + +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + number_of_registers = + (RegisterImpl::number_of_registers + + FloatRegisterImpl::number_of_registers) + * 2 // register halves + + 1 // condition code register + }; + static const int max_gpr; // Defined in register_s390.cpp. + static const int max_fpr; // Defined in register_s390.cpp. +}; + +// Single, Double and Quad fp reg classes. These exist to map the ADLC +// encoding for a floating point register, to the FloatRegister number +// desired by the macroassembler. A FloatRegister is a number between +// 0 and 31 passed around as a pointer (NOTE(review): FloatRegisterImpl declares only 16 registers, i.e. valid encodings 0..15 - confirm this range). For ADLC, an fp register encoding +// is the actual bit encoding used by the z/Architecture hardware.
When ADLC used +// the macroassembler to generate an instruction that references, e.g., a +// double fp reg, it passed the bit encoding to the macroassembler via +// as_FloatRegister, which, for double regs > 30, returns an illegal +// register number. +// +// Therefore we provide the following classes for use by ADLC. Their +// sole purpose is to convert from z/Architecture register encodings to FloatRegisters. +// At some future time, we might replace FloatRegister with these classes, +// hence the definitions of as_xxxFloatRegister inside the classes (as friend functions) rather +// than as external inline routines. + +class SingleFloatRegisterImpl; +typedef SingleFloatRegisterImpl *SingleFloatRegister; + +class SingleFloatRegisterImpl { + public: + friend FloatRegister as_SingleFloatRegister(int encoding) { + assert(encoding < 32, "bad single float register encoding"); + return as_FloatRegister(encoding); + } +}; + +class DoubleFloatRegisterImpl; +typedef DoubleFloatRegisterImpl *DoubleFloatRegister; + +class DoubleFloatRegisterImpl { + public: + friend FloatRegister as_DoubleFloatRegister(int encoding) { + assert(encoding < 32, "bad double float register encoding"); + return as_FloatRegister(((encoding & 1) << 5) | (encoding & 0x1e)); // NOTE(review): an odd encoding sets bit 5, giving a value >= 32, beyond FloatRegisterImpl::number_of_registers (16) - confirm. + } +}; + +class QuadFloatRegisterImpl; +typedef QuadFloatRegisterImpl *QuadFloatRegister; + +class QuadFloatRegisterImpl { + public: + friend FloatRegister as_QuadFloatRegister(int encoding) { + assert(encoding < 32 && ((encoding & 2) == 0), "bad quad float register encoding"); + return as_FloatRegister(((encoding & 1) << 5) | (encoding & 0x1c)); + } +}; + + +// Common register declarations used in assembler code.
+REGISTER_DECLARATION(Register, Z_EXC_OOP, Z_R2); +REGISTER_DECLARATION(Register, Z_EXC_PC, Z_R3); +REGISTER_DECLARATION(Register, Z_RET, Z_R2); +REGISTER_DECLARATION(Register, Z_ARG1, Z_R2); +REGISTER_DECLARATION(Register, Z_ARG2, Z_R3); +REGISTER_DECLARATION(Register, Z_ARG3, Z_R4); +REGISTER_DECLARATION(Register, Z_ARG4, Z_R5); +REGISTER_DECLARATION(Register, Z_ARG5, Z_R6); +REGISTER_DECLARATION(Register, Z_SP, Z_R15); +REGISTER_DECLARATION(FloatRegister, Z_FRET, Z_F0); +REGISTER_DECLARATION(FloatRegister, Z_FARG1, Z_F0); +REGISTER_DECLARATION(FloatRegister, Z_FARG2, Z_F2); +REGISTER_DECLARATION(FloatRegister, Z_FARG3, Z_F4); +REGISTER_DECLARATION(FloatRegister, Z_FARG4, Z_F6); + +#ifndef DONT_USE_REGISTER_DEFINES +#define Z_EXC_OOP AS_REGISTER(Register, Z_R2) +#define Z_EXC_PC AS_REGISTER(Register, Z_R3) +#define Z_RET AS_REGISTER(Register, Z_R2) +#define Z_ARG1 AS_REGISTER(Register, Z_R2) +#define Z_ARG2 AS_REGISTER(Register, Z_R3) +#define Z_ARG3 AS_REGISTER(Register, Z_R4) +#define Z_ARG4 AS_REGISTER(Register, Z_R5) +#define Z_ARG5 AS_REGISTER(Register, Z_R6) +#define Z_SP AS_REGISTER(Register, Z_R15) +#define Z_FRET AS_REGISTER(FloatRegister, Z_F0) +#define Z_FARG1 AS_REGISTER(FloatRegister, Z_F0) +#define Z_FARG2 AS_REGISTER(FloatRegister, Z_F2) +#define Z_FARG3 AS_REGISTER(FloatRegister, Z_F4) +#define Z_FARG4 AS_REGISTER(FloatRegister, Z_F6) +#endif + +// Register declarations to be used in frame manager assembly code. +// Use only non-volatile registers in order to keep values across C-calls. NOTE(review): Z_tos (Z_R2), Z_ftos (Z_F0) and Z_bytecode (Z_R14) below are volatile according to is_volatile(). + +// Register to cache the integer value on top of the operand stack. +REGISTER_DECLARATION(Register, Z_tos, Z_R2); +// Register to cache the fp value on top of the operand stack. +REGISTER_DECLARATION(FloatRegister, Z_ftos, Z_F0); +// Expression stack pointer in interpreted java frame. +REGISTER_DECLARATION(Register, Z_esp, Z_R7); +// Address of current thread. +REGISTER_DECLARATION(Register, Z_thread, Z_R8); +// Address of current method.
only valid in interpreter_entry. +REGISTER_DECLARATION(Register, Z_method, Z_R9); +// Inline cache register. Used by c1 and c2. Shares Z_R9 with Z_method and Z_fp. +REGISTER_DECLARATION(Register, Z_inline_cache,Z_R9); +// Frame pointer of current interpreter frame. Only valid while +// executing bytecodes. +REGISTER_DECLARATION(Register, Z_fp, Z_R9); +// Address of the locals array in an interpreted java frame. +REGISTER_DECLARATION(Register, Z_locals, Z_R12); +// Bytecode pointer. +REGISTER_DECLARATION(Register, Z_bcp, Z_R13); +// Bytecode which is dispatched (short lived!). +REGISTER_DECLARATION(Register, Z_bytecode, Z_R14); +#ifndef DONT_USE_REGISTER_DEFINES +#define Z_tos AS_REGISTER(Register, Z_R2) +#define Z_ftos AS_REGISTER(FloatRegister, Z_F0) +#define Z_esp AS_REGISTER(Register, Z_R7) +#define Z_thread AS_REGISTER(Register, Z_R8) +#define Z_method AS_REGISTER(Register, Z_R9) +#define Z_inline_cache AS_REGISTER(Register, Z_R9) +#define Z_fp AS_REGISTER(Register, Z_R9) +#define Z_locals AS_REGISTER(Register, Z_R12) +#define Z_bcp AS_REGISTER(Register, Z_R13) +#define Z_bytecode AS_REGISTER(Register, Z_R14) +#endif + +// Temporary registers to be used within frame manager. We can use +// the nonvolatiles because the call stub has saved them. +// Use only non-volatile registers in order to keep values across C-calls. Note: Z_tmp_3/Z_tmp_4 alias Z_locals (Z_R12) and Z_bcp (Z_R13). +REGISTER_DECLARATION(Register, Z_tmp_1, Z_R10); +REGISTER_DECLARATION(Register, Z_tmp_2, Z_R11); +REGISTER_DECLARATION(Register, Z_tmp_3, Z_R12); +REGISTER_DECLARATION(Register, Z_tmp_4, Z_R13); +#ifndef DONT_USE_REGISTER_DEFINES +#define Z_tmp_1 AS_REGISTER(Register, Z_R10) +#define Z_tmp_2 AS_REGISTER(Register, Z_R11) +#define Z_tmp_3 AS_REGISTER(Register, Z_R12) +#define Z_tmp_4 AS_REGISTER(Register, Z_R13) +#endif + +// Scratch registers are volatile.
+REGISTER_DECLARATION(Register, Z_R0_scratch, Z_R0); +REGISTER_DECLARATION(Register, Z_R1_scratch, Z_R1); +REGISTER_DECLARATION(FloatRegister, Z_fscratch_1, Z_F1); +#ifndef DONT_USE_REGISTER_DEFINES +#define Z_R0_scratch AS_REGISTER(Register, Z_R0) +#define Z_R1_scratch AS_REGISTER(Register, Z_R1) +#define Z_fscratch_1 AS_REGISTER(FloatRegister, Z_F1) +#endif + + +#endif // CPU_S390_VM_REGISTER_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/relocInfo_s390.cpp b/hotspot/src/cpu/s390/vm/relocInfo_s390.cpp new file mode 100644 index 00000000000..5ac592934d3 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/relocInfo_s390.cpp @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.inline.hpp" +#include "code/relocInfo.hpp" +#include "nativeInst_s390.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + // Patches the instruction at addr() with value x according to format(). We don't support splitting of relocations, so o must be zero: + assert(o == 0, "tried to split relocations"); + if (!verify_only) { + switch (format()) { + case relocInfo::uncompressed_format: + nativeMovConstReg_at(addr())->set_data_plain(((intptr_t)x) + o, code()); + break; + case relocInfo::compressed_format: + if (type() == relocInfo::metadata_type) + nativeMovConstReg_at(addr())->set_narrow_klass(((intptr_t)x) + o); + else if (type() == relocInfo::oop_type) + nativeMovConstReg_at(addr())->set_narrow_oop(((intptr_t)x) + o); + else + guarantee(false, "bad relocInfo type for relocInfo::narrow_oop_format"); + break; + case relocInfo::pcrel_addr_format: // patch target location + nativeMovConstReg_at(addr())->set_pcrel_addr(((intptr_t)x) + o, code()); + break; + case relocInfo::pcrel_data_format: // patch data at target location + nativeMovConstReg_at(addr())->set_pcrel_data(((intptr_t)x) + o, code()); + break; + default: + assert(false, "not a valid relocInfo format"); + break; + } + } else { + // TODO: Reading of narrow oops out of code stream is not implemented + // (see nativeMovConstReg::data()). Implement this if you want to verify.
+ // assert(x == (address) nativeMovConstReg_at(addr())->data(), "Instructions must match"); + switch (format()) { + case relocInfo::uncompressed_format: + break; + case relocInfo::compressed_format: + break; + case relocInfo::pcrel_addr_format: + break; + case relocInfo::pcrel_data_format: + break; + default: + assert(false, "not a valid relocInfo format"); + break; + } + } +} + +address Relocation::pd_call_destination(address orig_addr) { + address inst_addr = addr(); + + if (NativeFarCall::is_far_call_at(inst_addr)) { + if (!ShortenBranches) { + if (MacroAssembler::is_call_far_pcrelative(inst_addr)) { + address a1 = MacroAssembler::get_target_addr_pcrel(orig_addr+MacroAssembler::nop_size()); +#ifdef ASSERT + address a2 = MacroAssembler::get_target_addr_pcrel(inst_addr+MacroAssembler::nop_size()); + address a3 = nativeFarCall_at(orig_addr)->destination(); + address a4 = nativeFarCall_at(inst_addr)->destination(); + if ((a1 != a3) || (a2 != a4)) { + unsigned int range = 128; + Assembler::dump_code_range(tty, inst_addr, range, "pc-relative call w/o ShortenBranches?"); + Assembler::dump_code_range(tty, orig_addr, range, "pc-relative call w/o ShortenBranches?"); + assert(false, "pc-relative call w/o ShortenBranches?"); + } +#endif + return a1; + } + return (address)(-1); + } + NativeFarCall* call; + if (orig_addr == NULL) { + call = nativeFarCall_at(inst_addr); + } else { + if (MacroAssembler::is_call_far_patchable_pcrelative_at(inst_addr)) { + call = nativeFarCall_at(orig_addr); + } else { + call = nativeFarCall_at(orig_addr); // Must access location (in CP) where destination is stored in unmoved code, because load from CP is pc-relative. NOTE(review): both branches of this if/else are identical. + } + } + return call->destination(); + } + + if (NativeCall::is_call_at(inst_addr)) { + NativeCall* call = nativeCall_at(inst_addr); + if (call->is_pcrelative()) { + intptr_t off = inst_addr - orig_addr; + return (address) (call->destination()-off); + } + } + + return (address) nativeMovConstReg_at(inst_addr)->data(); +} + +void
Relocation::pd_set_call_destination(address x) { + address inst_addr = addr(); + + if (NativeFarCall::is_far_call_at(inst_addr)) { + if (!ShortenBranches) { + if (MacroAssembler::is_call_far_pcrelative(inst_addr)) { + address a1 = MacroAssembler::get_target_addr_pcrel(inst_addr+MacroAssembler::nop_size()); +#ifdef ASSERT + address a3 = nativeFarCall_at(inst_addr)->destination(); + if (a1 != a3) { + unsigned int range = 128; + Assembler::dump_code_range(tty, inst_addr, range, "pc-relative call w/o ShortenBranches?"); + assert(false, "pc-relative call w/o ShortenBranches?"); + } +#endif + nativeFarCall_at(inst_addr)->set_destination(x, 0); + return; + } + assert(x == (address)-1, "consistency check"); + return; + } + int toc_offset = -1; + if (type() == relocInfo::runtime_call_w_cp_type) { + toc_offset = ((runtime_call_w_cp_Relocation *)this)->get_constant_pool_offset(); + } + if (toc_offset>=0) { + NativeFarCall* call = nativeFarCall_at(inst_addr); + call->set_destination(x, toc_offset); + return; + } + } + + if (NativeCall::is_call_at(inst_addr)) { + NativeCall* call = nativeCall_at(inst_addr); + if (call->is_pcrelative()) { + call->set_destination_mt_safe(x); + return; + } + } + + // Constant is absolute, must use x. + nativeMovConstReg_at(inst_addr)->set_data(((intptr_t)x)); +} + + +// Store the new target address into the oop_Relocation or metadata_Relocation cell found between begin and end, if any. +// Returns true if an update happened, false otherwise. +bool relocInfo::update_oop_pool(address begin, address end, address newTarget, CodeBlob* cb) { + + // Try to find the CodeBlob, if not given by caller + if (cb == NULL) cb = CodeCache::find_blob(begin); +#ifdef ASSERT + else + assert(cb == CodeCache::find_blob(begin), "consistency"); +#endif + + // 'RelocIterator' requires an nmethod + nmethod* nm = cb ?
cb->as_nmethod_or_null() : NULL; + if (nm != NULL) { + RelocIterator iter(nm, begin, end); + oop* oop_addr = NULL; + Metadata** metadata_addr = NULL; + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop_Relocation *r = iter.oop_reloc(); + if (oop_addr == NULL) { + oop_addr = r->oop_addr(); + *oop_addr = (oop)newTarget; + } else { + assert(oop_addr == r->oop_addr(), "must be only one set-oop here"); + } + } + if (iter.type() == relocInfo::metadata_type) { + metadata_Relocation *r = iter.metadata_reloc(); + if (metadata_addr == NULL) { + metadata_addr = r->metadata_addr(); + *metadata_addr = (Metadata*)newTarget; + } else { + assert(metadata_addr == r->metadata_addr(), "must be only one set-metadata here"); + } + } + } + return oop_addr || metadata_addr; + } + return false; +} + + +address* Relocation::pd_address_in_code() { + ShouldNotReachHere(); + return 0; +} + +address Relocation::pd_get_address_from_code() { + return (address) (nativeMovConstReg_at(addr())->data()); +} + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/hotspot/src/cpu/s390/vm/relocInfo_s390.hpp b/hotspot/src/cpu/s390/vm/relocInfo_s390.hpp new file mode 100644 index 00000000000..5462ba23525 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/relocInfo_s390.hpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_RELOCINFO_S390_HPP +#define CPU_S390_VM_RELOCINFO_S390_HPP + +//---------------------------- +// relocInfo layout +//---------------------------- + +// This description should be contained in code/relocInfo.hpp +// but was put here to minimize shared code diffs. + +// Relocation information for an nmethod is stored in compressed +// form in an array of element type short int (16 bits). +// Each array element constitutes one relocInfo record. +// The layout of one such record is described here. + +// +------------+---+---+------------------------------+ +// | type | fmt | offset/offset_unit | +// +------------+---+---+------------------------------+ +// +// |<-- value_width (16) ----------------------------->| +// ||<-- nontype_width (12) -------------->| +// (4) +// | |<--+-->|<-- offset_width (10) ------->| +// / \ +// / (2) \ +// /<--format->\ +// | width | + + +// only for type == data_prefix_tag: +// +------------+---+---+------------------------------+ +// | type | | data | +// +------------+---+---+------------------------------+ +// | 15 |<->|<-- datalen_width (11) ---------->| +// | +// +--datalen_tag (1) + +// relocType +// The type field holds a value of relocType (which is +// an enum of all possible relocation types). Currently, +// there are 16 distinct relocation types, requiring +// type_width to be (at least) 4.
+// relocFormat +// The format field holds a value of relocFormat (which is +// an enum of all possible relocation formats). Currently, +// there are 4 distinct relocation formats, requiring +// format_width to be (at least) 2. +// offset +// Each relocInfo is related to one specific address in the CodeBlob. +// The address always points to the first byte of the target instruction. +// It does NOT refer directly to the relocation subfield or embedded constant. +// offset contains the distance of this relocInfo from the previous one. +// offset is scaled by offset_unit (the platform-specific instruction +// alignment requirement) to maximize the encodable distance. +// To obtain the absolute address in the CodeBlob the relocInfo is +// related to, you have to iterate over all relocInfos from the +// beginning, and then use RelocIterator::addr() to get the address. + +// relocType == data_prefix_tag +// These are relocInfo records containing inline data that belongs to +// the next non-data relocInfo record. Usage of that inline data is +// specific and private to that relocInfo record. +// For details refer to code/relocInfo.hpp + + + // machine-dependent parts of class relocInfo + private: + enum { + // Instructions are halfword (2-byte) aligned on z/Architecture. + offset_unit = 2, + + // Encodes Assembler::disp32_operand vs. Assembler::imm64_operand. + // (Assembler::call32_operand is used on call instructions only.) NOTE(review): the four relocFormat values declared below (0..3) are what requires format_width = 2 here - confirm the operand names above still apply. + format_width = 2 + }; + + public: + + enum relocFormat { + no_format = 0, + uncompressed_format = 0, // Relocation is for a regular oop. + compressed_format = 1, // Relocation is for a narrow (compressed) oop or klass. + // Similar to relocInfo::narrow_oop_in_const. + pcrel_addr_format = 2, // Relocation is for the target LOCATION of a pc-relative instruction. + pcrel_data_format = 3 // Relocation is for the target data of a pc-relative instruction. + }; + + // Store the new target address into an oop_Relocation (or metadata_Relocation) cell, if any.
+ // Returns true if an update happened. + static bool update_oop_pool(address begin, address end, address newTarget, CodeBlob* cb); + +#endif // CPU_S390_VM_RELOCINFO_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/runtime_s390.cpp b/hotspot/src/cpu/s390/vm/runtime_s390.cpp new file mode 100644 index 00000000000..82a53130505 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/runtime_s390.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.inline.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_s390.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_s390.inline.hpp" +#endif + +#define __ masm-> + + +//------------------------------generate_exception_blob--------------------------- +// creates exception blob at the end +// Using exception blob, this code is jumped from a compiled method. +// (see emit_exception_handler in s390.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers), unwind the frame, and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a branch. +// +// Arguments: +// Z_R2(=Z_ARG1): exception oop +// Z_R3(=Z_ARG2): exception pc +// +// Results: +// Z_R2: exception oop +// Z_R3: exception pc in caller +// destination: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) + +void OptoRuntime::generate_exception_blob() { + + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("exception_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + Register handle_exception = Z_ARG5; + + __ verify_thread(); + __ z_stg(Z_ARG1/*exception oop*/, Address(Z_thread, JavaThread::exception_oop_offset())); + __ z_stg(Z_ARG2/*issuing pc*/, Address(Z_thread, JavaThread::exception_pc_offset())); + + // Store issuing pc as return pc into + // caller's frame. 
stack-walking needs it. R14 is not valid here, + // because this code gets entered with a jump. + __ z_stg(Z_ARG2/*issuing pc*/, _z_abi(return_pc), Z_SP); + + // The following call to function OptoRuntime::handle_exception_C + // does all the hard work. It checks if an + // exception catch exists in the method. If so, it returns the + // handler address. If the nmethod has been deoptimized and it had + // a handler the handler address is the deopt blob's + // unpack_with_exception entry. + + // push a C frame for the exception blob. it is needed for the + // C call later on. + + Register saved_sp = Z_R11; + + __ z_lgr(saved_sp, Z_SP); + + // push frame for blob. + int frame_size = __ push_frame_abi160(0); + + __ get_PC(Z_R1/*scratch*/); + __ set_last_Java_frame(/*sp=*/Z_SP, /*pc=*/Z_R1); + + // This call can lead to deoptimization of the nmethod holding the handler. + __ z_lgr(Z_ARG1, Z_thread); // argument of C function + __ call_c(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)); + + __ z_lgr(handle_exception, Z_RET); + __ reset_last_Java_frame(); + + // Pop the exception blob's C frame that has been pushed before. + __ z_lgr(Z_SP, saved_sp); + + // [Z_RET]!=NULL was possible in hotspot5 but not in sapjvm6. + // C2I adapter extensions are now removed by a resize in the frame manager + // (unwind_initial_activation_pending_exception). +#ifdef ASSERT + __ z_ltgr(handle_exception, handle_exception); + __ asm_assert_ne("handler must not be NULL", 0x852); +#endif + + // Handle_exception contains the handler address. If the associated frame + // has been deoptimized then the handler has been patched to jump to + // the deoptimization blob. + + // If the exception handler jumps to the deoptimization blob, the + // exception pc will be read from there. 
+ __ z_lg(Z_ARG2, Address(Z_thread, JavaThread::exception_pc_offset())); + + __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset())); + + // Clear the exception oop so GC no longer processes it as a root. + __ clear_mem(Address(Z_thread, JavaThread::exception_oop_offset()),sizeof(intptr_t)); +#ifdef ASSERT + __ clear_mem(Address(Z_thread, JavaThread::exception_handler_pc_offset()), sizeof(intptr_t)); + __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), sizeof(intptr_t)); +#endif + + __ z_br(handle_exception); + + // Make sure all code is generated. + masm->flush(); + + // Set exception blob. + OopMapSet *oop_maps = NULL; + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, frame_size/wordSize); +} diff --git a/hotspot/src/cpu/s390/vm/s390.ad b/hotspot/src/cpu/s390/vm/s390.ad new file mode 100644 index 00000000000..9385cbc72c5 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/s390.ad @@ -0,0 +1,10802 @@ +// +// Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2016 SAP SE. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +// z/Architecture Architecture Description File + +// Major contributions by AS, JL, LS. + +// +// The following information is derived from private mail communication +// (Oct. 2011). +// +// General branch target alignment considerations +// +// z/Architecture does not imply a general branch target alignment requirement. +// There are side effects and side considerations, though, which may +// provide some performance benefit. These are: +// - Align branch target on octoword (32-byte) boundary +// On more recent models (from z9 on), I-fetch is done on an octoword +// (32 bytes at a time) basis. To avoid I-fetching unnecessary +// instructions, branch targets should be 32-byte aligned. If this +// exact alignment cannot be achieved, having the branch target in +// the first doubleword still provides some benefit. +// - Avoid branch targets at the end of cache lines (> 64 bytes distance). +// Sequential instruction prefetching after the branch target starts +// immediately after having fetched the octoword containing the +// branch target. When I-fetching crosses a cache line, there may be +// a small stall. The worst case: the branch target (at the end of +// a cache line) is an L1 I-cache miss and the next line as well. +// Then, the entire target line must be filled first (to continue at the +// branch target). Only then can the next sequential line be filled. +// - Avoid multiple poorly predicted branches in a row. +// + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// architecture. 
+ +register %{ + +//----------Architecture Description Register Definitions---------------------- +// General Registers +// "reg_def" name (register save type, C convention save type, +// ideal register type, encoding); +// +// Register Save Types: +// +// NS = No-Save: The register allocator assumes that these registers +// can be used without saving upon entry to the method, & +// that they do not need to be saved at call sites. +// +// SOC = Save-On-Call: The register allocator assumes that these registers +// can be used without saving upon entry to the method, +// but that they must be saved at call sites. +// +// SOE = Save-On-Entry: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, but they do not need to be saved at call sites. +// +// AS = Always-Save: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, & that they must be saved at call sites. +// +// Ideal Register Type is used to determine how to save & restore a +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. +// +// The encoding number is the actual bit-pattern placed into the opcodes. + +// z/Architecture register definitions, based on the z/Architecture Principles +// of Operation, 5th Edition, September 2005, and z/Linux Elf ABI Supplement, +// 5th Edition, March 2001. +// +// For each 64-bit register we must define two registers: the register +// itself, e.g. Z_R3, and a corresponding virtual other (32-bit-)'half', +// e.g. Z_R3_H, which is needed by the allocator, but is not used +// for stores, loads, etc. + + // Integer/Long Registers + // ---------------------------- + + // z/Architecture has 16 64-bit integer registers. 
+ + // types: v = volatile, nv = non-volatile, s = system + reg_def Z_R0 (SOC, SOC, Op_RegI, 0, Z_R0->as_VMReg()); // v scratch1 + reg_def Z_R0_H (SOC, SOC, Op_RegI, 99, Z_R0->as_VMReg()->next()); + reg_def Z_R1 (SOC, SOC, Op_RegI, 1, Z_R1->as_VMReg()); // v scratch2 + reg_def Z_R1_H (SOC, SOC, Op_RegI, 99, Z_R1->as_VMReg()->next()); + reg_def Z_R2 (SOC, SOC, Op_RegI, 2, Z_R2->as_VMReg()); // v iarg1 & iret + reg_def Z_R2_H (SOC, SOC, Op_RegI, 99, Z_R2->as_VMReg()->next()); + reg_def Z_R3 (SOC, SOC, Op_RegI, 3, Z_R3->as_VMReg()); // v iarg2 + reg_def Z_R3_H (SOC, SOC, Op_RegI, 99, Z_R3->as_VMReg()->next()); + reg_def Z_R4 (SOC, SOC, Op_RegI, 4, Z_R4->as_VMReg()); // v iarg3 + reg_def Z_R4_H (SOC, SOC, Op_RegI, 99, Z_R4->as_VMReg()->next()); + reg_def Z_R5 (SOC, SOC, Op_RegI, 5, Z_R5->as_VMReg()); // v iarg4 + reg_def Z_R5_H (SOC, SOC, Op_RegI, 99, Z_R5->as_VMReg()->next()); + reg_def Z_R6 (SOC, SOE, Op_RegI, 6, Z_R6->as_VMReg()); // v iarg5 + reg_def Z_R6_H (SOC, SOE, Op_RegI, 99, Z_R6->as_VMReg()->next()); + reg_def Z_R7 (SOC, SOE, Op_RegI, 7, Z_R7->as_VMReg()); + reg_def Z_R7_H (SOC, SOE, Op_RegI, 99, Z_R7->as_VMReg()->next()); + reg_def Z_R8 (SOC, SOE, Op_RegI, 8, Z_R8->as_VMReg()); + reg_def Z_R8_H (SOC, SOE, Op_RegI, 99, Z_R8->as_VMReg()->next()); + reg_def Z_R9 (SOC, SOE, Op_RegI, 9, Z_R9->as_VMReg()); + reg_def Z_R9_H (SOC, SOE, Op_RegI, 99, Z_R9->as_VMReg()->next()); + reg_def Z_R10 (SOC, SOE, Op_RegI, 10, Z_R10->as_VMReg()); + reg_def Z_R10_H(SOC, SOE, Op_RegI, 99, Z_R10->as_VMReg()->next()); + reg_def Z_R11 (SOC, SOE, Op_RegI, 11, Z_R11->as_VMReg()); + reg_def Z_R11_H(SOC, SOE, Op_RegI, 99, Z_R11->as_VMReg()->next()); + reg_def Z_R12 (SOC, SOE, Op_RegI, 12, Z_R12->as_VMReg()); + reg_def Z_R12_H(SOC, SOE, Op_RegI, 99, Z_R12->as_VMReg()->next()); + reg_def Z_R13 (SOC, SOE, Op_RegI, 13, Z_R13->as_VMReg()); + reg_def Z_R13_H(SOC, SOE, Op_RegI, 99, Z_R13->as_VMReg()->next()); + reg_def Z_R14 (NS, NS, Op_RegI, 14, Z_R14->as_VMReg()); // s return_pc + reg_def 
Z_R14_H(NS, NS, Op_RegI, 99, Z_R14->as_VMReg()->next()); + reg_def Z_R15 (NS, NS, Op_RegI, 15, Z_R15->as_VMReg()); // s SP + reg_def Z_R15_H(NS, NS, Op_RegI, 99, Z_R15->as_VMReg()->next()); + + // Float/Double Registers + + // The rules of ADL require that double registers be defined in pairs. + // Each pair must be two 32-bit values, but not necessarily a pair of + // single float registers. In each pair, ADLC-assigned register numbers + // must be adjacent, with the lower number even. Finally, when the + // CPU stores such a register pair to memory, the word associated with + // the lower ADLC-assigned number must be stored to the lower address. + + // z/Architecture has 16 64-bit floating-point registers. Each can store a single + // or double precision floating-point value. + + // types: v = volatile, nv = non-volatile, s = system + reg_def Z_F0 (SOC, SOC, Op_RegF, 0, Z_F0->as_VMReg()); // v farg1 & fret + reg_def Z_F0_H (SOC, SOC, Op_RegF, 99, Z_F0->as_VMReg()->next()); + reg_def Z_F1 (SOC, SOC, Op_RegF, 1, Z_F1->as_VMReg()); + reg_def Z_F1_H (SOC, SOC, Op_RegF, 99, Z_F1->as_VMReg()->next()); + reg_def Z_F2 (SOC, SOC, Op_RegF, 2, Z_F2->as_VMReg()); // v farg2 + reg_def Z_F2_H (SOC, SOC, Op_RegF, 99, Z_F2->as_VMReg()->next()); + reg_def Z_F3 (SOC, SOC, Op_RegF, 3, Z_F3->as_VMReg()); + reg_def Z_F3_H (SOC, SOC, Op_RegF, 99, Z_F3->as_VMReg()->next()); + reg_def Z_F4 (SOC, SOC, Op_RegF, 4, Z_F4->as_VMReg()); // v farg3 + reg_def Z_F4_H (SOC, SOC, Op_RegF, 99, Z_F4->as_VMReg()->next()); + reg_def Z_F5 (SOC, SOC, Op_RegF, 5, Z_F5->as_VMReg()); + reg_def Z_F5_H (SOC, SOC, Op_RegF, 99, Z_F5->as_VMReg()->next()); + reg_def Z_F6 (SOC, SOC, Op_RegF, 6, Z_F6->as_VMReg()); + reg_def Z_F6_H (SOC, SOC, Op_RegF, 99, Z_F6->as_VMReg()->next()); + reg_def Z_F7 (SOC, SOC, Op_RegF, 7, Z_F7->as_VMReg()); + reg_def Z_F7_H (SOC, SOC, Op_RegF, 99, Z_F7->as_VMReg()->next()); + reg_def Z_F8 (SOC, SOE, Op_RegF, 8, Z_F8->as_VMReg()); + reg_def Z_F8_H (SOC, SOE, Op_RegF, 99, 
Z_F8->as_VMReg()->next()); + reg_def Z_F9 (SOC, SOE, Op_RegF, 9, Z_F9->as_VMReg()); + reg_def Z_F9_H (SOC, SOE, Op_RegF, 99, Z_F9->as_VMReg()->next()); + reg_def Z_F10 (SOC, SOE, Op_RegF, 10, Z_F10->as_VMReg()); + reg_def Z_F10_H(SOC, SOE, Op_RegF, 99, Z_F10->as_VMReg()->next()); + reg_def Z_F11 (SOC, SOE, Op_RegF, 11, Z_F11->as_VMReg()); + reg_def Z_F11_H(SOC, SOE, Op_RegF, 99, Z_F11->as_VMReg()->next()); + reg_def Z_F12 (SOC, SOE, Op_RegF, 12, Z_F12->as_VMReg()); + reg_def Z_F12_H(SOC, SOE, Op_RegF, 99, Z_F12->as_VMReg()->next()); + reg_def Z_F13 (SOC, SOE, Op_RegF, 13, Z_F13->as_VMReg()); + reg_def Z_F13_H(SOC, SOE, Op_RegF, 99, Z_F13->as_VMReg()->next()); + reg_def Z_F14 (SOC, SOE, Op_RegF, 14, Z_F14->as_VMReg()); + reg_def Z_F14_H(SOC, SOE, Op_RegF, 99, Z_F14->as_VMReg()->next()); + reg_def Z_F15 (SOC, SOE, Op_RegF, 15, Z_F15->as_VMReg()); + reg_def Z_F15_H(SOC, SOE, Op_RegF, 99, Z_F15->as_VMReg()->next()); + + + // Special Registers + + // Condition Codes Flag Registers + + // z/Architecture has the PSW (program status word) that contains + // (among other information) the condition code. We treat this + // part of the PSW as a condition register CR. It consists of 4 + // bits. Floating point instructions influence the same condition register CR. + + reg_def Z_CR(SOC, SOC, Op_RegFlags, 0, Z_CR->as_VMReg()); // volatile + + +// Specify priority of register selection within phases of register +// allocation. Highest priority is first. A useful heuristic is to +// give registers a low priority when they are required by machine +// instructions, and choose no-save registers before save-on-call, and +// save-on-call before save-on-entry. Registers which participate in +// fix calling sequences should come last. Registers which are used +// as pairs must fall on an even boundary. + +// It's worth about 1% on SPEC geomean to get this right. 
+ +// Chunk0, chunk1, and chunk2 form the MachRegisterNumbers enumeration +// in adGlobals_s390.hpp which defines the <register>_num values, e.g. +// Z_R3_num. Therefore, Z_R3_num may not be (and in reality is not) +// the same as Z_R3->encoding()! Furthermore, we cannot make any +// assumptions on ordering, e.g. Z_R3_num may be less than Z_R2_num. +// Additionally, the function +// static enum RC rc_class(OptoReg::Name reg) +// maps a given <register>_num value to its chunk type (except for flags) +// and its current implementation relies on chunk0 and chunk1 having a +// size of 64 each. + +alloc_class chunk0( + // chunk0 contains *all* 32 integer register halves. + + // potential SOE regs + Z_R13,Z_R13_H, + Z_R12,Z_R12_H, + Z_R11,Z_R11_H, + Z_R10,Z_R10_H, + + Z_R9,Z_R9_H, + Z_R8,Z_R8_H, + Z_R7,Z_R7_H, + + Z_R1,Z_R1_H, + Z_R0,Z_R0_H, + + // argument registers + Z_R6,Z_R6_H, + Z_R5,Z_R5_H, + Z_R4,Z_R4_H, + Z_R3,Z_R3_H, + Z_R2,Z_R2_H, + + // special registers + Z_R14,Z_R14_H, + Z_R15,Z_R15_H +); + +alloc_class chunk1( + // Chunk1 contains *all* 32 floating-point register halves. + + Z_F15,Z_F15_H, + Z_F14,Z_F14_H, + Z_F13,Z_F13_H, + Z_F12,Z_F12_H, + Z_F11,Z_F11_H, + Z_F10,Z_F10_H, + Z_F9,Z_F9_H, + Z_F8,Z_F8_H, + // scratch register + Z_F7,Z_F7_H, + Z_F5,Z_F5_H, + Z_F3,Z_F3_H, + Z_F1,Z_F1_H, + // argument registers + Z_F6,Z_F6_H, + Z_F4,Z_F4_H, + Z_F2,Z_F2_H, + Z_F0,Z_F0_H +); + +alloc_class chunk2( + Z_CR +); + + +//-------Architecture Description Register Classes----------------------- + +// Several register classes are automatically defined based upon +// information in this architecture description. 
+ +// 1) reg_class inline_cache_reg (as defined in frame section) +// 2) reg_class compiler_method_oop_reg (as defined in frame section) +// 3) reg_class interpreter_method_oop_reg (as defined in frame section) +// 4) reg_class stack_slots(/* one chunk of stack-based "registers" */) + +// Integer Register Classes +reg_class z_int_reg( +/*Z_R0*/ // R0 +/*Z_R1*/ + Z_R2, + Z_R3, + Z_R4, + Z_R5, + Z_R6, + Z_R7, +/*Z_R8,*/ // Z_thread + Z_R9, + Z_R10, + Z_R11, + Z_R12, + Z_R13 +/*Z_R14*/ // return_pc +/*Z_R15*/ // SP +); + +reg_class z_no_odd_int_reg( +/*Z_R0*/ // R0 +/*Z_R1*/ + Z_R2, + Z_R3, + Z_R4, +/*Z_R5,*/ // odd part of fixed register pair + Z_R6, + Z_R7, +/*Z_R8,*/ // Z_thread + Z_R9, + Z_R10, + Z_R11, + Z_R12, + Z_R13 +/*Z_R14*/ // return_pc +/*Z_R15*/ // SP +); + +reg_class z_no_arg_int_reg( +/*Z_R0*/ // R0 +/*Z_R1*/ // scratch +/*Z_R2*/ +/*Z_R3*/ +/*Z_R4*/ +/*Z_R5*/ +/*Z_R6*/ + Z_R7, +/*Z_R8*/ // Z_thread + Z_R9, + Z_R10, + Z_R11, + Z_R12, + Z_R13 +/*Z_R14*/ // return_pc +/*Z_R15*/ // SP +); + +reg_class z_rarg1_int_reg(Z_R2); +reg_class z_rarg2_int_reg(Z_R3); +reg_class z_rarg3_int_reg(Z_R4); +reg_class z_rarg4_int_reg(Z_R5); +reg_class z_rarg5_int_reg(Z_R6); + +// Pointer Register Classes + +// 64-bit build means 64-bit pointers means hi/lo pairs. 
+ +reg_class z_rarg5_ptrN_reg(Z_R6); + +reg_class z_rarg1_ptr_reg(Z_R2_H,Z_R2); +reg_class z_rarg2_ptr_reg(Z_R3_H,Z_R3); +reg_class z_rarg3_ptr_reg(Z_R4_H,Z_R4); +reg_class z_rarg4_ptr_reg(Z_R5_H,Z_R5); +reg_class z_rarg5_ptr_reg(Z_R6_H,Z_R6); +reg_class z_thread_ptr_reg(Z_R8_H,Z_R8); + +reg_class z_ptr_reg( +/*Z_R0_H,Z_R0*/ // R0 +/*Z_R1_H,Z_R1*/ + Z_R2_H,Z_R2, + Z_R3_H,Z_R3, + Z_R4_H,Z_R4, + Z_R5_H,Z_R5, + Z_R6_H,Z_R6, + Z_R7_H,Z_R7, +/*Z_R8_H,Z_R8,*/ // Z_thread + Z_R9_H,Z_R9, + Z_R10_H,Z_R10, + Z_R11_H,Z_R11, + Z_R12_H,Z_R12, + Z_R13_H,Z_R13 +/*Z_R14_H,Z_R14*/ // return_pc +/*Z_R15_H,Z_R15*/ // SP +); + +reg_class z_lock_ptr_reg( +/*Z_R0_H,Z_R0*/ // R0 +/*Z_R1_H,Z_R1*/ + Z_R2_H,Z_R2, + Z_R3_H,Z_R3, + Z_R4_H,Z_R4, +/*Z_R5_H,Z_R5,*/ +/*Z_R6_H,Z_R6,*/ + Z_R7_H,Z_R7, +/*Z_R8_H,Z_R8,*/ // Z_thread + Z_R9_H,Z_R9, + Z_R10_H,Z_R10, + Z_R11_H,Z_R11, + Z_R12_H,Z_R12, + Z_R13_H,Z_R13 +/*Z_R14_H,Z_R14*/ // return_pc +/*Z_R15_H,Z_R15*/ // SP +); + +reg_class z_no_arg_ptr_reg( +/*Z_R0_H,Z_R0*/ // R0 +/*Z_R1_H,Z_R1*/ // scratch +/*Z_R2_H,Z_R2*/ +/*Z_R3_H,Z_R3*/ +/*Z_R4_H,Z_R4*/ +/*Z_R5_H,Z_R5*/ +/*Z_R6_H,Z_R6*/ + Z_R7_H, Z_R7, +/*Z_R8_H,Z_R8*/ // Z_thread + Z_R9_H,Z_R9, + Z_R10_H,Z_R10, + Z_R11_H,Z_R11, + Z_R12_H,Z_R12, + Z_R13_H,Z_R13 +/*Z_R14_H,Z_R14*/ // return_pc +/*Z_R15_H,Z_R15*/ // SP +); + +// Special class for storeP instructions, which can store SP or RPC to +// TLS. (Note: Do not generalize this to "any_reg". If you add +// another register, such as FP, to this mask, the allocator may try +// to put a temp in it.) +// Register class for memory access base registers, +// This class is a superset of z_ptr_reg including Z_thread. 
+reg_class z_memory_ptr_reg( +/*Z_R0_H,Z_R0*/ // R0 +/*Z_R1_H,Z_R1*/ + Z_R2_H,Z_R2, + Z_R3_H,Z_R3, + Z_R4_H,Z_R4, + Z_R5_H,Z_R5, + Z_R6_H,Z_R6, + Z_R7_H,Z_R7, + Z_R8_H,Z_R8, // Z_thread + Z_R9_H,Z_R9, + Z_R10_H,Z_R10, + Z_R11_H,Z_R11, + Z_R12_H,Z_R12, + Z_R13_H,Z_R13 +/*Z_R14_H,Z_R14*/ // return_pc +/*Z_R15_H,Z_R15*/ // SP +); + +// Other special pointer regs. +reg_class z_r1_regP(Z_R1_H,Z_R1); +reg_class z_r9_regP(Z_R9_H,Z_R9); + + +// Long Register Classes + +reg_class z_rarg1_long_reg(Z_R2_H,Z_R2); +reg_class z_rarg2_long_reg(Z_R3_H,Z_R3); +reg_class z_rarg3_long_reg(Z_R4_H,Z_R4); +reg_class z_rarg4_long_reg(Z_R5_H,Z_R5); +reg_class z_rarg5_long_reg(Z_R6_H,Z_R6); + +// Longs in 1 register. Aligned adjacent hi/lo pairs. +reg_class z_long_reg( +/*Z_R0_H,Z_R0*/ // R0 +/*Z_R1_H,Z_R1*/ + Z_R2_H,Z_R2, + Z_R3_H,Z_R3, + Z_R4_H,Z_R4, + Z_R5_H,Z_R5, + Z_R6_H,Z_R6, + Z_R7_H,Z_R7, +/*Z_R8_H,Z_R8,*/ // Z_thread + Z_R9_H,Z_R9, + Z_R10_H,Z_R10, + Z_R11_H,Z_R11, + Z_R12_H,Z_R12, + Z_R13_H,Z_R13 +/*Z_R14_H,Z_R14,*/ // return_pc +/*Z_R15_H,Z_R15*/ // SP +); + + +// Special Class for Condition Code Flags Register + +reg_class z_condition_reg( + Z_CR +); + +// Scratch register for late profiling. Callee saved. +reg_class z_rscratch2_bits64_reg(Z_R2_H, Z_R2); + + +// Float Register Classes + +reg_class z_flt_reg( + Z_F0, +/*Z_F1,*/ // scratch + Z_F2, + Z_F3, + Z_F4, + Z_F5, + Z_F6, + Z_F7, + Z_F8, + Z_F9, + Z_F10, + Z_F11, + Z_F12, + Z_F13, + Z_F14, + Z_F15 +); +reg_class z_rscratch1_flt_reg(Z_F1); + +// Double precision float registers have virtual `high halves' that +// are needed by the allocator. 
+reg_class z_dbl_reg( + Z_F0,Z_F0_H, +/*Z_F1,Z_F1_H,*/ // scratch + Z_F2,Z_F2_H, + Z_F3,Z_F3_H, + Z_F4,Z_F4_H, + Z_F5,Z_F5_H, + Z_F6,Z_F6_H, + Z_F7,Z_F7_H, + Z_F8,Z_F8_H, + Z_F9,Z_F9_H, + Z_F10,Z_F10_H, + Z_F11,Z_F11_H, + Z_F12,Z_F12_H, + Z_F13,Z_F13_H, + Z_F14,Z_F14_H, + Z_F15,Z_F15_H +); +reg_class z_rscratch1_dbl_reg(Z_F1,Z_F1_H); + +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define 'name --> value' mappings to inform the ADLC of an integer valued name. +// Current support includes integer values in the range [0, 0x7FFFFFFF]. +// Format: +// int_def <name> (<int_value>, <expression>); +// Generated Code in ad_<arch>.hpp +// #define <name> (<expression>) +// // value == <int_value> +// Generated code in ad_<arch>.cpp adlc_verification() +// assert(<name> == <int_value>, "Expect (<expression>) to equal <int_value>"); +// +definitions %{ + // The default cost (of an ALU instruction). + int_def DEFAULT_COST ( 100, 100); + int_def DEFAULT_COST_LOW ( 80, 80); + int_def DEFAULT_COST_HIGH ( 120, 120); + int_def HUGE_COST (1000000, 1000000); + + // Put an advantage on REG_MEM vs. MEM+REG_REG operations. + int_def ALU_REG_COST ( 100, DEFAULT_COST); + int_def ALU_MEMORY_COST ( 150, 150); + + // Memory refs are twice as expensive as run-of-the-mill. + int_def MEMORY_REF_COST_HI ( 220, 2 * DEFAULT_COST+20); + int_def MEMORY_REF_COST ( 200, 2 * DEFAULT_COST); + int_def MEMORY_REF_COST_LO ( 180, 2 * DEFAULT_COST-20); + + // Branches are even more expensive. + int_def BRANCH_COST ( 300, DEFAULT_COST * 3); + int_def CALL_COST ( 300, DEFAULT_COST * 3); +%} + +source %{ + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) +#define BIND(label) __ bind(label) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":") +#endif + +#define __ _masm. + +#define Z_DISP_SIZE Immediate::is_uimm12((long)opnd_array(1)->disp(ra_,this,2)) ? 4 : 6 +#define Z_DISP3_SIZE 6 + +// Tertiary op of a LoadP or StoreP encoding. 
+#define REGP_OP true + +// Given a register encoding, produce an Integer Register object. +static Register reg_to_register_object(int register_encoding); + +// **************************************************************************** + +// REQUIRED FUNCTIONALITY + +// !!!!! Special hack to get all types of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. + +int MachCallStaticJavaNode::ret_addr_offset() { + if (_method) { + return 8; + } else { + return MacroAssembler::call_far_patchable_ret_addr_offset(); + } +} + +int MachCallDynamicJavaNode::ret_addr_offset() { + // Consider size of receiver type profiling (C2 tiers). + int profile_receiver_type_size = 0; + + int vtable_index = this->_vtable_index; + if (vtable_index == -4) { + return 14 + profile_receiver_type_size; + } else { + assert(!UseInlineCaches, "expect vtable calls only if not using ICs"); + return 36 + profile_receiver_type_size; + } +} + +int MachCallRuntimeNode::ret_addr_offset() { + return 12 + MacroAssembler::call_far_patchable_ret_addr_offset(); +} + +// Compute padding required for nodes which need alignment +// +// The addresses of the call instructions need to be 4-byte aligned to +// ensure that they don't span a cache line so that they are atomically patchable. +// The actual calls get emitted at different offsets within the node emitters. +// ins_alignment needs to be set to 2 which means that up to 1 nop may get inserted. 
+ +int CallStaticJavaDirect_dynTOCNode::compute_padding(int current_offset) const { + return (0 - current_offset) & 2; +} + +int CallDynamicJavaDirect_dynTOCNode::compute_padding(int current_offset) const { + return (6 - current_offset) & 2; +} + +int CallRuntimeDirectNode::compute_padding(int current_offset) const { + return (12 - current_offset) & 2; +} + +int CallLeafDirectNode::compute_padding(int current_offset) const { + return (12 - current_offset) & 2; +} + +int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { + return (12 - current_offset) & 2; +} + +// Indicate if the safepoint node needs the polling page as an input. +// Since z/Architecture does not have absolute addressing, it does. +bool SafePointNode::needs_polling_address_input() { + return true; +} + +void emit_nop(CodeBuffer &cbuf) { + MacroAssembler _masm(&cbuf); + __ z_nop(); +} + +// Emit an interrupt that is caught by the debugger (for debugging compiler). +void emit_break(CodeBuffer &cbuf) { + MacroAssembler _masm(&cbuf); + __ z_illtrap(); +} + +#if !defined(PRODUCT) +void MachBreakpointNode::format(PhaseRegAlloc *, outputStream *os) const { + os->print("TA"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + emit_break(cbuf); +} + +uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +static inline void z_emit16(CodeBuffer &cbuf, long value) { + // 32bit instructions may become sign extended. + assert(value >= 0, "unintended sign extension (int->long)"); + assert(value < (1L << 16), "instruction too large"); + *((unsigned short*)(cbuf.insts_end())) = (unsigned short)value; + cbuf.set_insts_end(cbuf.insts_end() + sizeof(unsigned short)); +} + +static inline void z_emit32(CodeBuffer &cbuf, long value) { + // 32bit instructions may become sign extended. 
+ assert(value < (1L << 32), "instruction too large"); + *((unsigned int*)(cbuf.insts_end())) = (unsigned int)value; + cbuf.set_insts_end(cbuf.insts_end() + sizeof(unsigned int)); +} + +static inline void z_emit48(CodeBuffer &cbuf, long value) { + // 32bit instructions may become sign extended. + assert(value >= 0, "unintended sign extension (int->long)"); + assert(value < (1L << 48), "instruction too large"); + value = value<<16; + memcpy(cbuf.insts_end(), (unsigned char*)&value, 6); + cbuf.set_insts_end(cbuf.insts_end() + 6); +} + +static inline unsigned int z_emit_inst(CodeBuffer &cbuf, long value) { + if (value < 0) { + // There obviously has been an unintended sign extension (int->long). Revert it. + value = (long)((unsigned long)((unsigned int)value)); + } + + if (value < (1L << 16)) { // 2-byte instruction + z_emit16(cbuf, value); + return 2; + } + + if (value < (1L << 32)) { // 4-byte instruction, might be unaligned store + z_emit32(cbuf, value); + return 4; + } + + // 6-byte instruction, probably unaligned store. + z_emit48(cbuf, value); + return 6; +} + +// Check effective address (at runtime) for required alignment. +static inline void z_assert_aligned(CodeBuffer &cbuf, int disp, Register index, Register base, int alignment) { + MacroAssembler _masm(&cbuf); + + __ z_lay(Z_R0, disp, index, base); + __ z_nill(Z_R0, alignment-1); + __ z_brc(Assembler::bcondEqual, +3); + __ z_illtrap(); +} + +int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, relocInfo::relocType rtype, + PhaseRegAlloc* ra_, bool is_native_call = false) { + __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp(). 
+ address old_mark = __ inst_mark(); + unsigned int start_off = __ offset(); + + if (is_native_call) { + ShouldNotReachHere(); + } + + if (rtype == relocInfo::runtime_call_w_cp_type) { + assert((__ offset() & 2) == 0, "misaligned emit_call_reloc"); + address call_addr = __ call_c_opt((address)entry_point); + if (call_addr == NULL) { + Compile::current()->env()->record_out_of_memory_failure(); + return -1; + } + } else { + assert(rtype == relocInfo::none || rtype == relocInfo::opt_virtual_call_type || + rtype == relocInfo::static_call_type, "unexpected rtype"); + __ relocate(rtype); + // BRASL must be prepended with a nop to identify it in the instruction stream. + __ z_nop(); + __ z_brasl(Z_R14, (address)entry_point); + } + + unsigned int ret_off = __ offset(); + + return (ret_off - start_off); +} + +static int emit_call_reloc(MacroAssembler &_masm, intptr_t entry_point, RelocationHolder const& rspec) { + __ set_inst_mark(); // Used in z_enc_java_static_call() and emit_java_to_interp(). + address old_mark = __ inst_mark(); + unsigned int start_off = __ offset(); + + relocInfo::relocType rtype = rspec.type(); + assert(rtype == relocInfo::opt_virtual_call_type || rtype == relocInfo::static_call_type, + "unexpected rtype"); + + __ relocate(rspec); + __ z_nop(); + __ z_brasl(Z_R14, (address)entry_point); + + unsigned int ret_off = __ offset(); + + return (ret_off - start_off); +} + +//============================================================================= + +const RegMask& MachConstantBaseNode::_out_RegMask = _Z_PTR_REG_mask; +int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +// Even with PC-relative TOC addressing, we still need this node. +// Float loads/stores do not support PC-relative addresses. 
+// Load the TOC address into the register assigned to this node.
+void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
+  MacroAssembler _masm(&cbuf);
+  Register Rtoc = as_Register(ra_->get_encode(this));
+  __ load_toc(Rtoc);
+}
+
+uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
+  // PCrelative TOC access.
+  return 6; // sizeof(LARL)
+}
+
+#if !defined(PRODUCT)
+// Print the node as the LARL instruction that loads the constant pool address.
+void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
+  Register r = as_Register(ra_->get_encode(this));
+  st->print("LARL    %s,&constant_pool # MachConstantBaseNode", r->name());
+}
+#endif
+
+//=============================================================================
+
+#if !defined(PRODUCT)
+// Print a textual outline of the method prolog: optional NOPs, optional
+// thread verification, optional stack bang, and the frame push.
+void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
+  Compile* C = ra_->C;
+  st->print_cr("--- MachPrologNode ---");
+  st->print("\t");
+  for (int i = 0; i < OptoPrologueNops; i++) {
+    st->print_cr("NOP"); st->print("\t");
+  }
+
+  if (VerifyThread) {
+    st->print_cr("Verify_Thread");
+    st->print("\t");
+  }
+
+  long framesize = C->frame_size_in_bytes();
+  int bangsize   = C->bang_size_in_bytes();
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them. But be
+  // careful, because some VM calls (such as call site linkage) can
+  // use several kilobytes of stack. But the stack safety zone should
+  // account for that. See bugs 4446381, 4468289, 4497237.
+  if (C->need_stack_bang(bangsize) && UseStackBanging) {
+    st->print_cr("# stack bang"); st->print("\t");
+  }
+  st->print_cr("push_frame %d", (int)-framesize);
+  st->print("\t");
+}
+#endif
+
+// Emit the method prolog: thread verification, optional stack overflow
+// check, saving of the return pc, and the frame push. If the compilation has
+// a constant base node, the constant table's base offset is set here as well.
+void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  Compile* C = ra_->C;
+  MacroAssembler _masm(&cbuf);
+
+  __ verify_thread();
+
+  size_t framesize = C->frame_size_in_bytes();
+  size_t bangsize  = C->bang_size_in_bytes();
+
+  assert(framesize % wordSize == 0, "must preserve wordSize alignment");
+
+  // Calls to C2R adapters often do not accept exceptional returns.
+  // We require that their callers must bang for them. But be
+  // careful, because some VM calls (such as call site linkage) can
+  // use several kilobytes of stack. But the stack safety zone should
+  // account for that. See bugs 4446381, 4468289, 4497237.
+  if (C->need_stack_bang(bangsize) && UseStackBanging) {
+    __ generate_stack_overflow_check(bangsize);
+  }
+
+  assert(Immediate::is_uimm32((long)framesize), "to do: choose suitable types!");
+  __ save_return_pc();
+
+  // The z/Architecture abi is already accounted for in `framesize' via the
+  // 'out_preserve_stack_slots' declaration.
+  __ push_frame((unsigned int)framesize/*includes JIT ABI*/);
+
+  if (C->has_mach_constant_base_node()) {
+    // NOTE: We set the table base offset here because users might be
+    // emitted before MachConstantBaseNode.
+    Compile::ConstantTable& constant_table = C->constant_table();
+    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
+  }
+}
+
+uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
+  // Variable size. Determine dynamically.
+  return MachNode::size(ra_);
+}
+
+int MachPrologNode::reloc() const {
+  // Return number of relocatable values contained in this instruction.
+  return 1; // One reloc entry for load_const(toc).
+}
+
+//=============================================================================
+
+#if !defined(PRODUCT)
+// Print a textual outline of the method epilog, including the polling page
+// load when this epilog polls for a safepoint in a method compilation.
+void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+  os->print_cr("epilog");
+  os->print("\t");
+  if (do_polling() && ra_->C->is_method_compilation()) {
+    os->print_cr("load_from_polling_page Z_R1_scratch");
+    os->print("\t");
+  }
+}
+#endif
+
+// Emit the method epilog: pop the frame, restore the return pc into Z_R14
+// and, if required, touch the safepoint polling page.
+void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+  Compile* C = ra_->C;
+  __ verify_thread();
+
+  // If this does safepoint polling, then do it here.
+  bool need_polling = do_polling() && C->is_method_compilation();
+
+  // Touch the polling page.
+  // Part 1: get the page's address.
+  if (need_polling) {
+    AddressLiteral pp(os::get_polling_page());
+    __ load_const_optimized(Z_R1_scratch, pp);
+  }
+
+  // Pop frame, restore return_pc, and all stuff needed by interpreter.
+  // Pop frame by add instead of load (a penny saved is a penny got :-).
+  int frame_size_in_bytes = Assembler::align((C->frame_slots() << LogBytesPerInt), frame::alignment_in_bytes);
+  int retPC_offset = frame_size_in_bytes + _z_abi16(return_pc);
+  if (Displacement::is_validDisp(retPC_offset)) {
+    // The return pc slot is addressable from the current SP: load it first,
+    // then pop the frame.
+    __ z_lg(Z_R14, retPC_offset, Z_SP);
+    __ add2reg(Z_SP, frame_size_in_bytes);
+  } else {
+    // Displacement not encodable: pop the frame first, then restore the return pc.
+    __ add2reg(Z_SP, frame_size_in_bytes);
+    __ restore_return_pc();
+  }
+
+  // Touch the polling page,
+  // part 2: touch the page now.
+  if (need_polling) {
+    // We need to mark the code position where the load from the safepoint
+    // polling page was emitted as relocInfo::poll_return_type here.
+    __ relocate(relocInfo::poll_return_type);
+    __ load_from_polling_page(Z_R1_scratch);
+  }
+}
+
+uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
+  // variable size. determine dynamically.
+  return MachNode::size(ra_);
+}
+
+int MachEpilogNode::reloc() const {
+  // Return number of relocatable values contained in this instruction.
+  return 1; // One for load_from_polling_page.
+}
+
+const Pipeline * MachEpilogNode::pipeline() const {
+  return MachNode::pipeline_class();
+}
+
+// Only valid for polling epilogs (asserted); always returns 0.
+int MachEpilogNode::safepoint_offset() const {
+  assert(do_polling(), "no return for this epilog node");
+  return 0;
+}
+
+//=============================================================================
+
+// Figure out which register class each belongs in: rc_int, rc_float, rc_stack.
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+
+static enum RC rc_class(OptoReg::Name reg) {
+  // Return the register class for the given register. The given register
+  // reg is a _num value, which is an index into the MachRegisterNumbers
+  // enumeration in adGlobals_s390.hpp.
+ + if (reg == OptoReg::Bad) { + return rc_bad; + } + + // We have 32 integer register halves, starting at index 0. + if (reg < 32) { + return rc_int; + } + + // We have 32 floating-point register halves, starting at index 32. + if (reg < 32+32) { + return rc_float; + } + + // Between float regs & stack are the flags regs. + assert(reg >= OptoReg::stack0(), "blow up if spilling flags"); + return rc_stack; +} + +// Returns size as obtained from z_emit_instr. +static unsigned int z_ld_st_helper(CodeBuffer *cbuf, const char *op_str, unsigned long opcode, + int reg, int offset, bool do_print, outputStream *os) { + + if (cbuf) { + if (opcode > (1L<<32)) { + return z_emit_inst(*cbuf, opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 48) | + Assembler::simm20(offset) | Assembler::reg(Z_R0, 12, 48) | Assembler::regz(Z_SP, 16, 48)); + } else { + return z_emit_inst(*cbuf, opcode | Assembler::reg(Matcher::_regEncode[reg], 8, 32) | + Assembler::uimm12(offset, 20, 32) | Assembler::reg(Z_R0, 12, 32) | Assembler::regz(Z_SP, 16, 32)); + } + } + +#if !defined(PRODUCT) + if (do_print) { + os->print("%s %s,#%d[,SP]\t # MachCopy spill code",op_str, Matcher::regName[reg], offset); + } +#endif + return (opcode > (1L << 32)) ? 6 : 4; +} + +static unsigned int z_mvc_helper(CodeBuffer *cbuf, int len, int dst_off, int src_off, bool do_print, outputStream *os) { + if (cbuf) { + MacroAssembler _masm(cbuf); + __ z_mvc(dst_off, len-1, Z_SP, src_off, Z_SP); + } + +#if !defined(PRODUCT) + else if (do_print) { + os->print("MVC %d(%d,SP),%d(SP)\t # MachCopy spill code",dst_off, len, src_off); + } +#endif + + return 6; +} + +uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *os) const { + // Get registers to move. 
+ OptoReg::Name src_hi = ra_->get_reg_second(in(1)); + OptoReg::Name src_lo = ra_->get_reg_first(in(1)); + OptoReg::Name dst_hi = ra_->get_reg_second(this); + OptoReg::Name dst_lo = ra_->get_reg_first(this); + + enum RC src_hi_rc = rc_class(src_hi); + enum RC src_lo_rc = rc_class(src_lo); + enum RC dst_hi_rc = rc_class(dst_hi); + enum RC dst_lo_rc = rc_class(dst_lo); + + assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register"); + bool is64 = (src_hi_rc != rc_bad); + assert(!is64 || + ((src_lo&1) == 0 && src_lo+1 == src_hi && (dst_lo&1) == 0 && dst_lo+1 == dst_hi), + "expected aligned-adjacent pairs"); + + // Generate spill code! + + if (src_lo == dst_lo && src_hi == dst_hi) { + return 0; // Self copy, no move. + } + + int src_offset = ra_->reg2offset(src_lo); + int dst_offset = ra_->reg2offset(dst_lo); + bool print = !do_size; + bool src12 = Immediate::is_uimm12(src_offset); + bool dst12 = Immediate::is_uimm12(dst_offset); + + const char *mnemo = NULL; + unsigned long opc = 0; + + // Memory->Memory Spill. Use Z_R0 to hold the value. + if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { + + assert(!is64 || (src_hi_rc==rc_stack && dst_hi_rc==rc_stack), + "expected same type of move for high parts"); + + if (src12 && dst12) { + return z_mvc_helper(cbuf, is64 ? 8 : 4, dst_offset, src_offset, print, os); + } + + int r0 = Z_R0_num; + if (is64) { + return z_ld_st_helper(cbuf, "LG ", LG_ZOPC, r0, src_offset, print, os) + + z_ld_st_helper(cbuf, "STG ", STG_ZOPC, r0, dst_offset, print, os); + } + + return z_ld_st_helper(cbuf, "LY ", LY_ZOPC, r0, src_offset, print, os) + + z_ld_st_helper(cbuf, "STY ", STY_ZOPC, r0, dst_offset, print, os); + } + + // Check for float->int copy. Requires a trip through memory. + if (src_lo_rc == rc_float && dst_lo_rc == rc_int) { + Unimplemented(); // Unsafe, do not remove! + } + + // Check for integer reg-reg copy. 
+ if (src_lo_rc == rc_int && dst_lo_rc == rc_int) { + if (cbuf) { + MacroAssembler _masm(cbuf); + Register Rsrc = as_Register(Matcher::_regEncode[src_lo]); + Register Rdst = as_Register(Matcher::_regEncode[dst_lo]); + __ z_lgr(Rdst, Rsrc); + return 4; + } +#if !defined(PRODUCT) + // else + if (print) { + os->print("LGR %s,%s\t # MachCopy spill code", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + } +#endif + return 4; + } + + // Check for integer store. + if (src_lo_rc == rc_int && dst_lo_rc == rc_stack) { + assert(!is64 || (src_hi_rc==rc_int && dst_hi_rc==rc_stack), + "expected same type of move for high parts"); + + if (is64) { + return z_ld_st_helper(cbuf, "STG ", STG_ZOPC, src_lo, dst_offset, print, os); + } + + // else + mnemo = dst12 ? "ST " : "STY "; + opc = dst12 ? ST_ZOPC : STY_ZOPC; + + return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os); + } + + // Check for integer load + // Always load cOops zero-extended. That doesn't hurt int loads. + if (dst_lo_rc == rc_int && src_lo_rc == rc_stack) { + + assert(!is64 || (dst_hi_rc==rc_int && src_hi_rc==rc_stack), + "expected same type of move for high parts"); + + mnemo = is64 ? "LG " : "LLGF"; + opc = is64 ? LG_ZOPC : LLGF_ZOPC; + + return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os); + } + + // Check for float reg-reg copy. + if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { + if (cbuf) { + MacroAssembler _masm(cbuf); + FloatRegister Rsrc = as_FloatRegister(Matcher::_regEncode[src_lo]); + FloatRegister Rdst = as_FloatRegister(Matcher::_regEncode[dst_lo]); + __ z_ldr(Rdst, Rsrc); + return 2; + } +#if !defined(PRODUCT) + // else + if (print) { + os->print("LDR %s,%s\t # MachCopy spill code", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + } +#endif + return 2; + } + + // Check for float store. 
+ if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { + assert(!is64 || (src_hi_rc==rc_float && dst_hi_rc==rc_stack), + "expected same type of move for high parts"); + + if (is64) { + mnemo = dst12 ? "STD " : "STDY "; + opc = dst12 ? STD_ZOPC : STDY_ZOPC; + return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os); + } + // else + + mnemo = dst12 ? "STE " : "STEY "; + opc = dst12 ? STE_ZOPC : STEY_ZOPC; + return z_ld_st_helper(cbuf, mnemo, opc, src_lo, dst_offset, print, os); + } + + // Check for float load. + if (dst_lo_rc == rc_float && src_lo_rc == rc_stack) { + assert(!is64 || (dst_hi_rc==rc_float && src_hi_rc==rc_stack), + "expected same type of move for high parts"); + + if (is64) { + mnemo = src12 ? "LD " : "LDY "; + opc = src12 ? LD_ZOPC : LDY_ZOPC; + return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os); + } + // else + + mnemo = src12 ? "LE " : "LEY "; + opc = src12 ? LE_ZOPC : LEY_ZOPC; + return z_ld_st_helper(cbuf, mnemo, opc, dst_lo, src_offset, print, os); + } + + // -------------------------------------------------------------------- + // Check for hi bits still needing moving. Only happens for misaligned + // arguments to native calls. + if (src_hi == dst_hi) { + return 0; // Self copy, no move. + } + + assert(is64 && dst_hi_rc != rc_bad, "src_hi & dst_hi cannot be Bad"); + Unimplemented(); // Unsafe, do not remove! + + return 0; // never reached, but make the compiler shut up! 
+} + +#if !defined(PRODUCT) +void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *os) const { + if (ra_ && ra_->node_regs_max_index() > 0) { + implementation(NULL, ra_, false, os); + } else { + if (req() == 2 && in(1)) { + os->print("N%d = N%d\n", _idx, in(1)->_idx); + } else { + const char *c = "("; + os->print("N%d = ", _idx); + for (uint i = 1; i < req(); ++i) { + os->print("%sN%d", c, in(i)->_idx); + c = ", "; + } + os->print(")"); + } + } +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation(&cbuf, ra_, false, NULL); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return implementation(NULL, ra_, true, NULL); +} + +//============================================================================= + +#if !defined(PRODUCT) +void MachNopNode::format(PhaseRegAlloc *, outputStream *os) const { + os->print("NOP # pad for alignment (%d nops, %d bytes)", _count, _count*MacroAssembler::nop_size()); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ra_) const { + MacroAssembler _masm(&cbuf); + + int rem_space = 0; + if (!(ra_->C->in_scratch_emit_size())) { + rem_space = cbuf.insts()->remaining(); + if (rem_space <= _count*2 + 8) { + tty->print("NopNode: _count = %3.3d, remaining space before = %d", _count, rem_space); + } + } + + for (int i = 0; i < _count; i++) { + __ z_nop(); + } + + if (!(ra_->C->in_scratch_emit_size())) { + if (rem_space <= _count*2 + 8) { + int rem_space2 = cbuf.insts()->remaining(); + tty->print_cr(", after = %d", rem_space2); + } + } +} + +uint MachNopNode::size(PhaseRegAlloc *ra_) const { + return 2 * _count; +} + +#if !defined(PRODUCT) +void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *os) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + if (ra_ && ra_->node_regs_max_index() > 0) { + int reg = ra_->get_reg_first(this); + os->print("ADDHI %s, SP, %d\t//box node", Matcher::regName[reg], offset); + } else { + 
+    os->print("ADDHI  N%d = SP + %d\t// box node", _idx, offset);
+  }
+}
+#endif
+
+// Take care of the size function, if you make changes here!
+// Emits a single LAY that computes the address SP + offset of the box into
+// the register assigned to this node.
+void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
+  int reg = ra_->get_encode(this);
+  __ z_lay(as_Register(reg), offset, Z_SP);
+}
+
+uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
+  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_)
+  return 6;
+}
+
+ %} // end source section
+
+//----------SOURCE BLOCK-------------------------------------------------------
+// This is a block of C++ code which provides values, functions, and
+// definitions necessary in the rest of the architecture description
+
+source_hpp %{
+
+// Header information of the source block.
+// Method declarations/definitions which are used outside
+// the ad-scope can conveniently be defined here.
+//
+// To keep related declarations/definitions/uses close together,
+// we switch between source %{ }% and source_hpp %{ }% freely as needed.
+
+//--------------------------------------------------------------
+// Used for optimization in Compile::Shorten_branches
+//--------------------------------------------------------------
+
+class CallStubImpl {
+ public:
+
+  // call trampolines
+  // Size of call trampoline stub. For add'l comments, see size_java_to_interp().
+  static uint size_call_trampoline() {
+    return 0; // no call trampolines on this platform
+  }
+
+  // call trampolines
+  // Number of relocations needed by a call trampoline stub.
+  static uint reloc_call_trampoline() {
+    return 0; // No call trampolines on this platform.
+  }
+};
+
+%} // end source_hpp section
+
+source %{
+
+#if !defined(PRODUCT)
+// Print a fixed textual outline of the unverified entry point.
+void MachUEPNode::format(PhaseRegAlloc *ra_, outputStream *os) const {
+  os->print_cr("---- MachUEPNode ----");
+  os->print_cr("\tTA");
+  os->print_cr("\tload_const Z_R1, SharedRuntime::get_ic_miss_stub()");
+  os->print_cr("\tBR(Z_R1)");
+  os->print_cr("\tTA  # pad with illtraps");
+  os->print_cr("\t...");
+  os->print_cr("\tTA");
+  os->print_cr("\tLTGR    Z_R2, Z_R2");
+  os->print_cr("\tBRU     ic_miss");
+}
+#endif
+
+// Emit the unverified entry point: an optional explicit null check of the
+// receiver followed by the inline cache check. Both checks branch to the
+// ic miss stub on failure and fall through to the method body on success.
+void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+  MacroAssembler _masm(&cbuf);
+
+  // Inline_cache contains a klass.
+  Register ic_klass = as_Register(Matcher::inline_cache_reg_encode());
+  // ARG1 is the receiver oop.
+  Register R2_receiver = Z_ARG1;
+  int      klass_offset = oopDesc::klass_offset_in_bytes();
+  AddressLiteral icmiss(SharedRuntime::get_ic_miss_stub());
+  Register R1_ic_miss_stub_addr = Z_R1_scratch;
+
+  // Null check of receiver.
+  // This is the null check of the receiver that actually should be
+  // done in the caller. It's here because in case of implicit null
+  // checks we get it for free.
+  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()),
+         "second word in oop should not require explicit null check.");
+  if (!ImplicitNullChecks) {
+    Label valid;
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cgij(R2_receiver, 0, Assembler::bcondNotEqual, valid);
+    } else {
+      __ z_ltgr(R2_receiver, R2_receiver);
+      // A non-NULL receiver is the valid case, exactly as in the
+      // compare-and-branch variant above: branch on "not equal".
+      __ z_brne(valid);
+    }
+    // The ic_miss_stub will handle the null pointer exception.
+    __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
+    __ z_br(R1_ic_miss_stub_addr);
+    __ bind(valid);
+  }
+
+  // Check whether this method is the proper implementation for the class of
+  // the receiver (ic miss check).
+  {
+    Label valid;
+    // Compare cached class against klass from receiver.
+    // This also does an implicit null check!
+    __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
+    __ z_bre(valid);
+    // The inline cache points to the wrong method. Call the
+    // ic_miss_stub to find the proper method.
+    __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
+    __ z_br(R1_ic_miss_stub_addr);
+    __ bind(valid);
+  }
+
+}
+
+uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
+  // Determine size dynamically.
+  return MachNode::size(ra_);
+}
+
+//=============================================================================
+
+%} // interrupt source section
+
+source_hpp %{ // Header information of the source block.
+
+// Emitters and size bounds for the exception and deoptimization handler
+// stubs that are placed after a method's code.
+class HandlerImpl {
+ public:
+
+  static int emit_exception_handler(CodeBuffer &cbuf);
+  static int emit_deopt_handler(CodeBuffer& cbuf);
+
+  static uint size_exception_handler() {
+    return NativeJump::max_instruction_size();
+  }
+
+  static uint size_deopt_handler() {
+    return NativeCall::max_instruction_size();
+  }
+};
+
+%} // end source_hpp section
+
+source %{
+
+// This exception handler code snippet is placed after the method's
+// code. It is the return point if an exception occurred. It jumps to
+// the exception blob.
+//
+// If the method gets deoptimized, the method and this code snippet
+// get patched.
+//
+// 1) Trampoline code gets patched into the end of this exception
+//    handler. The trampoline code jumps to the deoptimization blob.
+//
+// 2) The return address in the method's code will get patched such
+//    that it jumps to the trampoline.
+//
+// 3) The handler will get patched such that it does not jump to the
+//    exception blob, but to an entry in the deoptimization blob being
+//    aware of the exception.
+//
+// Returns the offset of the handler, or 0 if the stub could not be started.
+int HandlerImpl::emit_exception_handler(CodeBuffer &cbuf) {
+  Register temp_reg = Z_R1;
+  MacroAssembler _masm(&cbuf);
+
+  address base = __ start_a_stub(size_exception_handler());
+  if (base == NULL) {
+    return 0;          // CodeBuffer::expand failed
+  }
+
+  int offset = __ offset();
+  // Use unconditional pc-relative jump with 32-bit range here.
+  // NOTE(review): the code that follows loads the target address into
+  // temp_reg and branches via that register, not via a pc-relative jump.
+  __ load_const_optimized(temp_reg, (address)OptoRuntime::exception_blob()->content_begin());
+  __ z_br(temp_reg);
+
+  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
+
+  __ end_a_stub();
+
+  return offset;
+}
+
+// Emit deopt handler code.
+// The stub loads the deopt blob's unpack entry into Z_R1 and calls it. Its
+// length must equal size_deopt_handler() exactly (asserted).
+// Returns the offset of the handler, or 0 if the stub could not be started.
+int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
+  MacroAssembler _masm(&cbuf);
+  address        base = __ start_a_stub(size_deopt_handler());
+
+  if (base == NULL) {
+    return 0;  // CodeBuffer::expand failed
+  }
+
+  int offset = __ offset();
+
+  // Size_deopt_handler() must be exact on zarch, so for simplicity
+  // we do not use load_const_opt here.
+  __ load_const(Z_R1, SharedRuntime::deopt_blob()->unpack());
+  __ call(Z_R1);
+  assert(__ offset() - offset == (int) size_deopt_handler(), "must be fixed size");
+
+  __ end_a_stub();
+  return offset;
+}
+
+//=============================================================================
+
+
+// Given a register encoding, produce an Integer Register object.
+static Register reg_to_register_object(int register_encoding) {
+  assert(Z_R12->encoding() == Z_R12_enc, "wrong coding");
+  return as_Register(register_encoding);
+}
+
+// Tell the matcher whether the rule for the given ideal opcode may be used.
+// Returns false if no match rule exists for the opcode at all; for the
+// opcodes listed below the answer depends on VM flags and CPU features,
+// every other opcode with a rule is reported as supported.
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode)) return false;
+
+  switch (opcode) {
+    case Op_CountLeadingZerosI:
+    case Op_CountLeadingZerosL:
+    case Op_CountTrailingZerosI:
+    case Op_CountTrailingZerosL:
+      // Implementation requires FLOGR instruction.
+      return UseCountLeadingZerosInstruction;
+
+    case Op_ReverseBytesI:
+    case Op_ReverseBytesL:
+      return UseByteReverseInstruction;
+
+    // PopCount supported by H/W from z/Architecture G5 (z196) on.
+    case Op_PopCountI:
+    case Op_PopCountL:
+      return UsePopCountInstruction && VM_Version::has_PopCount();
+
+    case Op_StrComp:
+      return SpecialStringCompareTo;
+    case Op_StrEquals:
+      return SpecialStringEquals;
+    case Op_StrIndexOf:
+    case Op_StrIndexOfChar:
+      return SpecialStringIndexOf;
+
+    case Op_GetAndAddI:
+    case Op_GetAndAddL:
+      return true;
+      // return VM_Version::has_AtomicMemWithImmALUOps();
+    case Op_GetAndSetI:
+    case Op_GetAndSetL:
+    case Op_GetAndSetP:
+    case Op_GetAndSetN:
+      return true;  // General CAS implementation, always available.
+
+    default:
+      return true;  // Per default match rules are supported.
+                    // BUT: make sure match rule is not disabled by a false predicate!
+  }
+
+  // Every path through the switch above returns; this statement is not reached.
+  return true;  // Per default match rules are supported.
+                // BUT: make sure match rule is not disabled by a false predicate!
+}
+
+// Vector variant of match_rule_supported(). vlen is currently not evaluated;
+// the result is that of match_rule_supported(opcode).
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+  // TODO
+  // Identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen.
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
+// Not expected to be called on this platform (ShouldNotReachHere).
+int Matcher::regnum_to_fpu_offset(int regnum) {
+  ShouldNotReachHere();
+  return regnum - 32; // The FP registers are in the second chunk.
+}
+
+const bool Matcher::has_predicated_vectors(void) {
+  return false;
+}
+
+// The default float register pressure threshold is used unchanged.
+const int Matcher::float_pressure(int default_pressure_threshold) {
+  return default_pressure_threshold;
+}
+
+const bool Matcher::convL2FSupported(void) {
+  return true; // False means that conversion is done by runtime call.
+}
+
+//----------SUPERWORD HELPERS----------------------------------------
+
+// Vector width in bytes.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  assert(MaxVectorSize == 8, "");
+  return 8;
+}
+
+// Vector ideal reg.
+const int Matcher::vector_ideal_reg(int size) {
+  // Only 8-byte vectors are expected here; they are kept in a long register.
+  assert(MaxVectorSize == 8 && size == 8, "");
+  return Op_RegL;
+}
+
+// Upper limit for the number of elements of type bt in one vector:
+// the vector width divided by the element size.
+const int Matcher::max_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  return vector_width_in_bytes(bt) / type2aelembytes(bt);
+}
+
+// Lower limit for the number of elements in one vector.
+const int Matcher::min_vector_size(const BasicType bt) {
+  // Same as max.
+  return max_vector_size(bt);
+}
+
+// Vector shifts are not available; calling this is a fatal error.
+const int Matcher::vector_shift_count_ideal_reg(int size) {
+  fatal("vector shift is not supported");
+  return Node::NotAMachineReg;
+}
+
+// Misaligned vector store/load is supported by z/Architecture at minimal
+// extra cost.
+const bool Matcher::misaligned_vectors_ok() {
+  return true;
+}
+
+// No port to z/Architecture yet.
+const bool Matcher::pass_original_key_for_aes() {
+  return false;
+}
+
+// Tells whether a branch over the given offset can use a short branch.
+//
+// A platform without any short branch variants should answer `false'
+// for offset 0.
+//
+// `Compile::Fill_buffer' uses this answer to decide whether the pass
+// `Compile::Shorten_branches' is run at all, and
+// `Compile::Shorten_branches' uses it to decide which particular
+// branch sites get replaced by short ones.
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // Short branches on zarch carry a 16 bit signed immediate holding the
+  // pc-relative offset in halfword (= 2 bytes) units.
+  address offset_as_addr = (address)((long)offset);
+  address zero_addr      = (address)0;
+  return Assembler::is_within_range_of_RelAddr16(offset_as_addr, zero_addr);
+}
+
+const bool Matcher::isSimpleConstant64(jlong value) {
+  // Probably always true, even if a temp register is required.
+  return true;
+}
+
+// Should correspond to setting above
+const bool Matcher::init_array_count_is_in_bytes = false;
+
+// Suppress CMOVL. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
+const int Matcher::long_cmove_cost() {
+  return ConditionalMoveLimit;
+}
+
+// Suppress CMOVF. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
+const int Matcher::float_cmove_cost() {
+  return ConditionalMoveLimit;
+}
+
+// Does the CPU require postalloc expand (see block.cpp for description of postalloc expand)?
+const bool Matcher::require_postalloc_expand = false;
+
+// Do we need to mask the count passed to shift instructions or does
+// the cpu only look at the lower 5/6 bits anyway?
+// 32bit shifts mask in emitter, 64bit shifts need no mask.
+// Constant shift counts are handled in Ideal phase.
+const bool Matcher::need_masked_shift_count = false;
+
+// Set this as clone_shift_expressions.
+// True only for unscaled compressed oops without a heap base.
+bool Matcher::narrow_oop_use_complex_address() {
+  return Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0;
+}
+
+bool Matcher::narrow_klass_use_complex_address() {
+  NOT_LP64(ShouldNotCallThis());
+  assert(UseCompressedClassPointers, "only for compressed klass code");
+  // TODO HS25: z port if (MatchDecodeNodes) return true;
+  return false;
+}
+
+bool Matcher::const_oop_prefer_decode() {
+  // In simple compressed oops mode, ConN+DecodeN is preferred over ConP.
+  return Universe::narrow_oop_base() == NULL;
+}
+
+bool Matcher::const_klass_prefer_decode() {
+  // In simple compressed klass mode, ConNKlass+DecodeNKlass is preferred over ConP.
+  return Universe::narrow_klass_base() == NULL;
+}
+
+// Is it better to copy float constants, or load them directly from memory?
+// Most RISCs will have to materialize an address into a
+// register first, so they would do better to copy the constant from stack.
+const bool Matcher::rematerialize_float_constants = false;
+
+// If CPU can load and store mis-aligned doubles directly then no fixup is
+// needed. Else we split the double into 2 integer pieces and move it
+// piece-by-piece.
Only happens when passing doubles into C code as the +// Java calling convention forces doubles to be aligned. +const bool Matcher::misaligned_doubles_ok = true; + +// Advertise here if the CPU requires explicit rounding operations +// to implement the UseStrictFP mode. +const bool Matcher::strict_fp_requires_explicit_rounding = false; + +// Do floats take an entire double register or just half? +// +// A float in resides in a zarch double register. When storing it by +// z_std, it cannot be restored in C-code by reloading it as a double +// and casting it into a float afterwards. +bool Matcher::float_in_double() { return false; } + +// Do ints take an entire long register or just half? +// The relevant question is how the int is callee-saved: +// the whole long is written but de-opt'ing will have to extract +// the relevant 32 bits. +const bool Matcher::int_in_long = true; + +// Constants for c2c and c calling conventions. + +const MachRegisterNumbers z_iarg_reg[5] = { + Z_R2_num, Z_R3_num, Z_R4_num, Z_R5_num, Z_R6_num +}; + +const MachRegisterNumbers z_farg_reg[4] = { + Z_F0_num, Z_F2_num, Z_F4_num, Z_F6_num +}; + +const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]); + +const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]); + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. +bool Matcher::can_be_java_arg(int reg) { + // We return true for all registers contained in z_iarg_reg[] and + // z_farg_reg[] and their virtual halves. + // We must include the virtual halves in order to get STDs and LDs + // instead of STWs and LWs in the trampoline stubs. 
+ + if (reg == Z_R2_num || reg == Z_R2_H_num || + reg == Z_R3_num || reg == Z_R3_H_num || + reg == Z_R4_num || reg == Z_R4_H_num || + reg == Z_R5_num || reg == Z_R5_H_num || + reg == Z_R6_num || reg == Z_R6_H_num) { + return true; + } + + if (reg == Z_F0_num || reg == Z_F0_H_num || + reg == Z_F2_num || reg == Z_F2_H_num || + reg == Z_F4_num || reg == Z_F4_H_num || + reg == Z_F6_num || reg == Z_F6_H_num) { + return true; + } + + return false; +} + +bool Matcher::is_spillable_arg(int reg) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { + return false; +} + +// Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + return _Z_RARG4_INT_REG_mask; +} + +// Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + return _Z_RARG3_INT_REG_mask; +} + +// Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + return _Z_RARG4_LONG_REG_mask; +} + +// Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + return _Z_RARG3_LONG_REG_mask; +} + +// Copied from sparc. +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return RegMask(); +} + +const bool Matcher::convi2l_type_required = true; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { +} + +%} // source + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. Encoding classes are parameterized macros used by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. 
+// Operands specify their base encoding interface with the
+// interface keyword. Four interfaces are currently supported:
+// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
+// operand to generate a function which returns its register number when
+// queried. CONST_INTER causes an operand to generate a function which
+// returns the value of the constant when queried. MEMORY_INTER causes an
+// operand to generate four functions which return the Base Register, the
+// Index Register, the Scale Value, and the Offset Value of the operand when
+// queried. COND_INTER causes an operand to generate six functions which
+// return the encoding code (i.e., the encoding bits for the instruction)
+// associated with each basic boolean condition for a conditional instruction.
+//
+// Instructions specify two basic values for encoding. Again, a function
+// is available to check if the constant displacement is an oop. They use the
+// ins_encode keyword to specify their encoding classes (which must be
+// a sequence of enc_class names, and their parameters, specified in
+// the encoding block), and they use the
+// opcode keyword to specify, in order, their primary, secondary, and
+// tertiary opcode. Only the opcode sections which a particular instruction
+// needs for encoding need to be specified.
+encode %{
+  // Placeholder encoding for mach nodes without an implementation.
+  enc_class enc_unimplemented %{
+    MacroAssembler _masm(&cbuf);
+    __ unimplemented("Unimplemented mach node encoding in AD file.", 13);
+  %}
+
+  // Marks an encoding as untested; emits code in ASSERT builds only.
+  enc_class enc_untested %{
+#ifdef ASSERT
+    MacroAssembler _masm(&cbuf);
+    __ untested("Untested mach node encoding in AD file.");
+#endif
+  %}
+
+  // The encoders below OR the operand fields into $primary and emit the
+  // result as a 16-, 32- or 48-bit instruction. The numeric arguments of
+  // Assembler::reg() etc. are passed through unchanged from the original;
+  // the leading asserts check the format bits of $primary.
+
+  // 16-bit instruction with two register operands.
+  enc_class z_rrform(iRegI dst, iRegI src) %{
+    assert((($primary >> 14) & 0x03) == 0, "Instruction format error");
+    assert( ($primary >> 16)         == 0, "Instruction format error");
+    z_emit16(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,16) |
+             Assembler::reg($src$$reg,12,16));
+  %}
+
+  // 32-bit instruction with two register operands.
+  enc_class z_rreform(iRegI dst1, iRegI src2) %{
+    assert((($primary >> 30) & 0x03) == 2, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst1$$reg,24,32) |
+             Assembler::reg($src2$$reg,28,32));
+  %}
+
+  // 32-bit instruction with three register operands.
+  enc_class z_rrfform(iRegI dst1, iRegI src2, iRegI src3) %{
+    assert((($primary >> 30) & 0x03) == 2, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst1$$reg,24,32) |
+             Assembler::reg($src2$$reg,28,32) |
+             Assembler::reg($src3$$reg,16,32));
+  %}
+
+  // 32-bit instruction: register and signed 16-bit immediate.
+  enc_class z_riform_signed(iRegI dst, immI16 src) %{
+    assert((($primary>>30) & 0x03) == 2, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,32) |
+             Assembler::simm16($src$$constant,16,32));
+  %}
+
+  // 32-bit instruction: register and unsigned 16-bit immediate.
+  enc_class z_riform_unsigned(iRegI dst, uimmI16 src) %{
+    assert((($primary>>30) & 0x03) == 2, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,32) |
+             Assembler::uimm16($src$$constant,16,32));
+  %}
+
+  // 48-bit instruction: two registers and a signed 16-bit immediate.
+  enc_class z_rieform_d(iRegI dst1, iRegI src3, immI src2) %{
+    assert((($primary>>46) & 0x03) == 3, "Instruction format error");
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst1$$reg,8,48) |
+             Assembler::reg($src3$$reg,12,48) |
+             Assembler::simm16($src2$$constant,16,48));
+  %}
+
+  // 48-bit instruction: register and signed 32-bit immediate.
+  enc_class z_rilform_signed(iRegI dst, immL32 src) %{
+    assert((($primary>>46) & 0x03) == 3, "Instruction format error");
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::simm32($src$$constant,16,48));
+  %}
+
+  // 48-bit instruction: register and unsigned 32-bit immediate.
+  enc_class z_rilform_unsigned(iRegI dst, uimmL32 src) %{
+    assert((($primary>>46) & 0x03) == 3, "Instruction format error");
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::uimm32($src$$constant,16,48));
+  %}
+
+  // 48-bit instruction: two registers and a signed 20-bit constant.
+  enc_class z_rsyform_const(iRegI dst, iRegI src1, immI src2) %{
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::reg($src1$$reg,12,48) |
+             Assembler::simm20($src2$$constant));
+  %}
+
+  // 48-bit instruction: three registers, the 20-bit constant field is zero.
+  enc_class z_rsyform_reg_reg(iRegI dst, iRegI src, iRegI shft) %{
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::reg($src$$reg,12,48) |
+             Assembler::reg($shft$$reg,16,48) |
+             Assembler::simm20(0));
+  %}
+
+  // 32-bit instruction: three registers and an unsigned 12-bit constant.
+  enc_class z_rxform_imm_reg_reg(iRegL dst, immL con, iRegL src1, iRegL src2) %{
+    assert((($primary>>30) & 0x03) == 1, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,32) |
+             Assembler::reg($src1$$reg,12,32) |
+             Assembler::reg($src2$$reg,16,32) |
+             Assembler::uimm12($con$$constant,20,32));
+  %}
+
+  // 32-bit instruction: two registers and an unsigned 12-bit constant.
+  enc_class z_rxform_imm_reg(iRegL dst, immL con, iRegL src) %{
+    assert((($primary>>30) & 0x03) == 1, "Instruction format error");
+    z_emit32(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,32) |
+             Assembler::reg($src$$reg,16,32) |
+             Assembler::uimm12($con$$constant,20,32));
+  %}
+
+  // 48-bit instruction: three registers and a signed 20-bit constant.
+  enc_class z_rxyform_imm_reg_reg(iRegL dst, immL con, iRegL src1, iRegL src2) %{
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::reg($src1$$reg,12,48) |
+             Assembler::reg($src2$$reg,16,48) |
+             Assembler::simm20($con$$constant));
+  %}
+
+  // 48-bit instruction: two registers and a signed 20-bit constant.
+  enc_class z_rxyform_imm_reg(iRegL dst, immL con, iRegL src) %{
+    z_emit48(cbuf, $primary |
+             Assembler::reg($dst$$reg,8,48) |
+             Assembler::reg($src$$reg,16,48) |
+             Assembler::simm20($con$$constant));
+  %}
+
+  // Direct memory arithmetic.
+ enc_class z_siyform(memoryRSY mem, immI8 src) %{ /* Asserts VM_Version::has_MemWithImmALUOps() before emitting. */ + int disp = $mem$$disp; + Register base = reg_to_register_object($mem$$base); + int con = $src$$constant; + + assert(VM_Version::has_MemWithImmALUOps(), "unsupported CPU"); + z_emit_inst(cbuf, $primary | + Assembler::regz(base,16,48) | + Assembler::simm20(disp) | + Assembler::simm8(con,8,48)); + %} + + enc_class z_silform(memoryRS mem, immI16 src) %{ /* Base register, unsigned 12-bit displacement, and signed 16-bit immediate. */ + z_emit_inst(cbuf, $primary | + Assembler::regz(reg_to_register_object($mem$$base),16,48) | + Assembler::uimm12($mem$$disp,20,48) | + Assembler::simm16($src$$constant,32,48)); + %} + + // Encoder for FP ALU reg/mem instructions (support only short displacements). + enc_class z_form_rt_memFP(RegF dst, memoryRX mem) %{ /* Both branches use a uimm12 displacement; only the field width (48 vs. 32) differs. */ + Register Ridx = $mem$$index$$Register; + if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0. + if ($primary > (1L << 32)) { + z_emit_inst(cbuf, $primary | + Assembler::reg($dst$$reg, 8, 48) | + Assembler::uimm12($mem$$disp, 20, 48) | + Assembler::reg(Ridx, 12, 48) | + Assembler::regz(reg_to_register_object($mem$$base), 16, 48)); + } else { + z_emit_inst(cbuf, $primary | + Assembler::reg($dst$$reg, 8, 32) | + Assembler::uimm12($mem$$disp, 20, 32) | + Assembler::reg(Ridx, 12, 32) | + Assembler::regz(reg_to_register_object($mem$$base), 16, 32)); + } + %} + + enc_class z_form_rt_mem(iRegI dst, memory mem) %{ /* Uses the 48-bit layout (simm20 displacement) when $primary > 2^32, else the 32-bit layout (uimm12 displacement). */ + Register Ridx = $mem$$index$$Register; + if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0. + if ($primary > (1L<<32)) { + z_emit_inst(cbuf, $primary | + Assembler::reg($dst$$reg, 8, 48) | + Assembler::simm20($mem$$disp) | + Assembler::reg(Ridx, 12, 48) | + Assembler::regz(reg_to_register_object($mem$$base), 16, 48)); + } else { + z_emit_inst(cbuf, $primary | + Assembler::reg($dst$$reg, 8, 32) | + Assembler::uimm12($mem$$disp, 20, 32) | + Assembler::reg(Ridx, 12, 32) | + Assembler::regz(reg_to_register_object($mem$$base), 16, 32)); + } + %} + + enc_class z_form_rt_mem_opt(iRegI dst, memory mem) %{ /* Picks between $secondary (short displacement) and $primary (long displacement) based on the displacement value. */ + int isize = $secondary > 1L << 32 ? 
48 : 32; + Register Ridx = $mem$$index$$Register; + if (Ridx == noreg) { Ridx = Z_R0; } // Index is 0. + + if (Displacement::is_shortDisp((long)$mem$$disp)) { + z_emit_inst(cbuf, $secondary | + Assembler::reg($dst$$reg, 8, isize) | + Assembler::uimm12($mem$$disp, 20, isize) | + Assembler::reg(Ridx, 12, isize) | + Assembler::regz(reg_to_register_object($mem$$base), 16, isize)); + } else if (Displacement::is_validDisp((long)$mem$$disp)) { + z_emit_inst(cbuf, $primary | + Assembler::reg($dst$$reg, 8, 48) | + Assembler::simm20($mem$$disp) | + Assembler::reg(Ridx, 12, 48) | + Assembler::regz(reg_to_register_object($mem$$base), 16, 48)); + } else { /* Displacement fits neither form: load it into Z_R1_scratch, add the index if there is one, and use Z_R1_scratch as index with displacement 0. */ + MacroAssembler _masm(&cbuf); + __ load_const_optimized(Z_R1_scratch, $mem$$disp); + if (Ridx != Z_R0) { __ z_agr(Z_R1_scratch, Ridx); } + z_emit_inst(cbuf, $secondary | + Assembler::reg($dst$$reg, 8, isize) | + Assembler::uimm12(0, 20, isize) | + Assembler::reg(Z_R1_scratch, 12, isize) | + Assembler::regz(reg_to_register_object($mem$$base), 16, isize)); + } + %} + + enc_class z_enc_brul(Label lbl) %{ + MacroAssembler _masm(&cbuf); + Label* p = $lbl$$label; + + // 'p' is `NULL' when this encoding class is used only to + // determine the size of the encoded instruction. + // Use a bound dummy label in that case. + Label d; + __ bind(d); + Label& l = (NULL == p) ? d : *(p); + __ z_brul(l); + %} + + enc_class z_enc_bru(Label lbl) %{ + MacroAssembler _masm(&cbuf); + Label* p = $lbl$$label; + + // 'p' is `NULL' when this encoding class is used only to + // determine the size of the encoded instruction. + // Use a bound dummy label in that case. + Label d; + __ bind(d); + Label& l = (NULL == p) ? d : *(p); + __ z_bru(l); + %} + + enc_class z_enc_branch_con_far(cmpOp cmp, Label lbl) %{ + MacroAssembler _masm(&cbuf); + Label* p = $lbl$$label; + + // 'p' is `NULL' when this encoding class is used only to + // determine the size of the encoded instruction. + // Use a bound dummy label in that case. 
+ Label d; + __ bind(d); + Label& l = (NULL == p) ? d : *(p); + __ z_brcl((Assembler::branch_condition)$cmp$$cmpcode, l); + %} + + enc_class z_enc_branch_con_short(cmpOp cmp, Label lbl) %{ + MacroAssembler _masm(&cbuf); + Label* p = $lbl$$label; + + // 'p' is `NULL' when this encoding class is used only to + // determine the size of the encoded instruction. + // Use a bound dummy label in that case. + Label d; + __ bind(d); + Label& l = (NULL == p) ? d : *(p); + __ z_brc((Assembler::branch_condition)$cmp$$cmpcode, l); + %} + + enc_class z_enc_cmpb_regreg(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{ /* Short variant: one compare-and-branch instruction, selected by $primary. */ + MacroAssembler _masm(&cbuf); + Label* p = $lbl$$label; + + // 'p' is `NULL' when this encoding class is used only to + // determine the size of the encoded instruction. + // Use a bound dummy label in that case. + Label d; + __ bind(d); + Label& l = (NULL == p) ? d : *(p); + Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode; + unsigned long instr = $primary; + if (instr == CRJ_ZOPC) { + __ z_crj($src1$$Register, $src2$$Register, cc, l); + } else if (instr == CLRJ_ZOPC) { + __ z_clrj($src1$$Register, $src2$$Register, cc, l); + } else if (instr == CGRJ_ZOPC) { + __ z_cgrj($src1$$Register, $src2$$Register, cc, l); + } else { + guarantee(instr == CLGRJ_ZOPC, "opcode not implemented"); + __ z_clgrj($src1$$Register, $src2$$Register, cc, l); + } + %} + + enc_class z_enc_cmpb_regregFar(iRegI src1, iRegI src2, Label lbl, cmpOpT cmp) %{ + MacroAssembler _masm(&cbuf); + Label* p = $lbl$$label; + + // 'p' is `NULL' when this encoding class is used only to + // determine the size of the encoded instruction. + // Use a bound dummy label in that case. + Label d; + __ bind(d); + Label& l = (NULL == p) ? 
d : *(p); + + unsigned long instr = $primary; + if (instr == CR_ZOPC) { + __ z_cr($src1$$Register, $src2$$Register); + } else if (instr == CLR_ZOPC) { + __ z_clr($src1$$Register, $src2$$Register); + } else if (instr == CGR_ZOPC) { + __ z_cgr($src1$$Register, $src2$$Register); + } else { + guarantee(instr == CLGR_ZOPC, "opcode not implemented"); + __ z_clgr($src1$$Register, $src2$$Register); + } /* Far variant: the compare selected above is followed by a separate z_brcl. */ + + __ z_brcl((Assembler::branch_condition)$cmp$$cmpcode, l); + %} + + enc_class z_enc_cmpb_regimm(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{ + MacroAssembler _masm(&cbuf); + Label* p = $lbl$$label; + + // 'p' is `NULL' when this encoding class is used only to + // determine the size of the encoded instruction. + // Use a bound dummy label in that case. + Label d; + __ bind(d); + Label& l = (NULL == p) ? d : *(p); + + Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode; + unsigned long instr = $primary; + if (instr == CIJ_ZOPC) { + __ z_cij($src1$$Register, $src2$$constant, cc, l); + } else if (instr == CLIJ_ZOPC) { + __ z_clij($src1$$Register, $src2$$constant, cc, l); + } else if (instr == CGIJ_ZOPC) { + __ z_cgij($src1$$Register, $src2$$constant, cc, l); + } else { + guarantee(instr == CLGIJ_ZOPC, "opcode not implemented"); + __ z_clgij($src1$$Register, $src2$$constant, cc, l); + } + %} + + enc_class z_enc_cmpb_regimmFar(iRegI src1, immI8 src2, Label lbl, cmpOpT cmp) %{ + MacroAssembler _masm(&cbuf); + Label* p = $lbl$$label; + + // 'p' is `NULL' when this encoding class is used only to + // determine the size of the encoded instruction. + // Use a bound dummy label in that case. + Label d; + __ bind(d); + Label& l = (NULL == p) ? 
d : *(p); + + unsigned long instr = $primary; + if (instr == CHI_ZOPC) { + __ z_chi($src1$$Register, $src2$$constant); + } else if (instr == CLFI_ZOPC) { + __ z_clfi($src1$$Register, $src2$$constant); + } else if (instr == CGHI_ZOPC) { + __ z_cghi($src1$$Register, $src2$$constant); + } else { + guarantee(instr == CLGFI_ZOPC, "opcode not implemented"); + __ z_clgfi($src1$$Register, $src2$$constant); + } /* Far variant: the compare selected above is followed by a separate z_brcl. */ + + __ z_brcl((Assembler::branch_condition)$cmp$$cmpcode, l); + %} + + // Call from Java to runtime. + enc_class z_enc_java_to_runtime_call(method meth) %{ + MacroAssembler _masm(&cbuf); + + // Save return pc before call to the place where we need it, since + // callee doesn't. + unsigned int start_off = __ offset(); + // Compute size of "larl + stg + call_c_opt". + const int size_of_code = 6 + 6 + MacroAssembler::call_far_patchable_size(); + __ get_PC(Z_R14, size_of_code); + __ save_return_pc(); + assert(__ offset() - start_off == 12, "bad prelude len: %d", __ offset() - start_off); + + assert((__ offset() & 2) == 0, "misaligned z_enc_java_to_runtime_call"); + address call_addr = __ call_c_opt((address)$meth$$method); + if (call_addr == NULL) { /* A NULL result from call_c_opt is recorded as an out-of-memory failure. */ + Compile::current()->env()->record_out_of_memory_failure(); + return; + } + +#ifdef ASSERT + // Plausibility check for size_of_code assumptions. + unsigned int actual_ret_off = __ offset(); + assert(start_off + size_of_code == actual_ret_off, "wrong return_pc"); +#endif + %} + + enc_class z_enc_java_static_call(method meth) %{ + // Call to fixup routine. Fixup routine uses ScopeDesc info to determine + // whom we intended to call. 
+ MacroAssembler _masm(&cbuf); + int ret_offset = 0; + + if (!_method) { + ret_offset = emit_call_reloc(_masm, $meth$$method, + relocInfo::runtime_call_w_cp_type, ra_); + } else { + int method_index = resolved_method_index(cbuf); + if (_optimized_virtual) { + ret_offset = emit_call_reloc(_masm, $meth$$method, + opt_virtual_call_Relocation::spec(method_index)); + } else { + ret_offset = emit_call_reloc(_masm, $meth$$method, + static_call_Relocation::spec(method_index)); + } + } + assert(__ inst_mark() != NULL, "emit_call_reloc must set_inst_mark()"); + + if (_method) { // Emit stub for static call. + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + // Java dynamic call + enc_class z_enc_java_dynamic_call(method meth) %{ + MacroAssembler _masm(&cbuf); + unsigned int start_off = __ offset(); + + int vtable_index = this->_vtable_index; + if (vtable_index == -4) { /* NOTE(review): -4 is presumably the "invalid vtable index" sentinel that selects the inline-cache path - confirm against the shared definition. */ + Register ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode()); + address virtual_call_oop_addr = NULL; + + AddressLiteral empty_ic((address) Universe::non_oop_word()); + virtual_call_oop_addr = __ pc(); + bool success = __ load_const_from_toc(ic_reg, empty_ic); + if (!success) { + Compile::current()->env()->record_out_of_memory_failure(); + return; + } + + // Call to fixup routine. Fixup routine uses ScopeDesc info + // to determine who we intended to call. + int method_index = resolved_method_index(cbuf); + __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr, method_index)); + unsigned int ret_off = __ offset(); + assert(__ offset() - start_off == 6, "bad prelude len: %d", __ offset() - start_off); + ret_off += emit_call_reloc(_masm, $meth$$method, relocInfo::none, ra_); + assert(_method, "lazy_constant may be wrong when _method==null"); + } else { + assert(!UseInlineCaches, "expect vtable calls only if not using ICs"); + // Go through the vtable. 
Get receiver klass. Receiver already + // checked for non-null. If we'll go through a C2I adapter, the + // interpreter expects method in Z_method. + // Use Z_method to temporarily hold the klass oop. Z_R1_scratch is destroyed + // by load_heap_oop_not_null. + __ load_klass(Z_method, Z_R2); + + int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes(); + int v_off = entry_offset + vtableEntry::method_offset_in_bytes(); + + if (Displacement::is_validDisp(v_off) ) { + // Can use load instruction with large offset. + __ z_lg(Z_method, Address(Z_method /*class oop*/, v_off /*method offset*/)); + } else { + // Worst case, must load offset into register. + __ load_const(Z_R1_scratch, v_off); + __ z_lg(Z_method, Address(Z_method /*class oop*/, Z_R1_scratch /*method offset*/)); + } + // NOTE: for vtable dispatches, the vtable entry will never be + // null. However it may very well end up in handle_wrong_method + // if the method is abstract for the particular class. + __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset())); + // Call target. Either compiled code or C2I adapter. + __ z_basr(Z_R14, Z_R1_scratch); + unsigned int ret_off = __ offset(); + } + %} + + enc_class z_enc_cmov_reg(cmpOp cmp, iRegI dst, iRegI src) %{ + MacroAssembler _masm(&cbuf); + Register Rdst = reg_to_register_object($dst$$reg); + Register Rsrc = reg_to_register_object($src$$reg); + + // Don't emit code if operands are identical (same register). + if (Rsrc != Rdst) { + Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode; + + if (VM_Version::has_LoadStoreConditional()) { + __ z_locgr(Rdst, Rsrc, cc); + } else { + // Branch if not (cmp cr). + Label done; + __ z_brc(Assembler::inverse_condition(cc), done); + __ z_lgr(Rdst, Rsrc); // Used for int and long+ptr. 
+ __ bind(done); + } + } + %} + + enc_class z_enc_cmov_imm(cmpOp cmp, iRegI dst, immI16 src) %{ /* Always branches around the load; there is no load-on-condition path for the immediate case. */ + MacroAssembler _masm(&cbuf); + Register Rdst = reg_to_register_object($dst$$reg); + int Csrc = $src$$constant; + Assembler::branch_condition cc = (Assembler::branch_condition)$cmp$$cmpcode; + Label done; + // Branch if not (cmp cr). + __ z_brc(Assembler::inverse_condition(cc), done); + if (Csrc == 0) { + // Don't set CC. + __ clear_reg(Rdst, true, false); // Use for int, long & ptr. + } else { + __ z_lghi(Rdst, Csrc); // Use for int, long & ptr. + } + __ bind(done); + %} + + enc_class z_enc_cctobool(iRegI res) %{ /* Turns the condition code into 0/1 in Rres: 0 on "not equal", 1 otherwise (both paths below). */ + MacroAssembler _masm(&cbuf); + Register Rres = reg_to_register_object($res$$reg); + + if (VM_Version::has_LoadStoreConditional()) { + __ load_const_optimized(Z_R0_scratch, 0L); // false (failed) + __ load_const_optimized(Rres, 1L); // true (succeed) + __ z_locgr(Rres, Z_R0_scratch, Assembler::bcondNotEqual); + } else { + Label done; + __ load_const_optimized(Rres, 0L); // false (failed) + __ z_brne(done); // Assume true to be the common case. 
+ __ load_const_optimized(Rres, 1L); // true (succeed) + __ bind(done); + } + %} + + enc_class z_enc_casI(iRegI compare_value, iRegI exchange_value, iRegP addr_ptr) %{ + MacroAssembler _masm(&cbuf); + Register Rcomp = reg_to_register_object($compare_value$$reg); + Register Rnew = reg_to_register_object($exchange_value$$reg); + Register Raddr = reg_to_register_object($addr_ptr$$reg); + + __ z_cs(Rcomp, Rnew, 0, Raddr); + %} + + enc_class z_enc_casL(iRegL compare_value, iRegL exchange_value, iRegP addr_ptr) %{ + MacroAssembler _masm(&cbuf); + Register Rcomp = reg_to_register_object($compare_value$$reg); + Register Rnew = reg_to_register_object($exchange_value$$reg); + Register Raddr = reg_to_register_object($addr_ptr$$reg); + + __ z_csg(Rcomp, Rnew, 0, Raddr); + %} + + enc_class z_enc_SwapI(memoryRSY mem, iRegI dst, iRegI tmp) %{ + MacroAssembler _masm(&cbuf); + Register Rdst = reg_to_register_object($dst$$reg); + Register Rtmp = reg_to_register_object($tmp$$reg); + guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF"); + Label retry; + + // Iterate until swap succeeds. + __ z_llgf(Rtmp, $mem$$Address); // current contents + __ bind(retry); + // Nothing to calculate for a swap: Rdst already holds the new value. + __ z_csy(Rtmp, Rdst, $mem$$Address); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry. + __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value. + %} + + enc_class z_enc_SwapL(memoryRSY mem, iRegL dst, iRegL tmp) %{ + MacroAssembler _masm(&cbuf); + Register Rdst = reg_to_register_object($dst$$reg); + Register Rtmp = reg_to_register_object($tmp$$reg); + guarantee(Rdst != Rtmp, "Fix match rule to use TEMP_DEF"); + Label retry; + + // Iterate until swap succeeds. + __ z_lg(Rtmp, $mem$$Address); // current contents + __ bind(retry); + // Nothing to calculate for a swap: Rdst already holds the new value. + __ z_csg(Rtmp, Rdst, $mem$$Address); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry. 
+ __ z_lgr(Rdst, Rtmp); // Exchanged value from memory is return value. + %} + +%} // encode + +source %{ + + // Check whether all outs are Mach nodes whose ideal opcode is StoreN. If so, we can omit clearing the upper + // 32 bits after encoding. + static bool all_outs_are_Stores(const Node *n) { + for (DUIterator_Fast imax, k = n->fast_outs(imax); k < imax; k++) { + Node *out = n->fast_out(k); + if (!out->is_Mach() || out->as_Mach()->ideal_Opcode() != Op_StoreN) { + // Most other outs are SpillCopy, but there are various others. + // jvm98 has around 9% Encodes where we return false. + return false; + } + } + return true; + } + +%} // source + + +//----------FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. + +frame %{ + // What direction does stack grow in (assumed to be same for native & Java). + stack_direction(TOWARDS_LOW); + + // These two registers define part of the calling convention between + // compiled code and the interpreter. + + // Inline Cache Register + inline_cache_reg(Z_R9); // Z_inline_cache + + // Argument pointer for I2C adapters + // + // Tos is loaded in run_compiled_code to Z_ARG5=Z_R6. + // interpreter_arg_ptr_reg(Z_R6); + + // Temporary in compiled entry-points + // compiler_method_oop_reg(Z_R1);//Z_R1_scratch + + // Method Oop Register when calling interpreter + interpreter_method_oop_reg(Z_R9);//Z_method + + // Optional: name the operand used by cisc-spilling to access + // [stack_pointer + offset]. + cisc_spilling_operand_name(indOffset12); + + // Number of stack slots consumed by a Monitor enter. + sync_stack_slots(frame::jit_monitor_size_in_4_byte_units); + + // Compiled code's Frame Pointer + // + // z/Architecture stack pointer + frame_pointer(Z_R15); // Z_SP + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. I2CAdaptors convert from + // interpreted java to compiled java. + // + // Z_state holds pointer to caller's cInterpreter. 
+ interpreter_frame_pointer(Z_R7); // Z_state + + // Use alignment_in_bytes instead of log_2_of_alignment_in_bits. + stack_alignment(frame::alignment_in_bytes); + + in_preserve_stack_slots(frame::jit_in_preserve_size_in_4_byte_units); /* Count is in 4-byte units, per the constant's name. */ + + // A `slot' is assumed 4 bytes here! + // out_preserve_stack_slots(frame::jit_out_preserve_size_in_4_byte_units); + + // Number of outgoing stack slots killed above the + // out_preserve_stack_slots for calls to C. Supports the var-args + // backing area for register parms. + varargs_C_out_slots_killed(((frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size)); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + return_addr(REG Z_R14); + + // This is the body of the function + // + // void Matcher::calling_convention(OptoRegPair* sig /* array of ideal regs */, + // uint length /* length of array */, + // bool is_outgoing) + // + // The `sig' array is to be updated. Sig[j] represents the location + // of the j-th argument, either a register or a stack slot. + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + calling_convention %{ + // No difference between incoming and outgoing, so just pass false. + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); + %} + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. 
Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + c_calling_convention %{ + // This is obviously always outgoing. + // C argument must be in register AND stack slot. + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + // Location of native (C/C++) and interpreter return values. This + // is specified to be the same as Java. In the 32-bit VM, long + // values are actually returned from native calls in O0:O1 and + // returned to the interpreter in I0:I1. The copying to and from + // the register pairs is done by the appropriate call and epilog + // opcodes. This simplifies the register allocator. NOTE(review): O0:O1 / I0:I1 do not occur in the tables below, which use Z_R2 and Z_F0; the wording looks inherited from another port - confirm. + // + // Use register pair for c return value. + c_return_value %{ /* Both tables are indexed by ideal_reg and give the low and high halves of the returned OptoRegPair. */ + assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values"); + static int typeToRegLo[Op_RegL+1] = { 0, 0, Z_R2_num, Z_R2_num, Z_R2_num, Z_F0_num, Z_F0_num, Z_R2_num }; + static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, Z_R2_H_num, OptoReg::Bad, Z_F0_H_num, Z_R2_H_num }; + return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]); + %} + + // Use register pair for return value. + // Location of compiled Java return values. 
Same as C + return_value %{ /* Both tables are indexed by ideal_reg and give the low and high halves of the returned OptoRegPair. */ + assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values"); + static int typeToRegLo[Op_RegL+1] = { 0, 0, Z_R2_num, Z_R2_num, Z_R2_num, Z_F0_num, Z_F0_num, Z_R2_num }; + static int typeToRegHi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, Z_R2_H_num, OptoReg::Bad, Z_F0_H_num, Z_R2_H_num }; + return OptoRegPair(typeToRegHi[ideal_reg], typeToRegLo[ideal_reg]); + %} +%} /* frame */ + + +//----------ATTRIBUTES--------------------------------------------------------- + +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(1); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- + +// Cost attribute. Required. +ins_attrib ins_cost(DEFAULT_COST); + +// Is this instruction a non-matching short branch variant of some +// long branch? Not required. +ins_attrib ins_short_branch(0); + +// Indicates this is a trap based check node and final control-flow fixup +// must generate a proper fall through. +ins_attrib ins_is_TrapBasedCheckNode(true); + +// Attribute of instruction to tell how many constants the instruction will generate. +// (optional attribute). Default: 0. +ins_attrib ins_num_consts(0); + +// Required alignment attribute (must be a power of 2) +// specifies the alignment that some part of the instruction (not +// necessarily the start) requires. If > 1, a compute_padding() +// function must be provided for the instruction. +// +// WARNING: Don't use size(FIXED_SIZE) or size(VARIABLE_SIZE) in +// instructions which depend on the proper alignment, because the +// desired alignment isn't guaranteed for the call to "emit()" during +// the size computation. +ins_attrib ins_alignment(1); + +// Enforce/prohibit rematerializations. +// - If an instruction is attributed with 'ins_cannot_rematerialize(true)' +// then rematerialization of that instruction is prohibited and the +// instruction's value will be spilled if necessary. 
+// - If an instruction is attributed with 'ins_should_rematerialize(true)' +// then rematerialization is enforced and the instruction's value will +// never get spilled. A copy of the instruction will be inserted if +// necessary. +// Note: this may result in rematerializations in front of every use. +// (optional attribute) +ins_attrib ins_cannot_rematerialize(false); +ins_attrib ins_should_rematerialize(false); + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct +// parsing in the ADLC because operands constitute user defined types +// which are used in instruction definitions. + +//----------Simple Operands---------------------------------------------------- +// Immediate Operands +// Please note: +// Formats are generated automatically for constants and base registers. + +//---------------------------------------------- +// SIGNED (shorter than INT) immediate operands +//---------------------------------------------- + +// Byte Immediate: constant 'int -1' (the int is -1, or its low 8 bits are all ones) +operand immB_minus1() %{ + // sign-ext constant zero-ext constant + predicate((n->get_int() == -1) || ((n->get_int()&0x000000ff) == 0x000000ff)); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Byte Immediate: constant, but not 'int 0' nor 'int -1' (and low 8 bits not all ones). +operand immB_n0m1() %{ + // sign-ext constant zero-ext constant + predicate(n->get_int() != 0 && n->get_int() != -1 && (n->get_int()&0x000000ff) != 0x000000ff); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Short Immediate: constant 'int -1' (the int is -1, or its low 16 bits are all ones) +operand immS_minus1() %{ + // sign-ext constant zero-ext constant + predicate((n->get_int() == -1) || ((n->get_int()&0x0000ffff) == 0x0000ffff)); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Short Immediate: constant, but not 'int 0' nor 'int -1'. 
+operand immS_n0m1() %{ /* Also excludes values whose low 16 bits are all ones. */ + // sign-ext constant zero-ext constant + predicate(n->get_int() != 0 && n->get_int() != -1 && (n->get_int()&0x0000ffff) != 0x0000ffff); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +//----------------------------------------- +// SIGNED INT immediate operands +//----------------------------------------- + +// Integer Immediate: 32-bit (any ConI; no predicate) +operand immI() %{ + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: signed 20-bit +operand immI20() %{ + predicate(Immediate::is_simm20(n->get_int())); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: signed 16-bit +operand immI16() %{ + predicate(Immediate::is_simm16(n->get_int())); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: signed 8-bit +operand immI8() %{ + predicate(Immediate::is_simm8(n->get_int())); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: constant 'int 0' +operand immI_0() %{ + predicate(n->get_int() == 0); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: constant 'int -1' +operand immI_minus1() %{ + predicate(n->get_int() == -1); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: constant, but not 'int 0' nor 'int -1'. 
+operand immI_n0m1() %{ /* Matches any int constant other than 0 and -1. */ + predicate(n->get_int() != 0 && n->get_int() != -1); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +//------------------------------------------- +// UNSIGNED INT immediate operands +//------------------------------------------- + +// Unsigned Integer Immediate: 32-bit (any ConI; no predicate) +operand uimmI() %{ + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Integer Immediate: 16-bit +operand uimmI16() %{ + predicate(Immediate::is_uimm16(n->get_int())); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Integer Immediate: 12-bit +operand uimmI12() %{ + predicate(Immediate::is_uimm12(n->get_int())); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Integer Immediate: 8-bit +operand uimmI8() %{ + predicate(Immediate::is_uimm8(n->get_int())); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Integer Immediate: 6-bit +operand uimmI6() %{ + predicate(Immediate::is_uimm(n->get_int(), 6)); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Integer Immediate: 5-bit +operand uimmI5() %{ + predicate(Immediate::is_uimm(n->get_int(), 5)); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Length for SS instructions, given in DWs, +// possible range [1..512], i.e. [8..4096] Bytes +// used range [1..256], i.e. 
[8..2048] Bytes +// operand type long (matched as ConL and read with get_long()) +// Unsigned Immediate: 9-bit value; accepted when value-1 fits in 8 unsigned bits +operand SSlenDW() %{ + predicate(Immediate::is_uimm8(n->get_long()-1)); + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +//------------------------------------------ +// (UN)SIGNED INT specific values +//------------------------------------------ + +// Integer Immediate: the value 1 +operand immI_1() %{ + predicate(n->get_int() == 1); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 16. +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 24. +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 255 +operand immI_255() %{ + predicate(n->get_int() == 255); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the values 32-63 (both bounds inclusive) +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Integer Immediate: LL-part, extended by 1s (the upper 16 bits must all be ones). +operand uimmI_LL1() %{ + predicate((n->get_int() & 0xFFFF0000) == 0xFFFF0000); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Integer Immediate: LH-part, extended by 1s. 
+operand uimmI_LH1() %{ /* Matches when the low 16 bits are all ones. */ + predicate((n->get_int() & 0xFFFF) == 0xFFFF); + match(ConI); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +//------------------------------------------ +// SIGNED LONG immediate operands +//------------------------------------------ + +operand immL() %{ /* Long Immediate: 64-bit (any ConL; no predicate). */ + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: signed 32-bit +operand immL32() %{ + predicate(Immediate::is_simm32(n->get_long())); + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: signed 20-bit +operand immL20() %{ + predicate(Immediate::is_simm20(n->get_long())); + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: signed 16-bit +operand immL16() %{ + predicate(Immediate::is_simm16(n->get_long())); + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: signed 8-bit +operand immL8() %{ + predicate(Immediate::is_simm8(n->get_long())); + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +//-------------------------------------------- +// UNSIGNED LONG immediate operands +//-------------------------------------------- + +operand uimmL32() %{ /* Unsigned Long Immediate: 32-bit. */ + predicate(Immediate::is_uimm32(n->get_long())); + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Long Immediate: 16-bit +operand uimmL16() %{ + predicate(Immediate::is_uimm16(n->get_long())); + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Long Immediate: 12-bit +operand uimmL12() %{ + predicate(Immediate::is_uimm12(n->get_long())); + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +// Unsigned Long Immediate: 8-bit +operand uimmL8() %{ + predicate(Immediate::is_uimm8(n->get_long())); + match(ConL); + op_cost(1); + format %{ %} + interface(CONST_INTER); +%} + +//------------------------------------------- +// (UN)SIGNED LONG specific values 
+//-------------------------------------------
+
+// Long Immediate: the value FF
+operand immL_FF() %{
+  predicate(n->get_long() == 0xFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: the value FFFF
+operand immL_FFFF() %{
+  predicate(n->get_long() == 0xFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: the value FFFFFFFF
+operand immL_FFFFFFFF() %{
+  predicate(n->get_long() == 0xFFFFFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: the value 0
+operand immL_0() %{
+  predicate(n->get_long() == 0L);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: LL-part, extended by 1s.
+// Predicate: every bit outside the lowest 16 (mask 0xFFFFFFFFFFFF0000L) is one.
+operand uimmL_LL1() %{
+  predicate((n->get_long() & 0xFFFFFFFFFFFF0000L) == 0xFFFFFFFFFFFF0000L);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: LH-part, extended by 1s.
+// Predicate: every bit outside bits 16..31 (mask 0xFFFFFFFF0000FFFFL) is one.
+operand uimmL_LH1() %{
+  predicate((n->get_long() & 0xFFFFFFFF0000FFFFL) == 0xFFFFFFFF0000FFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: HL-part, extended by 1s.
+// Predicate: every bit outside bits 32..47 (mask 0xFFFF0000FFFFFFFFL) is one.
+operand uimmL_HL1() %{
+  predicate((n->get_long() & 0xFFFF0000FFFFFFFFL) == 0xFFFF0000FFFFFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Unsigned Long Immediate: HH-part, extended by 1s.
+// Predicate: the low 48 bits (mask 0xFFFFFFFFFFFFL) must be all ones.
+operand uimmL_HH1() %{
+  predicate((n->get_long() & 0xFFFFFFFFFFFFL) == 0xFFFFFFFFFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Long Immediate: low 32-bit mask
+// (tests for the same value, 0xFFFFFFFFL, as operand immL_FFFFFFFF)
+operand immL_32bits() %{
+  predicate(n->get_long() == 0xFFFFFFFFL);
+  match(ConL);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//--------------------------------------
+// POINTER immediate operands
+//--------------------------------------
+
+// Pointer Immediate: 64-bit (any ConP, no predicate)
+operand immP() %{
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 32-bit
+operand immP32() %{
+  predicate(Immediate::is_uimm32(n->get_ptr()));
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 16-bit
+operand immP16() %{
+  predicate(Immediate::is_uimm16(n->get_ptr()));
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Pointer Immediate: 8-bit
+operand immP8() %{
+  predicate(Immediate::is_uimm8(n->get_ptr()));
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//-----------------------------------
+// POINTER specific values
+//-----------------------------------
+
+// Pointer Immediate: NULL
+operand immP0() %{
+  predicate(n->get_ptr() == 0);
+  match(ConP);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+//---------------------------------------------
+// NARROW POINTER immediate operands
+//---------------------------------------------
+
+// Narrow Pointer Immediate
+operand immN() %{
+  match(ConN);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow Klass Immediate: any ConNKlass constant (no predicate)
+operand immNKlass() %{
+  match(ConNKlass);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow Pointer Immediate: 8-bit
+operand immN8() %{
+  predicate(Immediate::is_uimm8(n->get_narrowcon()));
+  match(ConN);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Narrow NULL Pointer Immediate
+operand immN0() %{
+  predicate(n->get_narrowcon() == 0);
+  match(ConN);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// FLOAT and DOUBLE immediate operands
+
+// Double Immediate
+operand immD() %{
+  match(ConD);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate: +-0
+// (floating point comparison against 0)
+operand immDpm0() %{
+  predicate(n->getd() == 0);
+  match(ConD);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Double Immediate: +0
+// NOTE(review): presumably jlong_cast() yields the raw bit pattern, so that
+// -0.0 is rejected here -- verify against its definition.
+operand immDp0() %{
+  predicate(jlong_cast(n->getd()) == 0);
+  match(ConD);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate
+operand immF() %{
+  match(ConF);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate: +-0
+// (floating point comparison against 0)
+operand immFpm0() %{
+  predicate(n->getf() == 0);
+  match(ConF);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// Float Immediate: +0
+// NOTE(review): presumably jint_cast() yields the raw bit pattern, so that
+// -0.0f is rejected here -- verify against its definition.
+operand immFp0() %{
+  predicate(jint_cast(n->getf()) == 0);
+  match(ConF);
+  op_cost(1);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
+// End of Immediate Operands
+
+// Integer Register Operands
+// Integer Register
+// General int register operand; besides RegI it also matches each of the
+// restricted int register operands listed below.
+operand iRegI() %{
+  constraint(ALLOC_IN_RC(z_int_reg));
+  match(RegI);
+  match(noArg_iRegI);
+  match(rarg1RegI);
+  match(rarg2RegI);
+  match(rarg3RegI);
+  match(rarg4RegI);
+  match(rarg5RegI);
+  match(noOdd_iRegI);
+  match(revenRegI);
+  match(roddRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Int register allocated from the z_no_arg_int_reg class.
+operand noArg_iRegI() %{
+  constraint(ALLOC_IN_RC(z_no_arg_int_reg));
+  match(RegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegI and roddRegI constitute an even-odd pair.
+// revenRegI allocates in z_rarg3_int_reg, the same class rarg3RegI uses.
+operand revenRegI() %{
+  constraint(ALLOC_IN_RC(z_rarg3_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegI and roddRegI constitute an even-odd pair.
+// roddRegI allocates in z_rarg4_int_reg, the same class rarg4RegI uses.
+operand roddRegI() %{
+  constraint(ALLOC_IN_RC(z_rarg4_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// rarg1RegI .. rarg5RegI: int operands, each restricted by ALLOC_IN_RC to the
+// register class z_rarg<N>_int_reg and matching the general iRegI operand.
+operand rarg1RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg1_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg2RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg2_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg3RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg3_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg4RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg4_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg5RegI() %{
+  constraint(ALLOC_IN_RC(z_rarg5_int_reg));
+  match(iRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Int register allocated from the z_no_odd_int_reg class; also matches
+// revenRegI.
+operand noOdd_iRegI() %{
+  constraint(ALLOC_IN_RC(z_no_odd_int_reg));
+  match(RegI);
+  match(revenRegI);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer Register
+// General pointer register operand; besides RegP it also matches each of the
+// restricted pointer register operands listed in its match rules.
+operand iRegP() %{
+  constraint(ALLOC_IN_RC(z_ptr_reg));
+  match(RegP);
+  match(noArg_iRegP);
+  match(rarg1RegP);
+  match(rarg2RegP);
+  match(rarg3RegP);
+  match(rarg4RegP);
+  match(rarg5RegP);
+  match(revenRegP);
+  match(roddRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// thread operand
+// (printed as the fixed text "Z_THREAD")
+operand threadRegP() %{
+  constraint(ALLOC_IN_RC(z_thread_ptr_reg));
+  match(RegP);
+  format %{ "Z_THREAD" %}
+  interface(REG_INTER);
+%}
+
+// Pointer register allocated from the z_no_arg_ptr_reg class.
+operand noArg_iRegP() %{
+  constraint(ALLOC_IN_RC(z_no_arg_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// rarg1RegP .. rarg5RegP: pointer operands, each restricted by ALLOC_IN_RC to
+// the register class z_rarg<N>_ptr_reg and matching the general iRegP operand.
+operand rarg1RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg1_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg2RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg2_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg3RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg3_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg4RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg4_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg5RegP() %{
+  constraint(ALLOC_IN_RC(z_rarg5_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer register usable as the base of a memory operand (class
+// z_memory_ptr_reg); matches RegP, iRegP and threadRegP.
+operand memoryRegP() %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(RegP);
+  match(iRegP);
+  match(threadRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegP and roddRegP constitute an even-odd pair.
+// revenRegP allocates in z_rarg3_ptr_reg, the same class rarg3RegP uses.
+operand revenRegP() %{
+  constraint(ALLOC_IN_RC(z_rarg3_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegP and roddRegP constitute an even-odd pair.
+// roddRegP allocates in z_rarg4_ptr_reg, the same class rarg4RegP uses.
+operand roddRegP() %{
+  constraint(ALLOC_IN_RC(z_rarg4_ptr_reg));
+  match(iRegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer register allocated from the z_lock_ptr_reg class.
+operand lock_ptr_RegP() %{
+  constraint(ALLOC_IN_RC(z_lock_ptr_reg));
+  match(RegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Pointer register allocated from the z_rscratch2_bits64_reg class.
+operand rscratch2RegP() %{
+  constraint(ALLOC_IN_RC(z_rscratch2_bits64_reg));
+  match(RegP);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Narrow Pointer Register
+// Allocated from z_int_reg, the same register class iRegI uses.
+operand iRegN() %{
+  constraint(ALLOC_IN_RC(z_int_reg));
+  match(RegN);
+  match(noArg_iRegN);
+  match(rarg1RegN);
+  match(rarg2RegN);
+  match(rarg3RegN);
+  match(rarg4RegN);
+  match(rarg5RegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand noArg_iRegN() %{
+  constraint(ALLOC_IN_RC(z_no_arg_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg1RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg1_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg2RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg2_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg3RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg3_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg4RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg4_int_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// NOTE(review): rarg1RegN..rarg4RegN use z_rarg<N>_int_reg, whereas this
+// operand uses z_rarg5_ptrN_reg -- confirm the different class is intended.
+operand rarg5RegN() %{
+  constraint(ALLOC_IN_RC(z_rarg5_ptrN_reg));
+  match(iRegN);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Long Register
+operand iRegL() %{
+  constraint(ALLOC_IN_RC(z_long_reg));
+  match(RegL);
+  match(revenRegL);
+  match(roddRegL);
+  match(rarg1RegL);
+  match(rarg5RegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegL and roddRegL constitute an even-odd pair.
+operand revenRegL() %{
+  constraint(ALLOC_IN_RC(z_rarg3_long_reg));
+  match(iRegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// revenRegL and roddRegL constitute an even-odd pair.
+operand roddRegL() %{
+  constraint(ALLOC_IN_RC(z_rarg4_long_reg));
+  match(iRegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg1RegL() %{
+  constraint(ALLOC_IN_RC(z_rarg1_long_reg));
+  match(iRegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand rarg5RegL() %{
+  constraint(ALLOC_IN_RC(z_rarg5_long_reg));
+  match(iRegL);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Flag Registers
+operand flagsReg() %{
+  constraint(ALLOC_IN_RC(z_condition_reg));
+  match(RegFlags);
+  format %{ "CR" %}
+  interface(REG_INTER);
+%}
+
+// Condition Code Flag Registers for rules with result tuples
+// (identical to flagsReg except for the REG_TUPLE_DEST_INTER interface)
+operand TD_flagsReg() %{
+  constraint(ALLOC_IN_RC(z_condition_reg));
+  match(RegFlags);
+  format %{ "CR" %}
+  interface(REG_TUPLE_DEST_INTER);
+%}
+
+// Double Register
+operand regD() %{
+  constraint(ALLOC_IN_RC(z_dbl_reg));
+  match(RegD);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Double register allocated from the z_rscratch1_dbl_reg class.
+operand rscratchRegD() %{
+  constraint(ALLOC_IN_RC(z_rscratch1_dbl_reg));
+  match(RegD);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Float Register
+operand regF() %{
+  constraint(ALLOC_IN_RC(z_flt_reg));
+  match(RegF);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Float register allocated from the z_rscratch1_flt_reg class.
+operand rscratchRegF() %{
+  constraint(ALLOC_IN_RC(z_rscratch1_flt_reg));
+  match(RegF);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Special Registers
+
+// Method Register
+operand inline_cache_regP(iRegP reg) %{
+  constraint(ALLOC_IN_RC(z_r9_regP)); // inline_cache_reg
+  match(reg);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand compiler_method_oop_regP(iRegP reg) %{
+  constraint(ALLOC_IN_RC(z_r1_RegP)); // compiler_method_oop_reg
+  match(reg);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Uses the same register class (z_r9_regP) as inline_cache_regP.
+operand interpreter_method_oop_regP(iRegP reg) %{
+  constraint(ALLOC_IN_RC(z_r9_regP)); // interpreter_method_oop_reg
+  match(reg);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Operands to remove register moves in unscaled mode.
+// Match read/write registers with an EncodeP node if neither shift nor add are required.
+// NOTE(review): in iRegP2N and iRegN2P the interface(REG_INTER) line has no
+// terminating ';', unlike every other register operand in this section --
+// confirm this is intended and how the ADLC treats it.
+operand iRegP2N(iRegP reg) %{
+  predicate(Universe::narrow_oop_shift() == 0 && _leaf->as_EncodeP()->in(0) == NULL);
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(EncodeP reg);
+  format %{ "$reg" %}
+  interface(REG_INTER)
+%}
+
+// Counterpart for DecodeN; additionally requires a NULL narrow oop base.
+operand iRegN2P(iRegN reg) %{
+  predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0 &&
+            _leaf->as_DecodeN()->in(0) == NULL);
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(DecodeN reg);
+  format %{ "$reg" %}
+  interface(REG_INTER)
+%}
+
+
+//----------Complex Operands---------------------------------------------------
+// Every memory operand below reports scale(0x0). Operands without an index
+// register report index(0xffffFFFF), annotated "noreg". The *Narrow variants
+// take the base through a DecodeN and are guarded by
+// Matcher::narrow_oop_use_complex_address().
+
+// Indirect Memory Reference
+operand indirect(memoryRegP base) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(base);
+  op_cost(1);
+  format %{ "#0[,$base]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect with Offset (long)
+// The offset is an immL20, i.e. it passed Immediate::is_simm20.
+operand indOffset20(memoryRegP base, immL20 offset) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP base offset);
+  op_cost(1);
+  format %{ "$offset[,$base]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset20Narrow(iRegN base, immL20 offset) %{
+  predicate(Matcher::narrow_oop_use_complex_address());
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (DecodeN base) offset);
+  op_cost(1);
+  format %{ "$offset[,$base]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Offset (short)
+// The offset is a uimmL12, i.e. it passed Immediate::is_uimm12.
+operand indOffset12(memoryRegP base, uimmL12 offset) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP base offset);
+  op_cost(1);
+  format %{ "$offset[[,$base]]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset12Narrow(iRegN base, uimmL12 offset) %{
+  predicate(Matcher::narrow_oop_use_complex_address());
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (DecodeN base) offset);
+  op_cost(1);
+  format %{ "$offset[[,$base]]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Register Index
+operand indIndex(memoryRegP base, iRegL index) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP base index);
+  op_cost(1);
+  format %{ "#0[($index,$base)]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+// Indirect with Offset (long) and index
+operand indOffset20index(memoryRegP base, immL20 offset, iRegL index) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (AddP base index) offset);
+  op_cost(1);
+  format %{ "$offset[($index,$base)]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset20indexNarrow(iRegN base, immL20 offset, iRegL index) %{
+  predicate(Matcher::narrow_oop_use_complex_address());
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (AddP (DecodeN base) index) offset);
+  op_cost(1);
+  format %{ "$offset[($index,$base)]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+// Indirect with Offset (short) and index
+operand indOffset12index(memoryRegP base, uimmL12 offset, iRegL index) %{
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (AddP base index) offset);
+  op_cost(1);
+  format %{ "$offset[[($index,$base)]]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset12indexNarrow(iRegN base, uimmL12 offset, iRegL index) %{
+  predicate(Matcher::narrow_oop_use_complex_address());
+  constraint(ALLOC_IN_RC(z_memory_ptr_reg));
+  match(AddP (AddP (DecodeN base) index) offset);
+  op_cost(1);
+  format %{ "$offset[[($index,$base)]]" %}
+  interface(MEMORY_INTER) %{
+    base($base);
+    index($index);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+//----------Special Memory Operands--------------------------------------------
+
+// Stack Slot Operand
+// This operand is used for loading and storing temporary values on
+// the stack where a match requires a value to flow through memory.
+// All five stackSlot operands use base 0xf (annotated Z_SP), no index
+// register, and take the displacement from $reg.
+operand stackSlotI(sRegI reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1);
+  format %{ "[$reg(stackSlotI)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf); // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg); // stack offset
+  %}
+%}
+
+operand stackSlotP(sRegP reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1);
+  format %{ "[$reg(stackSlotP)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf); // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg); // Stack Offset
+  %}
+%}
+
+operand stackSlotF(sRegF reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1);
+  format %{ "[$reg(stackSlotF)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf); // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg); // Stack Offset
+  %}
+%}
+
+operand stackSlotD(sRegD reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1);
+  //match(RegD);
+  format %{ "[$reg(stackSlotD)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf); // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg); // Stack Offset
+  %}
+%}
+
+operand stackSlotL(sRegL reg) %{
+  constraint(ALLOC_IN_RC(stack_slots));
+  op_cost(1); //match(RegL);
+  format %{ "[$reg(stackSlotL)]" %}
+  interface(MEMORY_INTER) %{
+    base(0xf); // Z_SP
+    index(0xffffFFFF); // noreg
+    scale(0x0);
+    disp($reg); // Stack Offset
+  %}
+%}
+
+// Operands for expressing Control Flow
+// NOTE: Label is a predefined operand which should not be redefined in
+// the AD file. It is generically handled within the ADLC.
+
+//----------Conditional Branch Operands----------------------------------------
+// Comparison Op - This is the operation of the comparison, and is limited to
+// the following set of codes:
+// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
+//
+// Other attributes of the comparison, such as unsignedness, are specified
+// by the comparison instruction that sets a condition code flags register.
+// That result is represented by a flags operand whose subtype is appropriate
+// to the unsignedness (etc.) of the comparison.
+//
+// Later, the instruction which matches both the Comparison Op (a Bool) and
+// the flags (produced by the Cmp) specifies the coding of the comparison op
+// by matching a specific subtype of Bool operand below.
+
+// INT cmpOps for CompareAndBranch and CompareAndTrap instructions should not
+// have mask bit #3 set.
+// The six relational codes below are all even values; only overflow(0x1)
+// is odd.
+operand cmpOpT() %{
+  match(Bool);
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x8); // Assembler::bcondEqual
+    not_equal(0x6); // Assembler::bcondNotEqual
+    less(0x4); // Assembler::bcondLow
+    greater_equal(0xa); // Assembler::bcondNotLow
+    less_equal(0xc); // Assembler::bcondNotHigh
+    greater(0x2); // Assembler::bcondHigh
+    overflow(0x1); // Assembler::bcondOverflow
+    no_overflow(0xe); // Assembler::bcondNotOverflow
+  %}
+%}
+
+// When used for floating point comparisons: unordered is treated as less.
+operand cmpOpF() %{
+  match(Bool);
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x8);
+    not_equal(0x7); // Includes 'unordered'.
+    less(0x5); // Includes 'unordered'.
+    greater_equal(0xa);
+    less_equal(0xd); // Includes 'unordered'.
+    greater(0x2);
+    overflow(0x0); // Not meaningful on z/Architecture.
+    no_overflow(0x0); // leave unchanged (zero) therefore
+  %}
+%}
+
+// "Regular" cmpOp for int comparisons, includes bit #3 (overflow).
+// The six relational codes are the same values as in cmpOpF; only the
+// overflow/no_overflow codes differ.
+// NOTE(review): the 'unordered' remarks below read as carried over from
+// cmpOpF -- confirm they are meant for the int case.
+operand cmpOp() %{
+  match(Bool);
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal(0x8);
+    not_equal(0x7); // Includes 'unordered'.
+    less(0x5); // Includes 'unordered'.
+    greater_equal(0xa);
+    less_equal(0xd); // Includes 'unordered'.
+    greater(0x2);
+    overflow(0x1); // Assembler::bcondOverflow
+    no_overflow(0xe); // Assembler::bcondNotOverflow
+  %}
+%}
+
+//----------OPERAND CLASSES----------------------------------------------------
+// Operand Classes are groups of operands that are used to simplify
+// instruction definitions by not requiring the AD writer to specify
+// separate instructions for every form of operand when the
+// instruction accepts multiple operand types with the same basic
+// encoding and format. The classic case of this is memory operands.
+// Indirect is not included since its use is limited to Compare & Swap
+// NOTE(review): the sentence above does not match the definitions below --
+// 'indirect' is the first member of every memory opclass. Confirm and update.
+
+// Most general memory operand, allows base, index, and long displacement.
+// (memory and memoryRXY list exactly the same operands.)
+opclass memory(indirect, indIndex, indOffset20, indOffset20Narrow, indOffset20index, indOffset20indexNarrow);
+opclass memoryRXY(indirect, indIndex, indOffset20, indOffset20Narrow, indOffset20index, indOffset20indexNarrow);
+
+// General memory operand, allows base, index, and short displacement.
+opclass memoryRX(indirect, indIndex, indOffset12, indOffset12Narrow, indOffset12index, indOffset12indexNarrow);
+
+// Memory operand, allows only base and long displacement.
+opclass memoryRSY(indirect, indOffset20, indOffset20Narrow);
+
+// Memory operand, allows only base and short displacement.
+opclass memoryRS(indirect, indOffset12, indOffset12Narrow);
+
+// Operand classes to match encode and decode.
+// Each of these two classes currently has a single member.
+opclass iRegN_P2N(iRegN);
+opclass iRegP_N2P(iRegP);
+
+
+//----------PIPELINE-----------------------------------------------------------
+// The pipeline block contains the attributes, resources, pipe_desc,
+// pipe_class and define sections that follow; it is closed after 'define'.
+pipeline %{
+
+//----------ATTRIBUTES---------------------------------------------------------
+attributes %{
+  // z/Architecture instructions are of length 2, 4, or 6 bytes.
+  variable_size_instructions;
+  instruction_unit_size = 2;
+
+  // Meaningless on z/Architecture.
+  max_instructions_per_bundle = 1;
+
+  // The z/Architecture processor fetches 64 bytes...
+  instruction_fetch_unit_size = 64;
+
+  // ...in one line.
+  // NOTE(review): the attribute below has no terminating ';', unlike the
+  // attributes above -- confirm the ADLC accepts this.
+  instruction_fetch_units = 1
+%}
+
+//----------RESOURCES----------------------------------------------------------
+// Resources are the functional units available to the machine.
+// The last three entries are unions ('|') of the numbered units above them.
+resources(
+  Z_BR, // branch unit
+  Z_CR, // condition unit
+  Z_FX1, // integer arithmetic unit 1
+  Z_FX2, // integer arithmetic unit 2
+  Z_LDST1, // load/store unit 1
+  Z_LDST2, // load/store unit 2
+  Z_FP1, // float arithmetic unit 1
+  Z_FP2, // float arithmetic unit 2
+  Z_LDST = Z_LDST1 | Z_LDST2,
+  Z_FX = Z_FX1 | Z_FX2,
+  Z_FP = Z_FP1 | Z_FP2
+  );
+
+//----------PIPELINE DESCRIPTION-----------------------------------------------
+// Pipeline Description specifies the stages in the machine's pipeline.
+pipe_desc(
+  // TODO: adapt
+  Z_IF, // instruction fetch
+  Z_IC,
+  Z_D0, // decode
+  Z_D1, // decode
+  Z_D2, // decode
+  Z_D3, // decode
+  Z_Xfer1,
+  Z_GD, // group definition
+  Z_MP, // map
+  Z_ISS, // issue
+  Z_RF, // resource fetch
+  Z_EX1, // execute (all units)
+  Z_EX2, // execute (FP, LDST)
+  Z_EX3, // execute (FP, LDST)
+  Z_EX4, // execute (FP)
+  Z_EX5, // execute (FP)
+  Z_EX6, // execute (FP)
+  Z_WB, // write back
+  Z_Xfer2,
+  Z_CP
+  );
+
+//----------PIPELINE CLASSES---------------------------------------------------
+// Pipeline Classes describe the stages in which input and output are
+// referenced by the hardware pipeline.
+
+// Providing the `ins_pipe' declarations in the instruction
+// specifications seems to be of little use. So we use
+// `pipe_class_dummy' for all our instructions at present.
+pipe_class pipe_class_dummy() %{
+  single_instruction;
+  fixed_latency(4);
+%}
+
+// SIGTRAP based implicit range checks in compiled code.
+// Currently, no pipe classes are used on z/Architecture.
+pipe_class pipe_class_trap() %{
+  single_instruction;
+%}
+
+// Register-register integer operation: both sources are read in stage Z_RF,
+// the result is written in Z_EX1, and a Z_FX unit is used in Z_RF.
+pipe_class pipe_class_fx_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
+  single_instruction;
+  dst : Z_EX1(write);
+  src1 : Z_RF(read);
+  src2 : Z_RF(read);
+  Z_FX : Z_RF;
+%}
+
+// Load/store: the memory operand is read in stage Z_RF, the destination is
+// written in Z_WB, and a Z_LDST unit is used in Z_RF.
+pipe_class pipe_class_ldst(iRegP dst, memory mem) %{
+  single_instruction;
+  mem : Z_RF(read);
+  dst : Z_WB(write);
+  Z_LDST : Z_RF;
+%}
+
+// MachNop is assigned the dummy pipe class.
+define %{
+  MachNop = pipe_class_dummy;
+%}
+
+%}
+
+//----------INSTRUCTIONS-------------------------------------------------------
+
+//---------- Chain stack slots between similar types --------
+// These rules match a plain (Set dst src) between a register operand and the
+// stackSlot operand of the same type.
+
+// Load integer from stack slot.
+instruct stkI_to_regI(iRegI dst, stackSlotI src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "L $dst,$src\t # stk reload int" %}
+  opcode(L_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store integer to stack slot.
+instruct regI_to_stkI(stackSlotI dst, iRegI src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "ST $src,$dst\t # stk spill int" %}
+  opcode(ST_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst)); // rs=rt
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load long from stack slot.
+instruct stkL_to_regL(iRegL dst, stackSlotL src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LG $dst,$src\t # stk reload long" %}
+  opcode(LG_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store long to stack slot.
+// NOTE(review): this rule declares size(6), while the int and pointer
+// stack-slot rules around it leave the size as a TODO -- confirm.
+instruct regL_to_stkL(stackSlotL dst, iRegL src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "STG $src,$dst\t # stk spill long" %}
+  opcode(STG_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst)); // rs=rt
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load pointer from stack slot, 64-bit encoding.
+instruct stkP_to_regP(iRegP dst, stackSlotP src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LG $dst,$src\t # stk reload ptr" %}
+  opcode(LG_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store pointer to stack slot.
+instruct regP_to_stkP(stackSlotP dst, iRegP src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "STG $src,$dst\t # stk spill ptr" %}
+  opcode(STG_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst)); // rs=rt
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Float types
+// NOTE(review): the format strings below print "(Y)" mnemonic variants, but
+// each opcode() names only the non-Y opcode -- confirm which encodings
+// z_form_rt_mem can actually emit here.
+
+// Load float value from stack slot.
+instruct stkF_to_regF(regF dst, stackSlotF src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "LE(Y) $dst,$src\t # stk reload float" %}
+  opcode(LE_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store float value to stack slot.
+instruct regF_to_stkF(stackSlotF dst, regF src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STE(Y) $src,$dst\t # stk spill float" %}
+  opcode(STE_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load double value from stack slot.
+instruct stkD_to_regD(regD dst, stackSlotD src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LD(Y) $dst,$src\t # stk reload double" %}
+  opcode(LD_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Store double value to stack slot.
+// NOTE(review): size(4) is declared here, whereas the matching reload rule
+// stkD_to_regD leaves its size as a TODO -- confirm the fixed size holds.
+instruct regD_to_stkD(stackSlotD dst, regD src) %{
+  match(Set dst src);
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STD(Y) $src,$dst\t # stk spill double" %}
+  opcode(STD_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Load/Store/Move Instructions---------------------------------------
+
+//----------Load Instructions--------------------------------------------------
+
+//------------------
+// MEMORY
+//------------------
+// The load rules below pass two opcodes to opcode() and encode through
+// z_form_rt_mem_opt. NOTE(review): presumably the encoder picks between the
+// two opcodes -- see the enc_class definition to confirm.
+
+// BYTE
+// Load Byte (8bit signed)
+instruct loadB(iRegI dst, memory mem) %{
+  match(Set dst (LoadB mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LB $dst, $mem\t # sign-extend byte to int" %}
+  opcode(LB_ZOPC, LB_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Byte (8bit signed) into a Long Register.
+// Matches the ConvI2L of a LoadB as a single rule.
+instruct loadB2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadB mem)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LGB $dst, $mem\t # sign-extend byte to long" %}
+  opcode(LGB_ZOPC, LGB_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into an int reg.
+instruct loadUB(iRegI dst, memory mem) %{
+  match(Set dst (LoadUB mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LLGC $dst,$mem\t # zero-extend byte to int" %}
+  opcode(LLGC_ZOPC, LLGC_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Byte (8bit UNsigned) into a Long Register.
+// Matches the ConvI2L of a LoadUB as a single rule; uses the same opcode
+// (LLGC_ZOPC) as loadUB.
+instruct loadUB2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadUB mem)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LLGC $dst,$mem\t # zero-extend byte to long" %}
+  opcode(LLGC_ZOPC, LLGC_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// CHAR/SHORT
+
+// Load Short (16bit signed)
+// Declares size(Z_DISP_SIZE) and two different opcodes (LHY_ZOPC, LH_ZOPC),
+// unlike the neighbouring rules, which use Z_DISP3_SIZE and repeat one opcode.
+instruct loadS(iRegI dst, memory mem) %{
+  match(Set dst (LoadS mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP_SIZE);
+  format %{ "LH(Y) $dst,$mem\t # sign-extend short to int" %}
+  opcode(LHY_ZOPC, LH_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Short (16bit signed) into a Long Register.
+// Matches the ConvI2L of a LoadS as a single rule.
+instruct loadS2L(iRegL dst, memory mem) %{
+  match(Set dst (ConvI2L (LoadS mem)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LGH $dst,$mem\t # sign-extend short to long" %}
+  opcode(LGH_ZOPC, LGH_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Char (16bit Unsigned)
+instruct loadUS(iRegI dst, memory mem) %{
+  match(Set dst (LoadUS mem));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "LLGH $dst,$mem\t # zero-extend short to int" %}
+  opcode(LLGH_ZOPC, LLGH_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
+instruct loadUS2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LLGH $dst,$mem\t # zero-extend short to long" %} + opcode(LLGH_ZOPC, LLGH_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// INT + +// Load Integer +instruct loadI(iRegI dst, memory mem) %{ + match(Set dst (LoadI mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "L(Y) $dst,$mem\t #" %} + opcode(LY_ZOPC, L_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Load Integer and convert to long (ConvI2L folded into the load). +instruct loadI2L(iRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LGF $dst,$mem\t #" %} + opcode(LGF_ZOPC, LGF_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Load Unsigned Integer into a Long Register +instruct loadUI2L(iRegL dst, memory mem, immL_FFFFFFFF mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LLGF $dst,$mem\t # zero-extend int to long" %} + opcode(LLGF_ZOPC, LLGF_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// range = array length (=jint) +// Load Range +instruct loadRange(iRegI dst, memory mem) %{ + match(Set dst (LoadRange mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "L(Y) $dst,$mem\t # range" %} + opcode(LY_ZOPC, L_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// LONG + +// Load Long - aligned +instruct loadL(iRegL dst, memory mem) %{ + match(Set dst (LoadL mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LG $dst,$mem\t # long" %} + opcode(LG_ZOPC, LG_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Load Long - UNaligned +instruct loadL_unaligned(iRegL dst, memory 
mem) %{ + match(Set dst (LoadL_unaligned mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LG $dst,$mem\t # unaligned long" %} + opcode(LG_ZOPC, LG_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + + +// PTR + +// Load Pointer +instruct loadP(iRegP dst, memory mem) %{ + match(Set dst (LoadP mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LG $dst,$mem\t # ptr" %} + opcode(LG_ZOPC, LG_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// LoadP + CastP2X +instruct castP2X_loadP(iRegL dst, memory mem) %{ + match(Set dst (CastP2X (LoadP mem))); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LG $dst,$mem\t # ptr + p2x" %} + opcode(LG_ZOPC, LG_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Load Klass Pointer +instruct loadKlass(iRegP dst, memory mem) %{ + match(Set dst (LoadKlass mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LG $dst,$mem\t # klass ptr" %} + opcode(LG_ZOPC, LG_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +instruct loadTOC(iRegL dst) %{ + effect(DEF dst); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + // TODO: check why this attribute causes many unnecessary rematerializations. + // + // The graphs I saw just had high register pressure. Further the + // register TOC is loaded to is overwritten by the constant short + // after. Here something like round robin register allocation might + // help. But rematerializing seems not to hurt, jack even seems to + // improve slightly. + // + // Without this flag we get spill-split recycle sanity check + // failures in + // spec.benchmarks._228_jack.NfaState::GenerateCode. This happens in + // a block with three loadConP_dynTOC nodes and a tlsLoadP. The + // tlsLoadP has a huge amount of outs and forces the TOC down to the + // stack. 
Later tlsLoadP is rematerialized, leaving the register + // allocator with TOC on the stack and a badly placed reload. + ins_should_rematerialize(true); + format %{ "LARL $dst, &constant_pool\t; load dynTOC" %} + ins_encode %{ __ load_toc($dst$$Register); %} + ins_pipe(pipe_class_dummy); +%} + +// FLOAT + +// Load Float +instruct loadF(regF dst, memory mem) %{ + match(Set dst (LoadF mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "LE(Y) $dst,$mem" %} + opcode(LEY_ZOPC, LE_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// DOUBLE + +// Load Double +instruct loadD(regD dst, memory mem) %{ + match(Set dst (LoadD mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "LD(Y) $dst,$mem" %} + opcode(LDY_ZOPC, LD_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Load Double - UNaligned +instruct loadD_unaligned(regD dst, memory mem) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "LD(Y) $dst,$mem" %} + opcode(LDY_ZOPC, LD_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + + +//---------------------- +// IMMEDIATES +//---------------------- + +instruct loadConI(iRegI dst, immI src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + size(6); + format %{ "LGFI $dst,$src\t # (int)" %} + ins_encode %{ __ z_lgfi($dst$$Register, $src$$constant); %} // Sign-extend to 64 bit, it's at no cost. + ins_pipe(pipe_class_dummy); +%} + +instruct loadConI16(iRegI dst, immI16 src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LGHI $dst,$src\t # (int)" %} + ins_encode %{ __ z_lghi($dst$$Register, $src$$constant); %} // Sign-extend to 64 bit, it's at no cost. 
+ ins_pipe(pipe_class_dummy); +%} + +instruct loadConI_0(iRegI dst, immI_0 src, flagsReg cr) %{ + match(Set dst src); + effect(KILL cr); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "loadConI $dst,$src\t # (int) XGR because ZERO is loaded" %} + opcode(XGR_ZOPC); + ins_encode(z_rreform(dst, dst)); + ins_pipe(pipe_class_dummy); +%} + +instruct loadConUI16(iRegI dst, uimmI16 src) %{ + match(Set dst src); + // TODO: s390 port size(FIXED_SIZE); + format %{ "LLILL $dst,$src" %} + opcode(LLILL_ZOPC); + ins_encode(z_riform_unsigned(dst, src) ); + ins_pipe(pipe_class_dummy); +%} + +// Load long constant from TOC with pc-relative address. +instruct loadConL_pcrelTOC(iRegL dst, immL src) %{ + match(Set dst src); + ins_cost(MEMORY_REF_COST_LO); + size(6); + format %{ "LGRL $dst,[pcrelTOC]\t # load long $src from table" %} + ins_encode %{ + address long_address = __ long_constant($src$$constant); + if (long_address == NULL) { + Compile::current()->env()->record_out_of_memory_failure(); + return; + } + __ load_long_pcrelative($dst$$Register, long_address); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct loadConL32(iRegL dst, immL32 src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + size(6); + format %{ "LGFI $dst,$src\t # (long)" %} + ins_encode %{ __ z_lgfi($dst$$Register, $src$$constant); %} // Sign-extend to 64 bit, it's at no cost. + ins_pipe(pipe_class_dummy); +%} + +instruct loadConL16(iRegL dst, immL16 src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LGHI $dst,$src\t # (long)" %} + ins_encode %{ __ z_lghi($dst$$Register, $src$$constant); %} // Sign-extend to 64 bit, it's at no cost. 
+ ins_pipe(pipe_class_dummy); +%} + +instruct loadConL_0(iRegL dst, immL_0 src, flagsReg cr) %{ + match(Set dst src); + effect(KILL cr); + ins_cost(DEFAULT_COST_LOW); + format %{ "LoadConL $dst,$src\t # (long) XGR because ZERO is loaded" %} + opcode(XGR_ZOPC); + ins_encode(z_rreform(dst, dst)); + ins_pipe(pipe_class_dummy); +%} + +// Load ptr constant from TOC with pc-relative address. +// Special handling for oop constants required. +instruct loadConP_pcrelTOC(iRegP dst, immP src) %{ + match(Set dst src); + ins_cost(MEMORY_REF_COST_LO); + size(6); + format %{ "LGRL $dst,[pcrelTOC]\t # load ptr $src from table" %} + ins_encode %{ + relocInfo::relocType constant_reloc = $src->constant_reloc(); + if (constant_reloc == relocInfo::oop_type) { + AddressLiteral a = __ allocate_oop_address((jobject)$src$$constant); + bool success = __ load_oop_from_toc($dst$$Register, a); + if (!success) { + Compile::current()->env()->record_out_of_memory_failure(); + return; + } + } else if (constant_reloc == relocInfo::metadata_type) { + AddressLiteral a = __ constant_metadata_address((Metadata *)$src$$constant); + address const_toc_addr = __ address_constant((address)a.value(), RelocationHolder::none); + if (const_toc_addr == NULL) { + Compile::current()->env()->record_out_of_memory_failure(); + return; + } + __ load_long_pcrelative($dst$$Register, const_toc_addr); + } else { // Non-oop pointers, e.g. card mark base, heap top. + address long_address = __ long_constant((jlong)$src$$constant); + if (long_address == NULL) { + Compile::current()->env()->record_out_of_memory_failure(); + return; + } + __ load_long_pcrelative($dst$$Register, long_address); + } + %} + ins_pipe(pipe_class_dummy); +%} + +// We don't use immP16 to avoid problems with oops. 
+instruct loadConP0(iRegP dst, immP0 src, flagsReg cr) %{ + match(Set dst src); + effect(KILL cr); + size(4); + format %{ "XGR $dst,$dst\t # NULL ptr" %} + opcode(XGR_ZOPC); + ins_encode(z_rreform(dst, dst)); + ins_pipe(pipe_class_dummy); +%} + +//----------Load Float Constant Instructions------------------------------------------------- + +// We may not specify this instruction via an `expand' rule. If we do, +// code selection will forget that this instruction needs a floating +// point constant inserted into the code buffer. So `Shorten_branches' +// will fail. +instruct loadConF_dynTOC(regF dst, immF src, flagsReg cr) %{ + match(Set dst src); + effect(KILL cr); + ins_cost(MEMORY_REF_COST); + size(6); + // If this instruction rematerializes, it prolongs the live range + // of the toc node, causing illegal graphs. + ins_cannot_rematerialize(true); + format %{ "LE(Y) $dst,$constantoffset[,$constanttablebase]\t # load FLOAT $src from table" %} + ins_encode %{ + __ load_float_largeoffset($dst$$FloatRegister, $constantoffset($src), $constanttablebase, Z_R1_scratch); + %} + ins_pipe(pipe_class_dummy); +%} + +// We may not specify this instruction via an `expand' rule. If we do, +// code selection will forget that this instruction needs a floating +// point constant inserted into the code buffer. So `Shorten_branches' +// will fail. +instruct loadConD_dynTOC(regD dst, immD src, flagsReg cr) %{ + match(Set dst src); + effect(KILL cr); + ins_cost(MEMORY_REF_COST); + size(6); + // If this instruction rematerializes, it prolongs the live range + // of the toc node, causing illegal graphs. + ins_cannot_rematerialize(true); + format %{ "LD(Y) $dst,$constantoffset[,$constanttablebase]\t # load DOUBLE $src from table" %} + ins_encode %{ + __ load_double_largeoffset($dst$$FloatRegister, $constantoffset($src), $constanttablebase, Z_R1_scratch); + %} + ins_pipe(pipe_class_dummy); +%} + +// Special case: Load Const 0.0F + +// There's a special instr to clear a FP register. 
+instruct loadConF0(regF dst, immFp0 src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LZER $dst,$src\t # clear to zero" %} + opcode(LZER_ZOPC); + ins_encode(z_rreform(dst, Z_F0)); + ins_pipe(pipe_class_dummy); +%} + +// There's a special instr to clear a FP register (double variant: LZDR). +instruct loadConD0(regD dst, immDp0 src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LZDR $dst,$src\t # clear to zero" %} + opcode(LZDR_ZOPC); + ins_encode(z_rreform(dst, Z_F0)); + ins_pipe(pipe_class_dummy); +%} + + +//----------Store Instructions------------------------------------------------- + +// BYTE + +// Store Byte +instruct storeB(memory mem, iRegI src) %{ + match(Set mem (StoreB mem src)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "STC(Y) $src,$mem\t # byte" %} + opcode(STCY_ZOPC, STC_ZOPC); + ins_encode(z_form_rt_mem_opt(src, mem)); + ins_pipe(pipe_class_dummy); +%} + +instruct storeCM(memory mem, immI_0 src) %{ + match(Set mem (StoreCM mem src)); + ins_cost(MEMORY_REF_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "STC(Y) $src,$mem\t # CMS card-mark byte (must be 0!)" %} + ins_encode %{ + guarantee($mem$$index$$Register != Z_R0, "content will not be used."); + if ($mem$$index$$Register != noreg) { + // Can't use clear_mem --> load const zero and store character. 
+ __ load_const_optimized(Z_R0_scratch, (long)0); + if (Immediate::is_uimm12($mem$$disp)) { + __ z_stc(Z_R0_scratch, $mem$$Address); + } else { + __ z_stcy(Z_R0_scratch, $mem$$Address); + } + } else { + __ clear_mem(Address($mem$$Address), 1); + } + %} + ins_pipe(pipe_class_dummy); +%} + +// CHAR/SHORT + +// Store Char/Short +instruct storeC(memory mem, iRegI src) %{ + match(Set mem (StoreC mem src)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "STH(Y) $src,$mem\t # short" %} + opcode(STHY_ZOPC, STH_ZOPC); + ins_encode(z_form_rt_mem_opt(src, mem)); + ins_pipe(pipe_class_dummy); +%} + +// INT + +// Store Integer +instruct storeI(memory mem, iRegI src) %{ + match(Set mem (StoreI mem src)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "ST(Y) $src,$mem\t # int" %} + opcode(STY_ZOPC, ST_ZOPC); + ins_encode(z_form_rt_mem_opt(src, mem)); + ins_pipe(pipe_class_dummy); +%} + +// LONG + +// Store Long +instruct storeL(memory mem, iRegL src) %{ + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "STG $src,$mem\t # long" %} + opcode(STG_ZOPC, STG_ZOPC); + ins_encode(z_form_rt_mem_opt(src, mem)); + ins_pipe(pipe_class_dummy); +%} + +// PTR + +// Store Pointer +instruct storeP(memory dst, memoryRegP src) %{ + match(Set dst (StoreP dst src)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "STG $src,$dst\t # ptr" %} + opcode(STG_ZOPC, STG_ZOPC); + ins_encode(z_form_rt_mem_opt(src, dst)); + ins_pipe(pipe_class_dummy); +%} + +// FLOAT + +// Store Float +instruct storeF(memory mem, regF src) %{ + match(Set mem (StoreF mem src)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "STE(Y) $src,$mem\t # float" %} + opcode(STEY_ZOPC, STE_ZOPC); + ins_encode(z_form_rt_mem_opt(src, mem)); + ins_pipe(pipe_class_dummy); +%} + +// DOUBLE + +// Store Double +instruct storeD(memory mem, regD src) %{ + match(Set mem (StoreD mem src)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + 
format %{ "STD(Y) $src,$mem\t # double" %} + opcode(STDY_ZOPC, STD_ZOPC); + ins_encode(z_form_rt_mem_opt(src, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Prefetch instructions. Must be safe to execute with invalid address (cannot fault). + +// Should support match rule for PrefetchAllocation. +// Still needed after 8068977 for PrefetchAllocate. +instruct prefetchAlloc(memory mem) %{ + match(PrefetchAllocation mem); + predicate(VM_Version::has_Prefetch()); + ins_cost(DEFAULT_COST); + format %{ "PREFETCH 2, $mem\t # Prefetch allocation, z10 only" %} + ins_encode %{ __ z_pfd(0x02, $mem$$Address); %} + ins_pipe(pipe_class_dummy); +%} + +//----------Memory init instructions------------------------------------------ + +// Move Immediate to 1-byte memory. +instruct memInitB(memoryRSY mem, immI8 src) %{ + match(Set mem (StoreB mem src)); + ins_cost(MEMORY_REF_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "MVI $mem,$src\t # direct mem init 1" %} + ins_encode %{ + if (Immediate::is_uimm12((long)$mem$$disp)) { + __ z_mvi($mem$$Address, $src$$constant); + } else { + __ z_mviy($mem$$Address, $src$$constant); + } + %} + ins_pipe(pipe_class_dummy); +%} + +// Move Immediate to 2-byte memory. +instruct memInitC(memoryRS mem, immI16 src) %{ + match(Set mem (StoreC mem src)); + ins_cost(MEMORY_REF_COST); + size(6); + format %{ "MVHHI $mem,$src\t # direct mem init 2" %} + opcode(MVHHI_ZOPC); + ins_encode(z_silform(mem, src)); + ins_pipe(pipe_class_dummy); +%} + +// Move Immediate to 4-byte memory. +instruct memInitI(memoryRS mem, immI16 src) %{ + match(Set mem (StoreI mem src)); + ins_cost(MEMORY_REF_COST); + size(6); + format %{ "MVHI $mem,$src\t # direct mem init 4" %} + opcode(MVHI_ZOPC); + ins_encode(z_silform(mem, src)); + ins_pipe(pipe_class_dummy); +%} + + +// Move Immediate to 8-byte memory. 
+instruct memInitL(memoryRS mem, immL16 src) %{ + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST); + size(6); + format %{ "MVGHI $mem,$src\t # direct mem init 8" %} + opcode(MVGHI_ZOPC); + ins_encode(z_silform(mem, src)); + ins_pipe(pipe_class_dummy); +%} + +// Move Immediate pointer to 8-byte memory. +instruct memInitP(memoryRS mem, immP16 src) %{ + match(Set mem (StoreP mem src)); + ins_cost(MEMORY_REF_COST); + size(6); + format %{ "MVGHI $mem,$src\t # direct mem init 8" %} + opcode(MVGHI_ZOPC); + ins_encode(z_silform(mem, src)); + ins_pipe(pipe_class_dummy); +%} + + +//----------Instructions for compressed pointers (cOop and NKlass)------------- + +// See cOop encoding classes for elaborate comment. + +// Moved here because it is needed in expand rules for encode. +// Long negation. +instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{ + match(Set dst (SubL zero src)); + effect(KILL cr); + size(4); + format %{ "NEG $dst, $src\t # long" %} + ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %} + ins_pipe(pipe_class_dummy); +%} + +// Load Compressed Pointer + +// Load narrow oop +instruct loadN(iRegN dst, memory mem) %{ + match(Set dst (LoadN mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LoadN $dst,$mem\t# (cOop)" %} + opcode(LLGF_ZOPC, LLGF_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Load narrow Klass Pointer +instruct loadNKlass(iRegN dst, memory mem) %{ + match(Set dst (LoadNKlass mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LoadNKlass $dst,$mem\t# (klass cOop)" %} + opcode(LLGF_ZOPC, LLGF_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Load constant Compressed Pointer + +instruct loadConN(iRegN dst, immN src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + size(6); + format %{ "loadConN $dst,$src\t # (cOop)" %} + ins_encode %{ + AddressLiteral cOop = __ 
constant_oop_address((jobject)$src$$constant); + __ relocate(cOop.rspec(), 1); + __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value()); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct loadConN0(iRegN dst, immN0 src, flagsReg cr) %{ + match(Set dst src); + effect(KILL cr); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "loadConN $dst,$src\t # (cOop) XGR because ZERO is loaded" %} + opcode(XGR_ZOPC); + ins_encode(z_rreform(dst, dst)); + ins_pipe(pipe_class_dummy); +%} + +instruct loadConNKlass(iRegN dst, immNKlass src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + size(6); + format %{ "loadConNKlass $dst,$src\t # (cKlass)" %} + ins_encode %{ + AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant); + __ relocate(NKlass.rspec(), 1); + __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value()); + %} + ins_pipe(pipe_class_dummy); +%} + +// Load and Decode Compressed Pointer +// optimized variants for Unscaled cOops + +instruct decodeLoadN(iRegP dst, memory mem) %{ + match(Set dst (DecodeN (LoadN mem))); + predicate(false && (Universe::narrow_oop_base()==NULL)&&(Universe::narrow_oop_shift()==0)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "DecodeLoadN $dst,$mem\t# (cOop Load+Decode)" %} + opcode(LLGF_ZOPC, LLGF_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +instruct decodeLoadNKlass(iRegP dst, memory mem) %{ + match(Set dst (DecodeNKlass (LoadNKlass mem))); + predicate(false && (Universe::narrow_klass_base()==NULL)&&(Universe::narrow_klass_shift()==0)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "DecodeLoadNKlass $dst,$mem\t# (load/decode NKlass)" %} + opcode(LLGF_ZOPC, LLGF_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{ + match(Set dst (DecodeNKlass src)); + ins_cost(3 * DEFAULT_COST); + size(12); + format %{ "DecodeLoadConNKlass $dst,$src\t # 
decode(cKlass)" %} + ins_encode %{ + AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant); + __ relocate(NKlass.rspec(), 1); + __ load_const($dst$$Register, (Klass*)NKlass.value()); + %} + ins_pipe(pipe_class_dummy); +%} + +// Decode Compressed Pointer + +// General decoder +instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{ + match(Set dst (DecodeN src)); + effect(KILL cr); + predicate(Universe::narrow_oop_base() == NULL || !ExpandLoadingBaseDecode); + ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "decodeN $dst,$src\t# (decode cOop)" %} + ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, true); %} + ins_pipe(pipe_class_dummy); +%} + +// General Klass decoder +instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{ + match(Set dst (DecodeNKlass src)); + effect(KILL cr); + ins_cost(3 * DEFAULT_COST); + format %{ "decode_klass $dst,$src" %} + ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %} + ins_pipe(pipe_class_dummy); +%} + +// General decoder, NN variant: selected when the type is NotNull or Constant. +instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{ + match(Set dst (DecodeN src)); + effect(KILL cr); + predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) && + (Universe::narrow_oop_base()== NULL || !ExpandLoadingBaseDecode_NN)); + ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "decodeN $dst,$src\t# (decode cOop NN)" %} + ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %} + ins_pipe(pipe_class_dummy); +%} + + instruct loadBase(iRegL dst, immL baseImm) %{ + effect(DEF dst, USE baseImm); + predicate(false); + format %{ "llihl $dst=$baseImm \t// load heap base" %} + ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %} + ins_pipe(pipe_class_dummy); + %} + + // Decoder for heapbased mode peeling off loading the base. 
+ instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{ + match(Set dst (DecodeN src base)); + // Note: Effect TEMP dst was used with the intention to get + // different regs for dst and base, but this has caused ADLC to + // generate wrong code. Oop_decoder generates additional lgr when + // dst==base. + effect(KILL cr); + predicate(false); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "decodeN $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t# (decode cOop)" %} + ins_encode %{ + __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register, + (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base())); + %} + ins_pipe(pipe_class_dummy); + %} + + // Decoder for heapbased mode peeling off loading the base. + instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{ + match(Set dst (DecodeN src base)); + effect(KILL cr); + predicate(false); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "decodeN $dst = ($src << 3) + $base + pow2_offset\t# (decode cOop)" %} + ins_encode %{ + __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register, + (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base())); + %} + ins_pipe(pipe_class_dummy); + %} + +// Decoder for heapbased mode peeling off loading the base: expands to loadBase + decodeN_base. +instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{ + match(Set dst (DecodeN src)); + predicate(Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode); + ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + expand %{ + immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %} + iRegL base; + loadBase(base, baseImm); + decodeN_base(dst, src, base, cr); + %} +%} + +// Decoder for heapbased mode peeling off loading the base. 
+instruct decodeN_NN_Ex(iRegP dst, iRegN src, flagsReg cr) %{ + match(Set dst (DecodeN src)); + predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) && + Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode_NN); + ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + expand %{ + immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %} + iRegL base; + loadBase(base, baseImm); + decodeN_NN_base(dst, src, base, cr); + %} +%} + +// Encode Compressed Pointer + +// General encoder +instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{ + match(Set dst (EncodeP src)); + effect(KILL cr); + predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) && + (Universe::narrow_oop_base() == 0 || + Universe::narrow_oop_base_disjoint() || + !ExpandLoadingBaseEncode)); + ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "encodeP $dst,$src\t# (encode cOop)" %} + ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %} + ins_pipe(pipe_class_dummy); +%} + +// General Klass encoder +instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{ + match(Set dst (EncodePKlass src)); + effect(KILL cr); + format %{ "encode_klass $dst,$src" %} + ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %} + ins_pipe(pipe_class_dummy); +%} + +instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{ + match(Set dst (EncodeP src)); + effect(KILL cr); + predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) && + (Universe::narrow_oop_base() == 0 || + Universe::narrow_oop_base_disjoint() || + !ExpandLoadingBaseEncode_NN)); + ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "encodeP $dst,$src\t# (encode cOop)" %} + ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, 
Z_R1_scratch, -1, all_outs_are_Stores(this)); %} + ins_pipe(pipe_class_dummy); +%} + + // Encoder for heapbased mode peeling off loading the base. + instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{ + match(Set dst (EncodeP src (Binary base dst))); + effect(TEMP_DEF dst); + predicate(false); + ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "encodeP $dst = ($src>>3) +$base + pow2_offset\t# (encode cOop)" %} + ins_encode %{ + jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset + (((uint64_t)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift()); + __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset); + %} + ins_pipe(pipe_class_dummy); + %} + + // Encoder for heapbased mode peeling off loading the base. + instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{ + match(Set dst (EncodeP src base)); + effect(USE pow2_offset); + predicate(false); + ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "encodeP $dst = ($src>>3) +$base + $pow2_offset\t# (encode cOop)" %} + ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %} + ins_pipe(pipe_class_dummy); + %} + +// Encoder for heapbased mode peeling off loading the base. 
+instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{ + match(Set dst (EncodeP src)); + effect(KILL cr); + predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) && + (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode)); + ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + expand %{ + immL baseImm %{ ((jlong)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift() %} + immL_0 zero %{ (0) %} + flagsReg ccr; + iRegL base; + iRegL negBase; + loadBase(base, baseImm); + negL_reg_reg(negBase, zero, base, ccr); + encodeP_base(dst, src, negBase); + %} +%} + +// Encoder for heapbased mode peeling off loading the base. +instruct encodeP_NN_Ex(iRegN dst, iRegP src, flagsReg cr) %{ + match(Set dst (EncodeP src)); + effect(KILL cr); + predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) && + (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode_NN)); + ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + expand %{ + immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %} + immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)Universe::narrow_oop_base())) %} + immL_0 zero %{ 0 %} + flagsReg ccr; + iRegL base; + iRegL negBase; + loadBase(base, baseImm); + negL_reg_reg(negBase, zero, base, ccr); + encodeP_NN_base(dst, src, negBase, pow2_offset); + %} +%} + +// Store Compressed Pointer + +// Store narrow oop +instruct storeN(memory mem, iRegN_P2N src) %{ + match(Set mem (StoreN mem src)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "ST $src,$mem\t# (cOop)" %} + opcode(STY_ZOPC, ST_ZOPC); + ins_encode(z_form_rt_mem_opt(src, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Store Compressed Klass pointer +instruct storeNKlass(memory mem, iRegN src) %{ + match(Set mem (StoreNKlass mem src)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP_SIZE); + format %{ "ST $src,$mem\t# 
(cKlass)" %} + opcode(STY_ZOPC, ST_ZOPC); + ins_encode(z_form_rt_mem_opt(src, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Compare Compressed Pointers + +instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{ + match(Set cr (CmpN src1 src2)); + ins_cost(DEFAULT_COST); + size(2); + format %{ "CLR $src1,$src2\t# (cOop)" %} + opcode(CLR_ZOPC); + ins_encode(z_rrform(src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{ + match(Set cr (CmpN src1 src2)); + ins_cost(DEFAULT_COST); + size(6); + format %{ "CLFI $src1,$src2\t# (cOop) compare immediate narrow" %} + ins_encode %{ + AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant); + __ relocate(cOop.rspec(), 1); + __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value()); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{ + match(Set cr (CmpN src1 src2)); + ins_cost(DEFAULT_COST); + size(6); + format %{ "CLFI $src1,$src2\t# (NKlass) compare immediate narrow" %} + ins_encode %{ + AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant); + __ relocate(NKlass.rspec(), 1); + __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value()); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{ + match(Set cr (CmpN src1 src2)); + ins_cost(DEFAULT_COST); + size(2); + format %{ "LTR $src1,$src2\t# (cOop) LTR because comparing against zero" %} + opcode(LTR_ZOPC); + ins_encode(z_rrform(src1, src1)); + ins_pipe(pipe_class_dummy); +%} + + +//----------MemBar Instructions----------------------------------------------- + +// Memory barrier flavors + +instruct membar_acquire() %{ + match(MemBarAcquire); + match(LoadFence); + ins_cost(4*MEMORY_REF_COST); + size(0); + format %{ "MEMBAR-acquire" %} + ins_encode %{ __ z_acquire(); %} + ins_pipe(pipe_class_dummy); +%} + 
+instruct membar_acquire_lock() %{ + match(MemBarAcquireLock); + ins_cost(0); + size(0); + format %{ "MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %} + ins_encode(/*empty*/); + ins_pipe(pipe_class_dummy); +%} + +instruct membar_release() %{ + match(MemBarRelease); + match(StoreFence); + ins_cost(4 * MEMORY_REF_COST); + size(0); + format %{ "MEMBAR-release" %} + ins_encode %{ __ z_release(); %} + ins_pipe(pipe_class_dummy); +%} + +instruct membar_release_lock() %{ + match(MemBarReleaseLock); + ins_cost(0); + size(0); + format %{ "MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %} + ins_encode(/*empty*/); + ins_pipe(pipe_class_dummy); +%} + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(4 * MEMORY_REF_COST); + size(2); + format %{ "MEMBAR-volatile" %} + ins_encode %{ __ z_fence(); %} + ins_pipe(pipe_class_dummy); +%} + +instruct unnecessary_membar_volatile() %{ + match(MemBarVolatile); + predicate(Matcher::post_store_load_barrier(n)); + ins_cost(0); + size(0); + format %{ "# MEMBAR-volatile (empty)" %} + ins_encode(/*empty*/); + ins_pipe(pipe_class_dummy); +%} + +instruct membar_CPUOrder() %{ + match(MemBarCPUOrder); + ins_cost(0); + // TODO: s390 port size(FIXED_SIZE); + format %{ "MEMBAR-CPUOrder (empty)" %} + ins_encode(/*empty*/); + ins_pipe(pipe_class_dummy); +%} + +instruct membar_storestore() %{ + match(MemBarStoreStore); + ins_cost(0); + size(0); + format %{ "MEMBAR-storestore (empty)" %} + ins_encode(); + ins_pipe(pipe_class_dummy); +%} + + +//----------Register Move Instructions----------------------------------------- +instruct roundDouble_nop(regD dst) %{ + match(Set dst (RoundDouble dst)); + ins_cost(0); + // TODO: s390 port size(FIXED_SIZE); + // z/Architecture results are already "rounded" (i.e., normal-format IEEE). 
+ ins_encode(); + ins_pipe(pipe_class_dummy); +%} + +instruct roundFloat_nop(regF dst) %{ + match(Set dst (RoundFloat dst)); + ins_cost(0); + // TODO: s390 port size(FIXED_SIZE); + // z/Architecture results are already "rounded" (i.e., normal-format IEEE). + ins_encode(); + ins_pipe(pipe_class_dummy); +%} + +// Cast Long to Pointer for unsafe natives. +instruct castX2P(iRegP dst, iRegL src) %{ + match(Set dst (CastX2P src)); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "LGR $dst,$src\t # CastX2P" %} + ins_encode %{ __ lgr_if_needed($dst$$Register, $src$$Register); %} + ins_pipe(pipe_class_dummy); +%} + +// Cast Pointer to Long for unsafe natives. +instruct castP2X(iRegL dst, iRegP_N2P src) %{ + match(Set dst (CastP2X src)); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "LGR $dst,$src\t # CastP2X" %} + ins_encode %{ __ lgr_if_needed($dst$$Register, $src$$Register); %} + ins_pipe(pipe_class_dummy); +%} + +instruct stfSSD(stackSlotD stkSlot, regD src) %{ + // %%%% TODO: Tell the coalescer that this kind of node is a copy! + match(Set stkSlot src); // chain rule + ins_cost(MEMORY_REF_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ " STD $src,$stkSlot\t # stk" %} + opcode(STD_ZOPC); + ins_encode(z_form_rt_mem(src, stkSlot)); + ins_pipe(pipe_class_dummy); +%} + +instruct stfSSF(stackSlotF stkSlot, regF src) %{ + // %%%% TODO: Tell the coalescer that this kind of node is a copy! 
+ match(Set stkSlot src); // chain rule + ins_cost(MEMORY_REF_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "STE $src,$stkSlot\t # stk" %} + opcode(STE_ZOPC); + ins_encode(z_form_rt_mem(src, stkSlot)); + ins_pipe(pipe_class_dummy); +%} + +//----------Conditional Move--------------------------------------------------- + +instruct cmovN_reg(cmpOp cmp, flagsReg cr, iRegN dst, iRegN_P2N src) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveN,$cmp $dst,$src" %} + ins_encode(z_enc_cmov_reg(cmp,dst,src)); + ins_pipe(pipe_class_dummy); +%} + +instruct cmovN_imm(cmpOp cmp, flagsReg cr, iRegN dst, immN0 src) %{ + match(Set dst (CMoveN (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveN,$cmp $dst,$src" %} + ins_encode(z_enc_cmov_imm(cmp,dst,src)); + ins_pipe(pipe_class_dummy); +%} + +instruct cmovI_reg(cmpOp cmp, flagsReg cr, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveI,$cmp $dst,$src" %} + ins_encode(z_enc_cmov_reg(cmp,dst,src)); + ins_pipe(pipe_class_dummy); +%} + +instruct cmovI_imm(cmpOp cmp, flagsReg cr, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveI,$cmp $dst,$src" %} + ins_encode(z_enc_cmov_imm(cmp,dst,src)); + ins_pipe(pipe_class_dummy); +%} + +instruct cmovP_reg(cmpOp cmp, flagsReg cr, iRegP dst, iRegP_N2P src) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveP,$cmp $dst,$src" %} + ins_encode(z_enc_cmov_reg(cmp,dst,src)); + ins_pipe(pipe_class_dummy); +%} + +instruct 
cmovP_imm(cmpOp cmp, flagsReg cr, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveP,$cmp $dst,$src" %} + ins_encode(z_enc_cmov_imm(cmp,dst,src)); + ins_pipe(pipe_class_dummy); +%} + +instruct cmovF_reg(cmpOpF cmp, flagsReg cr, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveF,$cmp $dst,$src" %} + ins_encode %{ + // Don't emit code if operands are identical (same register). Otherwise branch around the register move on the inverse condition. + if ($dst$$FloatRegister != $src$$FloatRegister) { + Label done; + __ z_brc(Assembler::inverse_float_condition((Assembler::branch_condition)$cmp$$cmpcode), done); + __ z_ler($dst$$FloatRegister, $src$$FloatRegister); + __ bind(done); + } + %} + ins_pipe(pipe_class_dummy); +%} + +instruct cmovD_reg(cmpOpF cmp, flagsReg cr, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveD,$cmp $dst,$src" %} + ins_encode %{ + // Don't emit code if operands are identical (same register). 
+ if ($dst$$FloatRegister != $src$$FloatRegister) { + Label done; + __ z_brc(Assembler::inverse_float_condition((Assembler::branch_condition)$cmp$$cmpcode), done); + __ z_ldr($dst$$FloatRegister, $src$$FloatRegister); + __ bind(done); + } + %} + ins_pipe(pipe_class_dummy); +%} + +instruct cmovL_reg(cmpOp cmp, flagsReg cr, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveL,$cmp $dst,$src" %} + ins_encode(z_enc_cmov_reg(cmp,dst,src)); + ins_pipe(pipe_class_dummy); +%} + +instruct cmovL_imm(cmpOp cmp, flagsReg cr, iRegL dst, immL16 src) %{ + match(Set dst (CMoveL (Binary cmp cr) (Binary dst src))); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "CMoveL,$cmp $dst,$src" %} + ins_encode(z_enc_cmov_imm(cmp,dst,src)); + ins_pipe(pipe_class_dummy); +%} + +//----------OS and Locking Instructions---------------------------------------- + +// This name is KNOWN by the ADLC and cannot be changed. +// The ADLC forces a 'TypeRawPtr::BOTTOM' output type +// for this guy. 
+instruct tlsLoadP(threadRegP dst) %{ + match(Set dst (ThreadLocal)); + ins_cost(0); + size(0); + ins_should_rematerialize(true); + format %{ "# $dst=ThreadLocal" %} + ins_encode(/* empty */); + ins_pipe(pipe_class_dummy); +%} + /* CheckCastPP, CastPP and CastII below are matched in place (dst is both input and result) with size(0) and an empty encoding, so they emit no machine code. */ +instruct checkCastPP(iRegP dst) %{ + match(Set dst (CheckCastPP dst)); + size(0); + format %{ "# checkcastPP of $dst" %} + ins_encode(/*empty*/); + ins_pipe(pipe_class_dummy); +%} + +instruct castPP(iRegP dst) %{ + match(Set dst (CastPP dst)); + size(0); + format %{ "# castPP of $dst" %} + ins_encode(/*empty*/); + ins_pipe(pipe_class_dummy); +%} + +instruct castII(iRegI dst) %{ + match(Set dst (CastII dst)); + size(0); + format %{ "# castII of $dst" %} + ins_encode(/*empty*/); + ins_pipe(pipe_class_dummy); +%} + + +//----------Conditional_store-------------------------------------------------- +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. +// Sets flags (EQ) on success. + +// Implement LoadPLocked. Must be ordered against changes of the memory location +// by storePConditional. +// Don't know whether this is ever used. Encoded as an LG load of $mem into $dst via z_form_rt_mem_opt. +instruct loadPLocked(iRegP dst, memory mem) %{ + match(Set dst (LoadPLocked mem)); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "LG $dst,$mem\t # LoadPLocked" %} + opcode(LG_ZOPC, LG_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +// As compareAndSwapP, but return flag register instead of boolean value in +// int register. +// This instruction is matched if UseTLAB is off. Needed to pass +// option tests. Mem_ptr must be a memory operand, else this node +// does not get Flag_needs_anti_dependence_check set by adlc. If this +// is not set this node can be rematerialized which leads to errors.
+instruct storePConditional(indirect mem_ptr, rarg5RegP oldval, iRegP_N2P newval, flagsReg cr) %{ + match(Set cr (StorePConditional mem_ptr (Binary oldval newval))); + effect(KILL oldval); + // TODO: s390 port size(FIXED_SIZE); + // The result is the flags register cr; the swap itself is encoded by z_enc_casL and oldval is killed. + format %{ "storePConditional $oldval,$newval,$mem_ptr" %} + ins_encode(z_enc_casL(oldval, newval, mem_ptr)); + ins_pipe(pipe_class_dummy); +%} + +// As compareAndSwapL, but return flag register instead of boolean value in +// int register. +// Used by sun/misc/AtomicLongCSImpl.java. Mem_ptr must be a memory +// operand, else this node does not get +// Flag_needs_anti_dependence_check set by adlc. If this is not set +// this node can be rematerialized which leads to errors. +// Uses the same z_enc_casL encoding as storePConditional; the format text names +// StoreLConditional so that printed assembly identifies the matched node. +instruct storeLConditional(indirect mem_ptr, rarg5RegL oldval, iRegL newval, flagsReg cr) %{ + match(Set cr (StoreLConditional mem_ptr (Binary oldval newval))); + effect(KILL oldval); + // TODO: s390 port size(FIXED_SIZE); + format %{ "storeLConditional $oldval,$newval,$mem_ptr" %} + ins_encode(z_enc_casL(oldval, newval, mem_ptr)); + ins_pipe(pipe_class_dummy); +%} + +// No flag versions for CompareAndSwap{P,I,L,N} because matcher can't match them.
+ /* CompareAndSwap{I,L,P,N} rules that deliver a boolean in res. Each one chains a compare-and-swap encoding (z_enc_casI for the I and N forms, z_enc_casL for the L and P forms) with z_enc_cctobool(res). mem_ptr is only used, oldval is USE_KILL and cr is killed. The declared size is 16 bytes for the casI forms and 18 bytes for the casL forms. */ +instruct compareAndSwapI_bool(iRegP mem_ptr, rarg5RegI oldval, iRegI newval, iRegI res, flagsReg cr) %{ + match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); + effect(USE mem_ptr, USE_KILL oldval, KILL cr); + size(16); + format %{ "$res = CompareAndSwapI $oldval,$newval,$mem_ptr" %} + ins_encode(z_enc_casI(oldval, newval, mem_ptr), + z_enc_cctobool(res)); + ins_pipe(pipe_class_dummy); +%} + +instruct compareAndSwapL_bool(iRegP mem_ptr, rarg5RegL oldval, iRegL newval, iRegI res, flagsReg cr) %{ + match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); + effect(USE mem_ptr, USE_KILL oldval, KILL cr); + size(18); + format %{ "$res = CompareAndSwapL $oldval,$newval,$mem_ptr" %} + ins_encode(z_enc_casL(oldval, newval, mem_ptr), + z_enc_cctobool(res)); + ins_pipe(pipe_class_dummy); +%} + +instruct compareAndSwapP_bool(iRegP mem_ptr, rarg5RegP oldval, iRegP_N2P newval, iRegI res, flagsReg cr) %{ + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + effect(USE mem_ptr, USE_KILL oldval, KILL cr); + size(18); + format %{ "$res = CompareAndSwapP $oldval,$newval,$mem_ptr" %} + ins_encode(z_enc_casL(oldval, newval, mem_ptr), + z_enc_cctobool(res)); + ins_pipe(pipe_class_dummy); +%} + +instruct compareAndSwapN_bool(iRegP mem_ptr, rarg5RegN oldval, iRegN_P2N newval, iRegI res, flagsReg cr) %{ + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + effect(USE mem_ptr, USE_KILL oldval, KILL cr); + size(16); + format %{ "$res = CompareAndSwapN $oldval,$newval,$mem_ptr" %} + ins_encode(z_enc_casI(oldval, newval, mem_ptr), + z_enc_cctobool(res)); + ins_pipe(pipe_class_dummy); +%} + +//----------Atomic operations on memory (GetAndSet*, GetAndAdd*)--------------- + +// Exploit: direct memory arithmetic +// Prereqs: - instructions available +// - instructions guarantee atomicity +// - immediate operand to be added +// - immediate operand is small enough (8-bit signed).
+// - result of instruction is not used +instruct addI_mem_imm8_atomic_no_res(memoryRSY mem, Universe dummy, immI8 src, flagsReg cr) %{ + match(Set dummy (GetAndAddI mem src)); + effect(KILL cr); + predicate(VM_Version::has_AtomicMemWithImmALUOps() && n->as_LoadStore()->result_not_used()); + ins_cost(MEMORY_REF_COST); + size(6); + format %{ "ASI [$mem],$src\t # GetAndAddI (atomic)" %} + opcode(ASI_ZOPC); + ins_encode(z_siyform(mem, src)); + ins_pipe(pipe_class_dummy); +%} + +// Fallback: direct memory arithmetic not available +// Disadvantages: - CS-Loop required, very expensive. +// - more code generated (26 to xx bytes vs. 6 bytes). The sum is built in tmp with AHIK when has_DistinctOpnds(), else with LR + AHI; z_brne(retry) repeats the add until the CSY succeeds. +instruct addI_mem_imm16_atomic(memoryRSY mem, iRegI dst, immI16 src, iRegI tmp, flagsReg cr) %{ + match(Set dst (GetAndAddI mem src)); + effect(KILL cr, TEMP_DEF dst, TEMP tmp); + ins_cost(MEMORY_REF_COST+100*DEFAULT_COST); + format %{ "BEGIN ATOMIC {\n\t" + " LGF $dst,[$mem]\n\t" + " AHIK $tmp,$dst,$src\n\t" + " CSY $dst,$tmp,$mem\n\t" + " retry if failed\n\t" + "} END ATOMIC" + %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rtmp = $tmp$$Register; + int Isrc = $src$$constant; + Label retry; + + // Iterate until update with incremented value succeeds. + __ z_lgf(Rdst, $mem$$Address); // current contents + __ bind(retry); + // Calculate incremented value. + if (VM_Version::has_DistinctOpnds()) { + __ z_ahik(Rtmp, Rdst, Isrc); + } else { + __ z_lr(Rtmp, Rdst); + __ z_ahi(Rtmp, Isrc); + } + // Swap into memory location. + __ z_csy(Rdst, Rtmp, $mem$$Address); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ %} + ins_pipe(pipe_class_dummy); +%} + /* GetAndAddI fallback for a full immI addend: CS loop that copies dst to tmp, adds with AFI and retries until the CSY succeeds. NOTE(review): the format text prints LGR for the copy while the encoding emits z_lr. */ +instruct addI_mem_imm32_atomic(memoryRSY mem, iRegI dst, immI src, iRegI tmp, flagsReg cr) %{ + match(Set dst (GetAndAddI mem src)); + effect(KILL cr, TEMP_DEF dst, TEMP tmp); + ins_cost(MEMORY_REF_COST+200*DEFAULT_COST); + format %{ "BEGIN ATOMIC {\n\t" + " LGF $dst,[$mem]\n\t" + " LGR $tmp,$dst\n\t" + " AFI $tmp,$src\n\t" + " CSY $dst,$tmp,$mem\n\t" + " retry if failed\n\t" + "} END ATOMIC" + %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rtmp = $tmp$$Register; + int Isrc = $src$$constant; + Label retry; + + // Iterate until update with incremented value succeeds. + __ z_lgf(Rdst, $mem$$Address); // current contents + __ bind(retry); + // Calculate incremented value. + __ z_lr(Rtmp, Rdst); + __ z_afi(Rtmp, Isrc); + // Swap into memory location. + __ z_csy(Rdst, Rtmp, $mem$$Address); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry. + %} + ins_pipe(pipe_class_dummy); +%} + /* GetAndAddI fallback with a register addend: CS loop that forms the sum in tmp with ARK when has_DistinctOpnds(), else with LR + AR, and retries until the CSY succeeds. */ +instruct addI_mem_reg_atomic(memoryRSY mem, iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{ + match(Set dst (GetAndAddI mem src)); + effect(KILL cr, TEMP_DEF dst, TEMP tmp); + ins_cost(MEMORY_REF_COST+100*DEFAULT_COST); + format %{ "BEGIN ATOMIC {\n\t" + " LGF $dst,[$mem]\n\t" + " ARK $tmp,$dst,$src\n\t" + " CSY $dst,$tmp,$mem\n\t" + " retry if failed\n\t" + "} END ATOMIC" + %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rdst = $dst$$Register; + Register Rtmp = $tmp$$Register; + Label retry; + + // Iterate until update with incremented value succeeds. + __ z_lgf(Rdst, $mem$$Address); // current contents + __ bind(retry); + // Calculate incremented value. + if (VM_Version::has_DistinctOpnds()) { + __ z_ark(Rtmp, Rdst, Rsrc); + } else { + __ z_lr(Rtmp, Rdst); + __ z_ar(Rtmp, Rsrc); + } + __ z_csy(Rdst, Rtmp, $mem$$Address); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ %} + ins_pipe(pipe_class_dummy); +%} + + +// Exploit: direct memory arithmetic +// Prereqs: - instructions available +// - instructions guarantee atomicity +// - immediate operand to be added +// - immediate operand is small enough (8-bit signed). +// - result of instruction is not used +instruct addL_mem_imm8_atomic_no_res(memoryRSY mem, Universe dummy, immL8 src, flagsReg cr) %{ + match(Set dummy (GetAndAddL mem src)); + effect(KILL cr); + predicate(VM_Version::has_AtomicMemWithImmALUOps() && n->as_LoadStore()->result_not_used()); + ins_cost(MEMORY_REF_COST); + size(6); + format %{ "AGSI [$mem],$src\t # GetAndAddL (atomic)" %} + opcode(AGSI_ZOPC); + ins_encode(z_siyform(mem, src)); + ins_pipe(pipe_class_dummy); +%} + +// Fallback: direct memory arithmetic not available +// Disadvantages: - CS-Loop required, very expensive. +// - more code generated (26 to xx bytes vs. 6 bytes). The sum is built in tmp with AGHIK when has_DistinctOpnds(), else with LGR + AGHI; z_brne(retry) repeats the add until the CSG succeeds. +instruct addL_mem_imm16_atomic(memoryRSY mem, iRegL dst, immL16 src, iRegL tmp, flagsReg cr) %{ + match(Set dst (GetAndAddL mem src)); + effect(KILL cr, TEMP_DEF dst, TEMP tmp); + ins_cost(MEMORY_REF_COST+100*DEFAULT_COST); + format %{ "BEGIN ATOMIC {\n\t" + " LG $dst,[$mem]\n\t" + " AGHIK $tmp,$dst,$src\n\t" + " CSG $dst,$tmp,$mem\n\t" + " retry if failed\n\t" + "} END ATOMIC" + %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rtmp = $tmp$$Register; + int Isrc = $src$$constant; + Label retry; + + // Iterate until update with incremented value succeeds. + __ z_lg(Rdst, $mem$$Address); // current contents + __ bind(retry); + // Calculate incremented value. + if (VM_Version::has_DistinctOpnds()) { + __ z_aghik(Rtmp, Rdst, Isrc); + } else { + __ z_lgr(Rtmp, Rdst); + __ z_aghi(Rtmp, Isrc); + } + __ z_csg(Rdst, Rtmp, $mem$$Address); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry.
+ %} + ins_pipe(pipe_class_dummy); +%} + /* GetAndAddL fallback for an immL32 addend: CS loop that copies dst to tmp with LGR, adds with AGFI and retries until the CSG succeeds. */ +instruct addL_mem_imm32_atomic(memoryRSY mem, iRegL dst, immL32 src, iRegL tmp, flagsReg cr) %{ + match(Set dst (GetAndAddL mem src)); + effect(KILL cr, TEMP_DEF dst, TEMP tmp); + ins_cost(MEMORY_REF_COST+100*DEFAULT_COST); + format %{ "BEGIN ATOMIC {\n\t" + " LG $dst,[$mem]\n\t" + " LGR $tmp,$dst\n\t" + " AGFI $tmp,$src\n\t" + " CSG $dst,$tmp,$mem\n\t" + " retry if failed\n\t" + "} END ATOMIC" + %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rtmp = $tmp$$Register; + int Isrc = $src$$constant; + Label retry; + + // Iterate until update with incremented value succeeds. + __ z_lg(Rdst, $mem$$Address); // current contents + __ bind(retry); + // Calculate incremented value. + __ z_lgr(Rtmp, Rdst); + __ z_agfi(Rtmp, Isrc); + __ z_csg(Rdst, Rtmp, $mem$$Address); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry. + %} + ins_pipe(pipe_class_dummy); +%} + /* GetAndAddL fallback with a register addend: CS loop that forms the sum in tmp with AGRK when has_DistinctOpnds(), else with LGR + AGR, and retries until the CSG succeeds. */ +instruct addL_mem_reg_atomic(memoryRSY mem, iRegL dst, iRegL src, iRegL tmp, flagsReg cr) %{ + match(Set dst (GetAndAddL mem src)); + effect(KILL cr, TEMP_DEF dst, TEMP tmp); + ins_cost(MEMORY_REF_COST+100*DEFAULT_COST); + format %{ "BEGIN ATOMIC {\n\t" + " LG $dst,[$mem]\n\t" + " AGRK $tmp,$dst,$src\n\t" + " CSG $dst,$tmp,$mem\n\t" + " retry if failed\n\t" + "} END ATOMIC" + %} + ins_encode %{ + Register Rsrc = $src$$Register; + Register Rdst = $dst$$Register; + Register Rtmp = $tmp$$Register; + Label retry; + + // Iterate until update with incremented value succeeds. + __ z_lg(Rdst, $mem$$Address); // current contents + __ bind(retry); + // Calculate incremented value. + if (VM_Version::has_DistinctOpnds()) { + __ z_agrk(Rtmp, Rdst, Rsrc); + } else { + __ z_lgr(Rtmp, Rdst); + __ z_agr(Rtmp, Rsrc); + } + __ z_csg(Rdst, Rtmp, $mem$$Address); // Try to store new value. + __ z_brne(retry); // Yikes, concurrent update, need to retry. + %} + ins_pipe(pipe_class_dummy); +%} + +// Increment value in memory, save old value in dst.
+instruct addI_mem_reg_atomic_z196(memoryRSY mem, iRegI dst, iRegI src) %{ + match(Set dst (GetAndAddI mem src)); + predicate(VM_Version::has_LoadAndALUAtomicV1()); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + size(6); + format %{ "LAA $dst,$src,[$mem]" %} + ins_encode %{ __ z_laa($dst$$Register, $src$$Register, $mem$$Address); %} + ins_pipe(pipe_class_dummy); +%} + /* NOTE(review): the two z196 rules (LAA above, LAAG below) declare no flagsReg operand or KILL effect, unlike the CS-loop GetAndAdd rules - confirm that LAA/LAAG cannot disturb a live condition code. */ +// Increment value in memory, save old value in dst. +instruct addL_mem_reg_atomic_z196(memoryRSY mem, iRegL dst, iRegL src) %{ + match(Set dst (GetAndAddL mem src)); + predicate(VM_Version::has_LoadAndALUAtomicV1()); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + size(6); + format %{ "LAAG $dst,$src,[$mem]" %} + ins_encode %{ __ z_laag($dst$$Register, $src$$Register, $mem$$Address); %} + ins_pipe(pipe_class_dummy); +%} + /* Atomic exchange rules (GetAndSetI/L/N/P): dst is both the value passed to the node and its result, tmp is a scratch register and cr is killed. The I and N forms are encoded by z_enc_SwapI, the L and P forms by z_enc_SwapL. */ + +instruct xchgI_reg_mem(memoryRSY mem, iRegI dst, iRegI tmp, flagsReg cr) %{ + match(Set dst (GetAndSetI mem dst)); + effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule. + format %{ "XCHGI $dst,[$mem]\t # EXCHANGE (int, atomic), temp $tmp" %} + ins_encode(z_enc_SwapI(mem, dst, tmp)); + ins_pipe(pipe_class_dummy); +%} + +instruct xchgL_reg_mem(memoryRSY mem, iRegL dst, iRegL tmp, flagsReg cr) %{ + match(Set dst (GetAndSetL mem dst)); + effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule. + format %{ "XCHGL $dst,[$mem]\t # EXCHANGE (long, atomic), temp $tmp" %} + ins_encode(z_enc_SwapL(mem, dst, tmp)); + ins_pipe(pipe_class_dummy); +%} + +instruct xchgN_reg_mem(memoryRSY mem, iRegN dst, iRegI tmp, flagsReg cr) %{ + match(Set dst (GetAndSetN mem dst)); + effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule. + format %{ "XCHGN $dst,[$mem]\t # EXCHANGE (coop, atomic), temp $tmp" %} + ins_encode(z_enc_SwapI(mem, dst, tmp)); + ins_pipe(pipe_class_dummy); +%} + +instruct xchgP_reg_mem(memoryRSY mem, iRegP dst, iRegL tmp, flagsReg cr) %{ + match(Set dst (GetAndSetP mem dst)); + effect(KILL cr, TEMP tmp); // USE_DEF dst by match rule.
+ format %{ "XCHGP $dst,[$mem]\t # EXCHANGE (oop, atomic), temp $tmp" %} + ins_encode(z_enc_SwapL(mem, dst, tmp)); + ins_pipe(pipe_class_dummy); +%} + + +//----------Arithmetic Instructions-------------------------------------------- + +// The rules are sorted by right operand type and operand length. Please keep +// it that way. +// Left operand type is always reg. Left operand len is I, L, P +// Right operand type is reg, imm, mem. Right operand len is S, I, L, P +// Special instruction formats, e.g. multi-operand, are inserted at the end. + +// ADD + +// REG = REG + REG + +// Register Addition: two-operand AR, dst is both the left input and the result; kills cr. +instruct addI_reg_reg_CISC(iRegI dst, iRegI src, flagsReg cr) %{ + match(Set dst (AddI dst src)); + effect(KILL cr); + // TODO: s390 port size(FIXED_SIZE); + format %{ "AR $dst,$src\t # int CISC ALU" %} + opcode(AR_ZOPC); + ins_encode(z_rrform(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +// Avoid use of LA(Y) for general ALU operation. Three-operand ARK, only when has_DistinctOpnds(); kills cr. +instruct addI_reg_reg_RISC(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{ + match(Set dst (AddI src1 src2)); + effect(KILL cr); + predicate(VM_Version::has_DistinctOpnds()); + ins_cost(DEFAULT_COST); + size(4); + format %{ "ARK $dst,$src1,$src2\t # int RISC ALU" %} + opcode(ARK_ZOPC); + ins_encode(z_rrfform(dst, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +// REG = REG + IMM + +// Avoid use of LA(Y) for general ALU operation. +// Immediate Addition: AHI with an immI16 constant, dst updated in place; kills cr. +instruct addI_reg_imm16_CISC(iRegI dst, immI16 con, flagsReg cr) %{ + match(Set dst (AddI dst con)); + effect(KILL cr); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "AHI $dst,$con\t # int CISC ALU" %} + opcode(AHI_ZOPC); + ins_encode(z_riform_signed(dst, con)); + ins_pipe(pipe_class_dummy); +%} + +// Avoid use of LA(Y) for general ALU operation.
+// Immediate Addition: three-operand AHIK with an immI16 constant, only when has_DistinctOpnds(); kills cr. +instruct addI_reg_imm16_RISC(iRegI dst, iRegI src, immI16 con, flagsReg cr) %{ + match(Set dst (AddI src con)); + effect(KILL cr); + predicate( VM_Version::has_DistinctOpnds()); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "AHIK $dst,$src,$con\t # int RISC ALU" %} + opcode(AHIK_ZOPC); + ins_encode(z_rieform_d(dst, src, con)); + ins_pipe(pipe_class_dummy); +%} + +// Immediate Addition: AFI with a full immI constant, dst updated in place, cost DEFAULT_COST_HIGH; kills cr. +instruct addI_reg_imm32(iRegI dst, immI src, flagsReg cr) %{ + match(Set dst (AddI dst src)); + effect(KILL cr); + ins_cost(DEFAULT_COST_HIGH); + size(6); + format %{ "AFI $dst,$src" %} + opcode(AFI_ZOPC); + ins_encode(z_rilform_signed(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +// Immediate Addition: LA with a uimmI12 displacement, only when PreferLAoverADD; declares no flagsReg effect. +instruct addI_reg_imm12(iRegI dst, iRegI src, uimmI12 con) %{ + match(Set dst (AddI src con)); + predicate(PreferLAoverADD); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LA $dst,$con(,$src)\t # int d12(,b)" %} + opcode(LA_ZOPC); + ins_encode(z_rxform_imm_reg(dst, con, src)); + ins_pipe(pipe_class_dummy); +%} + +// Immediate Addition: LAY with an immI20 displacement, only when PreferLAoverADD; declares no flagsReg effect. +instruct addI_reg_imm20(iRegI dst, iRegI src, immI20 con) %{ + match(Set dst (AddI src con)); + predicate(PreferLAoverADD); + ins_cost(DEFAULT_COST); + size(6); + format %{ "LAY $dst,$con(,$src)\t # int d20(,b)" %} + opcode(LAY_ZOPC); + ins_encode(z_rxyform_imm_reg(dst, con, src)); + ins_pipe(pipe_class_dummy); +%} + /* (src1 + src2) + con folded into a single LA with a uimmI12 displacement, only when PreferLAoverADD. */ +instruct addI_reg_reg_imm12(iRegI dst, iRegI src1, iRegI src2, uimmI12 con) %{ + match(Set dst (AddI (AddI src1 src2) con)); + predicate( PreferLAoverADD); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LA $dst,$con($src1,$src2)\t # int d12(x,b)" %} + opcode(LA_ZOPC); + ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + /* (src1 + src2) + con folded into a single LAY with an immI20 displacement, only when PreferLAoverADD. */ +instruct addI_reg_reg_imm20(iRegI dst, iRegI src1, iRegI src2, immI20 con) %{ + match(Set dst (AddI (AddI src1 src2) con)); + predicate(PreferLAoverADD); + ins_cost(DEFAULT_COST); + size(6); + format %{ "LAY
$dst,$con($src1,$src2)\t # int d20(x,b)" %} + opcode(LAY_ZOPC); + ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +// REG = REG + MEM + /* Adds a loaded int to dst. Two opcodes (AY, A) are handed to z_form_rt_mem_opt; presumably it selects between them by displacement - confirm against the enc_class. Kills cr. */ +instruct addI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{ + match(Set dst (AddI dst (LoadI src))); + effect(KILL cr); + ins_cost(MEMORY_REF_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "A(Y) $dst, $src\t # int" %} + opcode(AY_ZOPC, A_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +// MEM = MEM + IMM + +// Add Immediate to 4-byte memory operand and result. Matches a load/add/store of the same mem with an immI8 addend as one ASI; requires has_MemWithImmALUOps(). +instruct addI_mem_imm(memoryRSY mem, immI8 src, flagsReg cr) %{ + match(Set mem (StoreI mem (AddI (LoadI mem) src))); + effect(KILL cr); + predicate(VM_Version::has_MemWithImmALUOps()); + ins_cost(MEMORY_REF_COST); + size(6); + format %{ "ASI $mem,$src\t # direct mem add 4" %} + opcode(ASI_ZOPC); + ins_encode(z_siyform(mem, src)); + ins_pipe(pipe_class_dummy); +%} + + +// + +// REG = REG + REG + /* Long += sign-converted int register (AddL dst (ConvI2L src)) as a single AGFR; kills cr. */ +instruct addL_reg_regI(iRegL dst, iRegI src, flagsReg cr) %{ + match(Set dst (AddL dst (ConvI2L src))); + effect(KILL cr); + size(4); + format %{ "AGFR $dst,$src\t # long<-int CISC ALU" %} + opcode(AGFR_ZOPC); + ins_encode(z_rreform(dst, src)); + ins_pipe(pipe_class_dummy); +%} + /* Two-operand long add (AGR), dst updated in place; kills cr. */ +instruct addL_reg_reg_CISC(iRegL dst, iRegL src, flagsReg cr) %{ + match(Set dst (AddL dst src)); + effect(KILL cr); + // TODO: s390 port size(FIXED_SIZE); + format %{ "AGR $dst, $src\t # long CISC ALU" %} + opcode(AGR_ZOPC); + ins_encode(z_rreform(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +// Avoid use of LA(Y) for general ALU operation.
+instruct addL_reg_reg_RISC(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{ + match(Set dst (AddL src1 src2)); + effect(KILL cr); + predicate(VM_Version::has_DistinctOpnds()); + ins_cost(DEFAULT_COST); + size(4); + format %{ "AGRK $dst,$src1,$src2\t # long RISC ALU" %} + opcode(AGRK_ZOPC); + ins_encode(z_rrfform(dst, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +// REG = REG + IMM + /* The LA/LAY rules below are selected only when PreferLAoverADD is set and declare no flagsReg effect; the AGFI/AGHI rules update dst in place and kill cr. */ +instruct addL_reg_imm12(iRegL dst, iRegL src, uimmL12 con) %{ + match(Set dst (AddL src con)); + predicate( PreferLAoverADD); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LA $dst,$con(,$src)\t # long d12(,b)" %} + opcode(LA_ZOPC); + ins_encode(z_rxform_imm_reg(dst, con, src)); + ins_pipe(pipe_class_dummy); +%} + /* Long add of an immL20 constant as LAY. */ +instruct addL_reg_imm20(iRegL dst, iRegL src, immL20 con) %{ + match(Set dst (AddL src con)); + predicate(PreferLAoverADD); + ins_cost(DEFAULT_COST); + size(6); + format %{ "LAY $dst,$con(,$src)\t # long d20(,b)" %} + opcode(LAY_ZOPC); + ins_encode(z_rxyform_imm_reg(dst, con, src)); + ins_pipe(pipe_class_dummy); +%} + /* Long add of an immL32 constant as AGFI, cost DEFAULT_COST_HIGH. */ +instruct addL_reg_imm32(iRegL dst, immL32 con, flagsReg cr) %{ + match(Set dst (AddL dst con)); + effect(KILL cr); + ins_cost(DEFAULT_COST_HIGH); + size(6); + format %{ "AGFI $dst,$con\t # long CISC ALU" %} + opcode(AGFI_ZOPC); + ins_encode(z_rilform_signed(dst, con)); + ins_pipe(pipe_class_dummy); +%} + +// Avoid use of LA(Y) for general ALU operation. Long add of an immL16 constant as AGHI. +instruct addL_reg_imm16_CISC(iRegL dst, immL16 con, flagsReg cr) %{ + match(Set dst (AddL dst con)); + effect(KILL cr); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "AGHI $dst,$con\t # long CISC ALU" %} + opcode(AGHI_ZOPC); + ins_encode(z_riform_signed(dst, con)); + ins_pipe(pipe_class_dummy); +%} + +// Avoid use of LA(Y) for general ALU operation.
+instruct addL_reg_imm16_RISC(iRegL dst, iRegL src, immL16 con, flagsReg cr) %{ + match(Set dst (AddL src con)); + effect(KILL cr); + predicate( VM_Version::has_DistinctOpnds()); + ins_cost(DEFAULT_COST); + size(6); + format %{ "AGHIK $dst,$src,$con\t # long RISC ALU" %} + opcode(AGHIK_ZOPC); + ins_encode(z_rieform_d(dst, src, con)); + ins_pipe(pipe_class_dummy); +%} + +// REG = REG + MEM + /* Long += int loaded from memory and converted (ConvI2L (LoadI src)) as one AGF; kills cr. */ +instruct addL_Reg_memI(iRegL dst, memory src, flagsReg cr)%{ + match(Set dst (AddL dst (ConvI2L (LoadI src)))); + effect(KILL cr); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "AGF $dst, $src\t # long/int" %} + opcode(AGF_ZOPC, AGF_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, src)); + ins_pipe(pipe_class_dummy); +%} + /* Long += long loaded from memory as one AG; kills cr. */ +instruct addL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{ + match(Set dst (AddL dst (LoadL src))); + effect(KILL cr); + ins_cost(MEMORY_REF_COST); + size(Z_DISP3_SIZE); + format %{ "AG $dst, $src\t # long" %} + opcode(AG_ZOPC, AG_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, src)); + ins_pipe(pipe_class_dummy); +%} + /* (src1 + src2) + con folded into a single LA with a uimmL12 displacement, only when PreferLAoverADD; declares no flagsReg effect. */ +instruct addL_reg_reg_imm12(iRegL dst, iRegL src1, iRegL src2, uimmL12 con) %{ + match(Set dst (AddL (AddL src1 src2) con)); + predicate( PreferLAoverADD); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LA $dst,$con($src1,$src2)\t # long d12(x,b)" %} + opcode(LA_ZOPC); + ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + /* (src1 + src2) + con folded into a single LAY with an immL20 displacement, only when PreferLAoverADD; declares no flagsReg effect. */ +instruct addL_reg_reg_imm20(iRegL dst, iRegL src1, iRegL src2, immL20 con) %{ + match(Set dst (AddL (AddL src1 src2) con)); + predicate(PreferLAoverADD); + ins_cost(DEFAULT_COST); + size(6); + format %{ "LAY $dst,$con($src1,$src2)\t # long d20(x,b)" %} + opcode(LAY_ZOPC); + ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +// MEM = MEM + IMM + +// Add Immediate to 8-byte memory operand and result.
+instruct addL_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{ + match(Set mem (StoreL mem (AddL (LoadL mem) src))); + effect(KILL cr); + predicate(VM_Version::has_MemWithImmALUOps()); + ins_cost(MEMORY_REF_COST); + size(6); + format %{ "AGSI $mem,$src\t # direct mem add 8" %} + opcode(AGSI_ZOPC); + ins_encode(z_siyform(mem, src)); + ins_pipe(pipe_class_dummy); +%} + + +// REG = REG + REG + +// Ptr Addition: LA with displacement 0 and two registers, only when PreferLAoverADD; declares no flagsReg effect. +instruct addP_reg_reg_LA(iRegP dst, iRegP_N2P src1, iRegL src2) %{ + match(Set dst (AddP src1 src2)); + predicate( PreferLAoverADD); + ins_cost(DEFAULT_COST); + size(4); + format %{ "LA $dst,#0($src1,$src2)\t # ptr 0(x,b)" %} + opcode(LA_ZOPC); + ins_encode(z_rxform_imm_reg_reg(dst, 0x0, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +// Ptr Addition: two-operand ALGR, dst updated in place; kills cr. +// Selected only when neither PreferLAoverADD nor has_DistinctOpnds() holds. +// Avoid use of LA(Y) for general ALU operation. +instruct addP_reg_reg_CISC(iRegP dst, iRegL src, flagsReg cr) %{ + match(Set dst (AddP dst src)); + effect(KILL cr); + predicate(!PreferLAoverADD && !VM_Version::has_DistinctOpnds()); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "ALGR $dst,$src\t # ptr CISC ALU" %} + opcode(ALGR_ZOPC); + ins_encode(z_rreform(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +// Ptr Addition +// Avoid use of LA(Y) for general ALU operation.
+instruct addP_reg_reg_RISC(iRegP dst, iRegP_N2P src1, iRegL src2, flagsReg cr) %{ + match(Set dst (AddP src1 src2)); + effect(KILL cr); + predicate(!PreferLAoverADD && VM_Version::has_DistinctOpnds()); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "ALGRK $dst,$src1,$src2\t # ptr RISC ALU" %} + opcode(ALGRK_ZOPC); + ins_encode(z_rrfform(dst, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +// REG = REG + IMM + /* Pointer plus uimmL12 constant as LA, only when PreferLAoverADD; declares no flagsReg effect. */ +instruct addP_reg_imm12(iRegP dst, iRegP_N2P src, uimmL12 con) %{ + match(Set dst (AddP src con)); + predicate( PreferLAoverADD); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LA $dst,$con(,$src)\t # ptr d12(,b)" %} + opcode(LA_ZOPC); + ins_encode(z_rxform_imm_reg(dst, con, src)); + ins_pipe(pipe_class_dummy); +%} + +// Avoid use of LA(Y) for general ALU operation. In-place AGHI with an immL16 constant; selected only when neither PreferLAoverADD nor has_DistinctOpnds() holds. Kills cr. +instruct addP_reg_imm16_CISC(iRegP dst, immL16 src, flagsReg cr) %{ + match(Set dst (AddP dst src)); + effect(KILL cr); + predicate(!PreferLAoverADD && !VM_Version::has_DistinctOpnds()); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "AGHI $dst,$src\t # ptr CISC ALU" %} + opcode(AGHI_ZOPC); + ins_encode(z_riform_signed(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +// Avoid use of LA(Y) for general ALU operation.
+instruct addP_reg_imm16_RISC(iRegP dst, iRegP_N2P src, immL16 con, flagsReg cr) %{ + match(Set dst (AddP src con)); + effect(KILL cr); + predicate(!PreferLAoverADD && VM_Version::has_DistinctOpnds()); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "ALGHSIK $dst,$src,$con\t # ptr RISC ALU" %} + opcode(ALGHSIK_ZOPC); + ins_encode(z_rieform_d(dst, src, con)); + ins_pipe(pipe_class_dummy); +%} + /* Pointer plus immL20 constant as LAY, only when PreferLAoverADD; declares no flagsReg effect. */ +instruct addP_reg_imm20(iRegP dst, memoryRegP src, immL20 con) %{ + match(Set dst (AddP src con)); + predicate(PreferLAoverADD); + ins_cost(DEFAULT_COST); + size(6); + format %{ "LAY $dst,$con(,$src)\t # ptr d20(,b)" %} + opcode(LAY_ZOPC); + ins_encode(z_rxyform_imm_reg(dst, con, src)); + ins_pipe(pipe_class_dummy); +%} + +// Pointer Immediate Addition: in-place AGFI with an immL32 constant, cost DEFAULT_COST_HIGH, no predicate; kills cr. +instruct addP_reg_imm32(iRegP dst, immL32 src, flagsReg cr) %{ + match(Set dst (AddP dst src)); + effect(KILL cr); + ins_cost(DEFAULT_COST_HIGH); + // TODO: s390 port size(FIXED_SIZE); + format %{ "AGFI $dst,$src\t # ptr" %} + opcode(AGFI_ZOPC); + ins_encode(z_rilform_signed(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +// REG = REG1 + REG2 + IMM + /* The four rules below fold (src1 + src2) + con into one LA (uimmL12) or LAY (immL20) when PreferLAoverADD is set. The regN variants take src1 as iRegP_N2P and additionally require narrow_oop_base() == NULL and narrow_oop_shift() == 0. */ +instruct addP_reg_reg_imm12(iRegP dst, memoryRegP src1, iRegL src2, uimmL12 con) %{ + match(Set dst (AddP (AddP src1 src2) con)); + predicate( PreferLAoverADD); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LA $dst,$con($src1,$src2)\t # ptr d12(x,b)" %} + opcode(LA_ZOPC); + ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +instruct addP_regN_reg_imm12(iRegP dst, iRegP_N2P src1, iRegL src2, uimmL12 con) %{ + match(Set dst (AddP (AddP src1 src2) con)); + predicate( PreferLAoverADD && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + ins_cost(DEFAULT_COST_LOW); + size(4); + format %{ "LA $dst,$con($src1,$src2)\t # ptr d12(x,b)" %} + opcode(LA_ZOPC); + ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +instruct addP_reg_reg_imm20(iRegP
dst, memoryRegP src1, iRegL src2, immL20 con) %{ + match(Set dst (AddP (AddP src1 src2) con)); + predicate(PreferLAoverADD); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "LAY $dst,$con($src1,$src2)\t # ptr d20(x,b)" %} + opcode(LAY_ZOPC); + ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +instruct addP_regN_reg_imm20(iRegP dst, iRegP_N2P src1, iRegL src2, immL20 con) %{ + match(Set dst (AddP (AddP src1 src2) con)); + predicate( PreferLAoverADD && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "LAY $dst,$con($src1,$src2)\t # ptr d20(x,b)" %} + opcode(LAY_ZOPC); + ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +// MEM = MEM + IMM + +// Add Immediate to 8-byte memory operand and result. Matches a pointer load/add/store of the same mem with an immL8 addend as one AGSI; requires has_MemWithImmALUOps(). +instruct addP_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{ + match(Set mem (StoreP mem (AddP (LoadP mem) src))); + effect(KILL cr); + predicate(VM_Version::has_MemWithImmALUOps()); + ins_cost(MEMORY_REF_COST); + size(6); + format %{ "AGSI $mem,$src\t # direct mem add 8 (ptr)" %} + opcode(AGSI_ZOPC); + ins_encode(z_siyform(mem, src)); + ins_pipe(pipe_class_dummy); +%} + +// SUB + +// Register Subtraction: two-operand SR, dst is both the left input and the result; kills cr. +instruct subI_reg_reg_CISC(iRegI dst, iRegI src, flagsReg cr) %{ + match(Set dst (SubI dst src)); + effect(KILL cr); + // TODO: s390 port size(FIXED_SIZE); + format %{ "SR $dst,$src\t # int CISC ALU" %} + opcode(SR_ZOPC); + ins_encode(z_rrform(dst, src)); + ins_pipe(pipe_class_dummy); +%} + /* Three-operand int subtraction (SRK), only when has_DistinctOpnds(); kills cr. */ +instruct subI_reg_reg_RISC(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{ + match(Set dst (SubI src1 src2)); + effect(KILL cr); + predicate(VM_Version::has_DistinctOpnds()); + ins_cost(DEFAULT_COST); + size(4); + format %{ "SRK $dst,$src1,$src2\t # int RISC ALU" %} + opcode(SRK_ZOPC); + ins_encode(z_rrfform(dst, src1, src2)); + ins_pipe(pipe_class_dummy); +%} +
+// Int subtract with a memory subtrahend: dst = dst - LoadI(src). Two opcodes
+// (SY, S) are handed to the z_form_rt_mem_opt encoder. Kills the condition code.
+instruct subI_Reg_mem(iRegI dst, memory src, flagsReg cr)%{
+  match(Set dst (SubI dst (LoadI src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "S(Y) $dst, $src\t # int" %}
+  opcode(SY_ZOPC, S_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Int negation expressed as (0 - src); emitted as a single z_lcr.
+instruct subI_zero_reg(iRegI dst, immI_0 zero, iRegI src, flagsReg cr) %{
+  match(Set dst (SubI zero src));
+  effect(KILL cr);
+  size(2);
+  format %{ "NEG $dst, $src" %}
+  ins_encode %{ __ z_lcr($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+//
+
+// Long subtraction
+// Two-operand form: dst = dst - src (SGR). Kills the condition code.
+instruct subL_reg_reg_CISC(iRegL dst, iRegL src, flagsReg cr) %{
+  match(Set dst (SubL dst src));
+  effect(KILL cr);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "SGR $dst,$src\t # long CISC ALU" %}
+  opcode(SGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Avoid use of LA(Y) for general ALU operation.
+// Three-operand form: dst = src1 - src2 (SGRK); needs distinct-operand support.
+instruct subL_reg_reg_RISC(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{
+  match(Set dst (SubL src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_DistinctOpnds());
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "SGRK $dst,$src1,$src2\t # long RISC ALU" %}
+  opcode(SGRK_ZOPC);
+  ins_encode(z_rrfform(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long minus int register: folds the ConvI2L of the subtrahend into SGFR.
+instruct subL_reg_regI_CISC(iRegL dst, iRegI src, flagsReg cr) %{
+  match(Set dst (SubL dst (ConvI2L src)));
+  effect(KILL cr);
+  size(4);
+  format %{ "SGFR $dst, $src\t # int CISC ALU" %}
+  opcode(SGFR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long minus int loaded from memory: folds LoadI + ConvI2L into SGF.
+instruct subL_Reg_memI(iRegL dst, memory src, flagsReg cr)%{
+  match(Set dst (SubL dst (ConvI2L (LoadI src))));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "SGF $dst, $src\t # long/int" %}
+  opcode(SGF_ZOPC, SGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long minus long loaded from memory (SG).
+instruct subL_Reg_mem(iRegL dst, memory src, flagsReg cr)%{
+  match(Set dst (SubL dst (LoadL src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "SG $dst, $src\t # long" %}
+  opcode(SG_ZOPC, SG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Moved declaration of negL_reg_reg before encode nodes, where it is used.
+
+// MUL
+
+// Register Multiplication
+// dst = dst * src (MSR). No condition-code effect is declared.
+instruct mulI_reg_reg(iRegI dst, iRegI src) %{
+  match(Set dst (MulI dst src));
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "MSR $dst, $src" %}
+  opcode(MSR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Multiplication
+instruct mulI_reg_imm16(iRegI dst, immI16 con) %{
+  match(Set dst (MulI dst con));
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "MHI $dst,$con" %}
+  opcode(MHI_ZOPC);
+  ins_encode(z_riform_signed(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate (32bit) Multiplication
+instruct mulI_reg_imm32(iRegI dst, immI con) %{
+  match(Set dst (MulI dst con));
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "MSFI $dst,$con" %}
+  opcode(MSFI_ZOPC);
+  ins_encode(z_rilform_signed(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Int multiply with a memory operand; opcodes MSY and MS are handed to the
+// z_form_rt_mem_opt encoder.
+instruct mulI_Reg_mem(iRegI dst, memory src)%{
+  match(Set dst (MulI dst (LoadI src)));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MS(Y) $dst, $src\t # int" %}
+  opcode(MSY_ZOPC, MS_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//
+
+// Long times int register: folds the ConvI2L of the multiplier into MSGFR.
+instruct mulL_reg_regI(iRegL dst, iRegI src) %{
+  match(Set dst (MulL dst (ConvI2L src)));
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "MSGFR $dst $src\t # long/int" %}
+  opcode(MSGFR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long register multiplication (MSGR).
+instruct mulL_reg_reg(iRegL dst, iRegL src) %{
+  match(Set dst (MulL dst src));
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "MSGR $dst $src\t # long" %}
+  opcode(MSGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Multiplication
+instruct mulL_reg_imm16(iRegL dst, immL16 src) %{
+  match(Set dst (MulL dst src));
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "MGHI $dst,$src\t # long" %}
+  opcode(MGHI_ZOPC);
+  ins_encode(z_riform_signed(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate (32bit) Multiplication
+instruct mulL_reg_imm32(iRegL dst, immL32 con) %{
+  match(Set dst (MulL dst con));
+  ins_cost(DEFAULT_COST);
+  size(6);
+  format %{ "MSGFI $dst,$con" %}
+  opcode(MSGFI_ZOPC);
+  ins_encode(z_rilform_signed(dst,con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long times int loaded from memory: folds LoadI + ConvI2L into MSGF.
+instruct mulL_Reg_memI(iRegL dst, memory src)%{
+  match(Set dst (MulL dst (ConvI2L (LoadI src))));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "MSGF $dst, $src\t # long" %}
+  opcode(MSGF_ZOPC, MSGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long times long loaded from memory (MSG).
+instruct mulL_Reg_mem(iRegL dst, memory src)%{
+  match(Set dst (MulL dst (LoadL src)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "MSG $dst, $src\t # long" %}
+  opcode(MSG_ZOPC, MSG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// DIV
+
+// Integer DIVMOD with Register, both quotient and mod results
+// A divisor of -1 bypasses the divide instruction: the dividend is negated in
+// place and dst2 is cleared. Otherwise the dividend is sign-extended and
+// divided with z_dsgfr on the pair starting at dst2.
+instruct divModI_reg_divmod(roddRegI dst1src1, revenRegI dst2, noOdd_iRegI src2, flagsReg cr) %{
+  match(DivModI dst1src1 src2);
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(VM_Version::has_CompareBranch() ? 24 : 26);
+  format %{ "DIVMODI ($dst1src1, $dst2) $src2" %}
+  ins_encode %{
+    Register d1s1 = $dst1src1$$Register;
+    Register d2 = $dst2$$Register;
+    Register s2 = $src2$$Register;
+
+    assert_different_registers(d1s1, s2);
+
+    Label do_div, done_div;
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cij(s2, -1, Assembler::bcondNotEqual, do_div);
+    } else {
+      __ z_chi(s2, -1);
+      __ z_brne(do_div);
+    }
+    __ z_lcr(d1s1, d1s1);
+    __ clear_reg(d2, false, false);
+    __ z_bru(done_div);
+    __ bind(do_div);
+    __ z_lgfr(d1s1, d1s1);
+    __ z_dsgfr(d2, s2);
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+// Register Division
+// dst is the odd half of an even/odd pair; tmp (the even half) is killed.
+// A divisor of -1 is handled by negating src1 instead of dividing.
+instruct divI_reg_reg(roddRegI dst, iRegI src1, noOdd_iRegI src2, revenRegI tmp, flagsReg cr) %{
+  match(Set dst (DivI src1 src2));
+  effect(KILL tmp, KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(VM_Version::has_CompareBranch() ? 20 : 22);
+  format %{ "DIV_checked $dst, $src1,$src2\t # treats special case 0x80../-1" %}
+  ins_encode %{
+    Register a = $src1$$Register;
+    Register b = $src2$$Register;
+    Register t = $dst$$Register;
+
+    assert_different_registers(t, b);
+
+    Label do_div, done_div;
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cij(b, -1, Assembler::bcondNotEqual, do_div);
+    } else {
+      __ z_chi(b, -1);
+      __ z_brne(do_div);
+    }
+    __ z_lcr(t, a);
+    __ z_bru(done_div);
+    __ bind(do_div);
+    __ z_lgfr(t, a);
+    __ z_dsgfr(t->predecessor()/* t is odd part of a register pair. */, b);
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Division
+// The -1 special case is resolved at code-emission time from the constant.
+instruct divI_reg_imm16(roddRegI dst, iRegI src1, immI16 src2, revenRegI tmp, flagsReg cr) %{
+  match(Set dst (DivI src1 src2));
+  effect(KILL tmp, KILL cr); // R0 is killed, too.
+  ins_cost(2 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "DIV_const $dst,$src1,$src2" %}
+  ins_encode %{
+    // No sign extension of Rdividend needed here.
+    if ($src2$$constant != -1) {
+      __ z_lghi(Z_R0_scratch, $src2$$constant);
+      __ z_lgfr($dst$$Register, $src1$$Register);
+      __ z_dsgfr($dst$$Register->predecessor()/* Dst is odd part of a register pair. */, Z_R0_scratch);
+    } else {
+      __ z_lcr($dst$$Register, $src1$$Register);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long DIVMOD with Register, both quotient and mod results
+// Same structure as the int variant, using 64-bit compare, negate and z_dsgr.
+instruct divModL_reg_divmod(roddRegL dst1src1, revenRegL dst2, iRegL src2, flagsReg cr) %{
+  match(DivModL dst1src1 src2);
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(VM_Version::has_CompareBranch() ? 22 : 24);
+  format %{ "DIVMODL ($dst1src1, $dst2) $src2" %}
+  ins_encode %{
+    Register d1s1 = $dst1src1$$Register;
+    Register d2 = $dst2$$Register;
+    Register s2 = $src2$$Register;
+
+    Label do_div, done_div;
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cgij(s2, -1, Assembler::bcondNotEqual, do_div);
+    } else {
+      __ z_cghi(s2, -1);
+      __ z_brne(do_div);
+    }
+    __ z_lcgr(d1s1, d1s1);
+    // indicate unused result
+    (void) __ clear_reg(d2, true, false);
+    __ z_bru(done_div);
+    __ bind(do_div);
+    __ z_dsgr(d2, s2);
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Long Division
+// Negates dst up front so that the -1 case can branch straight to the end;
+// the negation is undone before the real divide.
+instruct divL_reg_reg(roddRegL dst, iRegL src, revenRegL tmp, flagsReg cr) %{
+  match(Set dst (DivL dst src));
+  effect(KILL tmp, KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(VM_Version::has_CompareBranch() ? 18 : 20);
+  format %{ "DIVG_checked $dst, $src\t # long, treats special case 0x80../-1" %}
+  ins_encode %{
+    Register b = $src$$Register;
+    Register t = $dst$$Register;
+
+    Label done_div;
+    __ z_lcgr(t, t); // Does no harm. divisor is in other register.
+    if (VM_Version::has_CompareBranch()) {
+      __ z_cgij(b, -1, Assembler::bcondEqual, done_div);
+    } else {
+      __ z_cghi(b, -1);
+      __ z_bre(done_div);
+    }
+    __ z_lcgr(t, t); // Restore sign.
+    __ z_dsgr(t->predecessor()/* t is odd part of a register pair. */, b);
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Long Division
+instruct divL_reg_imm16(roddRegL dst, iRegL src1, immL16 src2, revenRegL tmp, flagsReg cr) %{
+  match(Set dst (DivL src1 src2));
+  effect(KILL tmp, KILL cr); // R0 is killed, too.
+  ins_cost(2 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "DIVG_const $dst,$src1,$src2\t # long" %}
+  ins_encode %{
+    if ($src2$$constant != -1) {
+      __ z_lghi(Z_R0_scratch, $src2$$constant);
+      __ lgr_if_needed($dst$$Register, $src1$$Register);
+      __ z_dsgr($dst$$Register->predecessor()/* Dst is odd part of a register pair. */, Z_R0_scratch);
+    } else {
+      __ z_lcgr($dst$$Register, $src1$$Register);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// REM
+
+// Integer Remainder
+// Register Remainder
+// dst is the even half of a pair; tmp (the odd half) is killed. For a divisor
+// of -1 the result is 0 and no divide is executed. Two code shapes are emitted
+// depending on whether dst aliases one of the inputs.
+instruct modI_reg_reg(revenRegI dst, iRegI src1, noOdd_iRegI src2, roddRegI tmp, flagsReg cr) %{
+  match(Set dst (ModI src1 src2));
+  effect(KILL tmp, KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MOD_checked $dst,$src1,$src2" %}
+  ins_encode %{
+    Register a = $src1$$Register;
+    Register b = $src2$$Register;
+    Register t = $dst$$Register;
+    assert_different_registers(t->successor(), b);
+
+    Label do_div, done_div;
+
+    if ((t->encoding() != b->encoding()) && (t->encoding() != a->encoding())) {
+      (void) __ clear_reg(t, true, false); // Does no harm. Operands are in other regs.
+      if (VM_Version::has_CompareBranch()) {
+        __ z_cij(b, -1, Assembler::bcondEqual, done_div);
+      } else {
+        __ z_chi(b, -1);
+        __ z_bre(done_div);
+      }
+      __ z_lgfr(t->successor(), a);
+      __ z_dsgfr(t/* t is even part of a register pair. */, b);
+    } else {
+      if (VM_Version::has_CompareBranch()) {
+        __ z_cij(b, -1, Assembler::bcondNotEqual, do_div);
+      } else {
+        __ z_chi(b, -1);
+        __ z_brne(do_div);
+      }
+      __ clear_reg(t, true, false);
+      __ z_bru(done_div);
+      __ bind(do_div);
+      __ z_lgfr(t->successor(), a);
+      __ z_dsgfr(t/* t is even part of a register pair. */, b);
+    }
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Remainder
+// The format string now references $src1 (it used to print the literal text
+// "src1" because the '$' was missing).
+instruct modI_reg_imm16(revenRegI dst, iRegI src1, immI16 src2, roddRegI tmp, flagsReg cr) %{
+  match(Set dst (ModI src1 src2));
+  effect(KILL tmp, KILL cr); // R0 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MOD_const $dst,$src1,$src2" %}
+  ins_encode %{
+    assert_different_registers($dst$$Register, $src1$$Register);
+    assert_different_registers($dst$$Register->successor(), $src1$$Register);
+    int divisor = $src2$$constant;
+
+    if (divisor != -1) {
+      __ z_lghi(Z_R0_scratch, divisor);
+      __ z_lgfr($dst$$Register->successor(), $src1$$Register);
+      __ z_dsgfr($dst$$Register/* Dst is even part of a register pair. */, Z_R0_scratch); // Instruction kills tmp.
+    } else {
+      __ clear_reg($dst$$Register, true, false);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Long Remainder
+// src1 must be the odd successor of dst (asserted below) and is killed.
+instruct modL_reg_reg(revenRegL dst, roddRegL src1, iRegL src2, flagsReg cr) %{
+  match(Set dst (ModL src1 src2));
+  effect(KILL src1, KILL cr); // R0 is killed, too.
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MODG_checked $dst,$src1,$src2" %}
+  ins_encode %{
+    Register a = $src1$$Register;
+    Register b = $src2$$Register;
+    Register t = $dst$$Register;
+    assert(t->successor() == a, "(t,a) is an even-odd pair" );
+
+    Label do_div, done_div;
+    if (t->encoding() != b->encoding()) {
+      (void) __ clear_reg(t, true, false); // Does no harm. Dividend is in successor.
+      if (VM_Version::has_CompareBranch()) {
+        __ z_cgij(b, -1, Assembler::bcondEqual, done_div);
+      } else {
+        __ z_cghi(b, -1);
+        __ z_bre(done_div);
+      }
+      __ z_dsgr(t, b);
+    } else {
+      if (VM_Version::has_CompareBranch()) {
+        __ z_cgij(b, -1, Assembler::bcondNotEqual, do_div);
+      } else {
+        __ z_cghi(b, -1);
+        __ z_brne(do_div);
+      }
+      __ clear_reg(t, true, false);
+      __ z_bru(done_div);
+      __ bind(do_div);
+      __ z_dsgr(t, b);
+    }
+    __ bind(done_div);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Long Remainder
+// The format string now references $src1 (it used to print the literal text
+// "src1" because the '$' was missing).
+instruct modL_reg_imm16(revenRegL dst, iRegL src1, immL16 src2, roddRegL tmp, flagsReg cr) %{
+  match(Set dst (ModL src1 src2));
+  effect(KILL tmp, KILL cr); // R0 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MODG_const $dst,$src1,$src2\t # long" %}
+  ins_encode %{
+    int divisor = $src2$$constant;
+    if (divisor != -1) {
+      __ z_lghi(Z_R0_scratch, divisor);
+      __ z_lgr($dst$$Register->successor(), $src1$$Register);
+      __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch); // Instruction kills tmp.
+    } else {
+      __ clear_reg($dst$$Register, true, false);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// SHIFT
+
+// Shift left logical
+
+// Register Shift Left variable
+// The count is copied to Z_R1_scratch and masked with BitsPerJavaInteger-1
+// before the SLLG, so only the masked count is applied.
+instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
+  match(Set dst (LShiftI src nbits));
+  effect(KILL cr); // R1 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  size(14);
+  format %{ "SLL $dst,$src,[$nbits] & 31\t# use RISC-like SLLG also for int" %}
+  ins_encode %{
+    __ z_lgr(Z_R1_scratch, $nbits$$Register);
+    __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);
+    __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Immediate
+// Constant shift count is masked in ideal graph already.
+// Int shift left by a constant, emitted as SLLG; the constant is additionally
+// masked with BitsPerJavaInteger - 1 at emission time. No condition-code
+// effect is declared.
+instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
+  match(Set dst (LShiftI src nbits));
+  size(6);
+  format %{ "SLL $dst,$src,$nbits\t# use RISC-like SLLG also for int" %}
+  ins_encode %{
+    int Nbit = $nbits$$constant;
+    __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Immediate by 1bit
+// Implemented as LA dst,0(src,src), i.e. src + src; only when PreferLAoverADD.
+instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
+  match(Set dst (LShiftI src nbits));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA $dst,#0($src,$src)\t # SLL by 1 (int)" %}
+  ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Long
+instruct sllL_reg_reg(iRegL dst, iRegL src1, iRegI nbits) %{
+  match(Set dst (LShiftL src1 nbits));
+  size(6);
+  format %{ "SLLG $dst,$src1,[$nbits]" %}
+  opcode(SLLG_ZOPC);
+  ins_encode(z_rsyform_reg_reg(dst, src1, nbits));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Long Immediate
+instruct sllL_reg_imm(iRegL dst, iRegL src1, immI nbits) %{
+  match(Set dst (LShiftL src1 nbits));
+  size(6);
+  format %{ "SLLG $dst,$src1,$nbits" %}
+  opcode(SLLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src1, nbits));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Left Long Immediate by 1bit
+// Implemented as LA dst,0(src1,src1); only when PreferLAoverADD.
+instruct sllL_reg_imm_1(iRegL dst, iRegL src1, immI_1 nbits) %{
+  match(Set dst (LShiftL src1 nbits));
+  predicate(PreferLAoverADD);
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LA $dst,#0($src1,$src1)\t # SLLG by 1 (long)" %}
+  ins_encode %{ __ z_la($dst$$Register, 0, $src1$$Register, $src1$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Shift right arithmetic
+
+// Register Arithmetic Shift Right
+// In-place shift of dst; the count is copied to Z_R1_scratch and masked with
+// BitsPerJavaInteger-1 before the SRA.
+instruct sraI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (RShiftI dst src));
+  effect(KILL cr); // R1 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  size(12);
+  format %{ "SRA $dst,[$src] & 31" %}
+  ins_encode %{
+    __ z_lgr(Z_R1_scratch, $src$$Register);
+    __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);
+    __ z_sra($dst$$Register, 0, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Arithmetic Shift Right Immediate
+// Constant shift count is masked in ideal graph already.
+instruct sraI_reg_imm(iRegI dst, immI src, flagsReg cr) %{
+  match(Set dst (RShiftI dst src));
+  effect(KILL cr);
+  size(4);
+  format %{ "SRA $dst,$src" %}
+  ins_encode %{
+    int Nbit = $src$$constant;
+    __ z_sra($dst$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Arithmetic Shift Right Long
+instruct sraL_reg_reg(iRegL dst, iRegL src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (RShiftL src1 src2));
+  effect(KILL cr);
+  size(6);
+  format %{ "SRAG $dst,$src1,[$src2]" %}
+  opcode(SRAG_ZOPC);
+  ins_encode(z_rsyform_reg_reg(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Arithmetic Shift Right Long Immediate
+instruct sraL_reg_imm(iRegL dst, iRegL src1, immI src2, flagsReg cr) %{
+  match(Set dst (RShiftL src1 src2));
+  effect(KILL cr);
+  size(6);
+  format %{ "SRAG $dst,$src1,$src2" %}
+  opcode(SRAG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Shift right logical
+
+// Register Shift Right
+// In-place logical shift of dst; same count masking via Z_R1_scratch as the
+// arithmetic variant above.
+instruct srlI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (URShiftI dst src));
+  effect(KILL cr); // R1 is killed, too.
+  ins_cost(3 * DEFAULT_COST);
+  size(12);
+  format %{ "SRL $dst,[$src] & 31" %}
+  ins_encode %{
+    __ z_lgr(Z_R1_scratch, $src$$Register);
+    __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);
+    __ z_srl($dst$$Register, 0, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Immediate
+// Constant shift count is masked in ideal graph already.
+// In-place int logical shift right by a constant (SRL); the constant is
+// additionally masked with BitsPerJavaInteger - 1 at emission time.
+instruct srlI_reg_imm(iRegI dst, immI src) %{
+  match(Set dst (URShiftI dst src));
+  size(4);
+  format %{ "SRL $dst,$src" %}
+  ins_encode %{
+    int Nbit = $src$$constant;
+    __ z_srl($dst$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Long
+instruct srlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
+  match(Set dst (URShiftL src1 src2));
+  size(6);
+  format %{ "SRLG $dst,$src1,[$src2]" %}
+  opcode(SRLG_ZOPC);
+  ins_encode(z_rsyform_reg_reg(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Long Immediate
+instruct srlL_reg_imm(iRegL dst, iRegL src1, immI src2) %{
+  match(Set dst (URShiftL src1 src2));
+  size(6);
+  format %{ "SRLG $dst,$src1,$src2" %}
+  opcode(SRLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Immediate with a CastP2X
+// Folds the pointer-to-long cast into the shift: the pointer register is used
+// directly as the SRLG source.
+instruct srlP_reg_imm(iRegL dst, iRegP_N2P src1, immI src2) %{
+  match(Set dst (URShiftL (CastP2X src1) src2));
+  size(6);
+  format %{ "SRLG $dst,$src1,$src2\t # Cast ptr $src1 to long and shift" %}
+  opcode(SRLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src1, src2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Rotate Instructions------------------------------------------------
+
+// Rotate left 32bit.
+// Matches (src << lshift) | (src >>> rshift); the predicate requires the two
+// constants to sum to 0 modulo 32. RLL is emitted with the left-shift count.
+instruct rotlI_reg_immI8(iRegI dst, iRegI src, immI8 lshift, immI8 rshift) %{
+  match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
+  size(6);
+  format %{ "RLL $dst,$src,$lshift\t # ROTL32" %}
+  opcode(RLL_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, lshift));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Rotate left 64bit.
+// Matches (src << lshift) | (src >>> rshift); the predicate requires the two
+// constants to sum to 0 modulo 64. RLLG is emitted with the left-shift count.
+instruct rotlL_reg_immI8(iRegL dst, iRegL src, immI8 lshift, immI8 rshift) %{
+  match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
+  size(6);
+  format %{ "RLLG $dst,$src,$lshift\t # ROTL64" %}
+  opcode(RLLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, lshift));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Rotate right 32bit.
+// Matches (src >>> rshift) | (src << lshift) with rshift + lshift == 0 mod 32.
+// RLL is the same opcode the rotate-left rule uses, so the count handed to it
+// must be the left-shift amount: rotating right by rshift equals rotating left
+// by lshift. Passing rshift would only be correct when both counts are equal.
+instruct rotrI_reg_immI8(iRegI dst, iRegI src, immI8 rshift, immI8 lshift) %{
+  match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "RLL $dst,$src,$lshift\t # ROTR32" %}
+  opcode(RLL_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, lshift));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Rotate right 64bit.
+// Same reasoning as the 32-bit rule: RLLG takes the left-shift count, which
+// equals (64 - rshift) under the predicate.
+instruct rotrL_reg_immI8(iRegL dst, iRegL src, immI8 rshift, immI8 lshift) %{
+  match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift)));
+  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f));
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "RLLG $dst,$src,$lshift\t # ROTR64" %}
+  opcode(RLLG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, lshift));
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Overflow Math Instructions-----------------------------------------
+
+// The rules below define only the flags register: the arithmetic is carried
+// out in Z_R0_scratch (and Z_R1_scratch for some immediate forms), so neither
+// input operand is modified.
+
+// Int add overflow check, register + register.
+instruct overflowAddI_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AR $op1,$op2\t # overflow check int" %}
+  ins_encode %{
+    __ z_lr(Z_R0_scratch, $op1$$Register);
+    __ z_ar(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Int add overflow check, register + constant (constant is materialized in
+// Z_R0_scratch first, then op1 is added to it).
+instruct overflowAddI_reg_imm(flagsReg cr, iRegI op1, immI op2) %{
+  match(Set cr (OverflowAddI op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "AR $op1,$op2\t # overflow check int" %}
+  ins_encode %{
+    __ load_const_optimized(Z_R0_scratch, $op2$$constant);
+    __ z_ar(Z_R0_scratch, $op1$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long add overflow check, register + register.
+instruct overflowAddL_reg_reg(flagsReg cr, iRegL op1, iRegL op2) %{
+  match(Set cr (OverflowAddL op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "AGR $op1,$op2\t # overflow check long" %}
+  ins_encode %{
+    __ z_lgr(Z_R0_scratch, $op1$$Register);
+    __ z_agr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long add overflow check, register + constant.
+instruct overflowAddL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
+  match(Set cr (OverflowAddL op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "AGR $op1,$op2\t # overflow check long" %}
+  ins_encode %{
+    __ load_const_optimized(Z_R0_scratch, $op2$$constant);
+    __ z_agr(Z_R0_scratch, $op1$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Int subtract overflow check, register - register.
+instruct overflowSubI_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+  match(Set cr (OverflowSubI op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "SR $op1,$op2\t # overflow check int" %}
+  ins_encode %{
+    __ z_lr(Z_R0_scratch, $op1$$Register);
+    __ z_sr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Int subtract overflow check, register - constant. Subtraction is not
+// commutative, so the constant goes to Z_R1_scratch and op1 to Z_R0_scratch.
+instruct overflowSubI_reg_imm(flagsReg cr, iRegI op1, immI op2) %{
+  match(Set cr (OverflowSubI op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "SR $op1,$op2\t # overflow check int" %}
+  ins_encode %{
+    __ load_const_optimized(Z_R1_scratch, $op2$$constant);
+    __ z_lr(Z_R0_scratch, $op1$$Register);
+    __ z_sr(Z_R0_scratch, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long subtract overflow check, register - register.
+instruct overflowSubL_reg_reg(flagsReg cr, iRegL op1, iRegL op2) %{
+  match(Set cr (OverflowSubL op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "SGR $op1,$op2\t # overflow check long" %}
+  ins_encode %{
+    __ z_lgr(Z_R0_scratch, $op1$$Register);
+    __ z_sgr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long subtract overflow check, register - constant.
+instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
+  match(Set cr (OverflowSubL op1 op2));
+  effect(DEF cr, USE op1, USE op2);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "SGR $op1,$op2\t # overflow check long" %}
+  ins_encode %{
+    __ load_const_optimized(Z_R1_scratch, $op2$$constant);
+    __ z_lgr(Z_R0_scratch, $op1$$Register);
+    __ z_sgr(Z_R0_scratch, Z_R1_scratch);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Int negate overflow check, matched as (0 - op2): Z_R0_scratch is cleared
+// and op2 subtracted from it.
+instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
+  match(Set cr (OverflowSubI zero op2));
+  effect(DEF cr, USE op2);
+  format %{ "NEG $op2\t# overflow check int" %}
+  ins_encode %{
+    __ clear_reg(Z_R0_scratch, false, false);
+    __ z_sr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long negate overflow check, matched as (0 - op2).
+instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
+  match(Set cr (OverflowSubL zero op2));
+  effect(DEF cr, USE op2);
+  format %{ "NEGG $op2\t# overflow check long" %}
+  ins_encode %{
+    __ clear_reg(Z_R0_scratch, true, false);
+    __ z_sgr(Z_R0_scratch, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// No intrinsics for multiplication, since there is no easy way
+// to check for overflow.
+
+
+//----------Floating Point Arithmetic Instructions-----------------------------
+
+// All rules in this section are two-operand: dst is both the left input and
+// the result. ADD/SUB/ABS/NEG(ABS) rules declare KILL cr; MUL and DIV rules
+// do not.
+
+// ADD
+
+// Add float single precision
+instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (AddF dst src));
+  effect(KILL cr);
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "AEBR $dst,$src" %}
+  opcode(AEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Add float single precision, second operand loaded from memory
+instruct addF_reg_mem(regF dst, memoryRX src, flagsReg cr)%{
+  match(Set dst (AddF dst (LoadF src)));
+  effect(KILL cr);
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "AEB $dst,$src\t # floatMemory" %}
+  opcode(AEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Add float double precision
+instruct addD_reg_reg(regD dst, regD src, flagsReg cr) %{
+  match(Set dst (AddD dst src));
+  effect(KILL cr);
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "ADBR $dst,$src" %}
+  opcode(ADBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Add float double precision, second operand loaded from memory
+instruct addD_reg_mem(regD dst, memoryRX src, flagsReg cr)%{
+  match(Set dst (AddD dst (LoadD src)));
+  effect(KILL cr);
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "ADB $dst,$src\t # doubleMemory" %}
+  opcode(ADB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// SUB
+
+// Sub float single precision
+instruct subF_reg_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (SubF dst src));
+  effect(KILL cr);
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "SEBR $dst,$src" %}
+  opcode(SEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Sub float single precision, subtrahend loaded from memory
+instruct subF_reg_mem(regF dst, memoryRX src, flagsReg cr)%{
+  match(Set dst (SubF dst (LoadF src)));
+  effect(KILL cr);
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "SEB $dst,$src\t # floatMemory" %}
+  opcode(SEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Sub float double precision
+instruct subD_reg_reg(regD dst, regD src, flagsReg cr) %{
+  match(Set dst (SubD dst src));
+  effect(KILL cr);
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "SDBR $dst,$src" %}
+  opcode(SDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Sub float double precision, subtrahend loaded from memory
+instruct subD_reg_mem(regD dst, memoryRX src, flagsReg cr)%{
+  match(Set dst (SubD dst (LoadD src)));
+  effect(KILL cr);
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "SDB $dst,$src\t # doubleMemory" %}
+  opcode(SDB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// MUL
+
+// Mul float single precision
+instruct mulF_reg_reg(regF dst, regF src) %{
+  match(Set dst (MulF dst src));
+  // CC unchanged by MUL.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "MEEBR $dst,$src" %}
+  opcode(MEEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Mul float single precision, multiplier loaded from memory
+instruct mulF_reg_mem(regF dst, memoryRX src)%{
+  match(Set dst (MulF dst (LoadF src)));
+  // CC unchanged by MUL.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "MEEB $dst,$src\t # floatMemory" %}
+  opcode(MEEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Mul float double precision
+instruct mulD_reg_reg(regD dst, regD src) %{
+  match(Set dst (MulD dst src));
+  // CC unchanged by MUL.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "MDBR $dst,$src" %}
+  opcode(MDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Mul float double precision, multiplier loaded from memory
+instruct mulD_reg_mem(regD dst, memoryRX src)%{
+  match(Set dst (MulD dst (LoadD src)));
+  // CC unchanged by MUL.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "MDB $dst,$src\t # doubleMemory" %}
+  opcode(MDB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// DIV
+
+// Div float single precision
+instruct divF_reg_reg(regF dst, regF src) %{
+  match(Set dst (DivF dst src));
+  // CC unchanged by DIV.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "DEBR $dst,$src" %}
+  opcode(DEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Div float single precision, divisor loaded from memory
+instruct divF_reg_mem(regF dst, memoryRX src)%{
+  match(Set dst (DivF dst (LoadF src)));
+  // CC unchanged by DIV.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "DEB $dst,$src\t # floatMemory" %}
+  opcode(DEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Div float double precision
+instruct divD_reg_reg(regD dst, regD src) %{
+  match(Set dst (DivD dst src));
+  // CC unchanged by DIV.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "DDBR $dst,$src" %}
+  opcode(DDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Div float double precision, divisor loaded from memory
+instruct divD_reg_mem(regD dst, memoryRX src)%{
+  match(Set dst (DivD dst (LoadD src)));
+  // CC unchanged by DIV.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "DDB $dst,$src\t # doubleMemory" %}
+  opcode(DDB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ABS
+
+// Absolute float single precision
+instruct absF_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (AbsF src));
+  effect(KILL cr);
+  size(4);
+  format %{ "LPEBR $dst,$src\t float" %}
+  opcode(LPEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Absolute float double precision
+instruct absD_reg(regD dst, regD src, flagsReg cr) %{
+  match(Set dst (AbsD src));
+  effect(KILL cr);
+  size(4);
+  format %{ "LPDBR $dst,$src\t double" %}
+  opcode(LPDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// NEG(ABS)
+
+// Negative absolute float single precision
+// Folds NegF(AbsF(src)) into one instruction (LNEBR per the format string).
+instruct nabsF_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (NegF (AbsF src)));
+  effect(KILL cr);
+  size(4);
+  format %{ "LNEBR $dst,$src\t float" %}
+  opcode(LNEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Negative absolute float double precision
+instruct nabsD_reg(regD dst, regD src, flagsReg cr) %{
+  // Folds (NegD (AbsD src)) into one instruction; condition code is killed.
+  match(Set dst (NegD (AbsD src)));
+  effect(KILL cr);
+  size(4);
+  format %{ "LNDBR $dst,$src\t double" %}
+  opcode(LNDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// NEG
+
+// Single-precision negate. Condition code is killed.
+instruct negF_reg(regF dst, regF src, flagsReg cr) %{
+  match(Set dst (NegF src));
+  effect(KILL cr);
+  size(4);
+  format %{ "NegF $dst,$src\t float" %}
+  ins_encode %{ __ z_lcebr($dst$$FloatRegister, $src$$FloatRegister); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Double-precision negate. Condition code is killed.
+instruct negD_reg(regD dst, regD src, flagsReg cr) %{
+  match(Set dst (NegD src));
+  effect(KILL cr);
+  size(4);
+  format %{ "NegD $dst,$src\t double" %}
+  ins_encode %{ __ z_lcdbr($dst$$FloatRegister, $src$$FloatRegister); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// SQRT
+
+// Sqrt float precision.
+// Matches the float -> double -> sqrt -> float pattern and performs the
+// square root directly in single precision.
+instruct sqrtF_reg(regF dst, regF src) %{
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+  // CC remains unchanged.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "SQEBR $dst,$src" %}
+  opcode(SQEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Sqrt double precision
+instruct sqrtD_reg(regD dst, regD src) %{
+  match(Set dst (SqrtD src));
+  // CC remains unchanged.
+  ins_cost(ALU_REG_COST);
+  size(4);
+  format %{ "SQDBR $dst,$src" %}
+  opcode(SQDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Sqrt float precision, memory operand.
+// NOTE(review): unlike divF_reg_mem, the match rule has no LoadF around the
+// memory operand - confirm this rule can actually be selected.
+instruct sqrtF_mem(regF dst, memoryRX src) %{
+  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
+  // CC remains unchanged.
+  ins_cost(ALU_MEMORY_COST);
+  size(6);
+  format %{ "SQEB $dst,$src\t # floatMemory" %}
+  opcode(SQEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Sqrt double precision, memory operand.
+// NOTE(review): unlike divD_reg_mem, the match rule has no LoadD around the
+// memory operand - confirm this rule can actually be selected.
+instruct sqrtD_mem(regD dst, memoryRX src) %{
+  match(Set dst (SqrtD src));
+  // CC remains unchanged.
+  ins_cost(ALU_MEMORY_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "SQDB $dst,$src\t # doubleMemory" %}
+  opcode(SQDB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Logical Instructions-----------------------------------------------
+// All AND/OR/XOR rules below declare the condition code as killed.
+
+// Register And
+instruct andI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_LOW);
+  size(2);
+  format %{ "NR $dst,$src\t # int" %}
+  opcode(NR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// And with an int loaded from memory. Two opcodes (NY/N) are handed to the
+// encoder, so no fixed size is declared.
+instruct andI_Reg_mem(iRegI dst, memory src, flagsReg cr) %{
+  match(Set dst (AndI dst (LoadI src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "N(Y) $dst, $src\t # int" %}
+  opcode(NY_ZOPC, N_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate And
+instruct andI_reg_uimm32(iRegI dst, uimmI src, flagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "NILF $dst,$src" %}
+  opcode(NILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// And with a mask handled by one halfword instruction: bits 16..31 of the
+// constant are passed to NILH.
+instruct andI_reg_uimmI_LH1(iRegI dst, uimmI_LH1 src, flagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NILH $dst,$src" %}
+  ins_encode %{ __ z_nilh($dst$$Register, ($src$$constant >> 16) & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Same idea for the lowest halfword: bits 0..15 of the constant go to NILL.
+instruct andI_reg_uimmI_LL1(iRegI dst, uimmI_LL1 src, flagsReg cr) %{
+  match(Set dst (AndI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NILL $dst,$src" %}
+  ins_encode %{ __ z_nill($dst$$Register, $src$$constant & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register And Long
+instruct andL_reg_reg(iRegL dst, iRegL src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NGR $dst,$src\t # long" %}
+  opcode(NGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// And with a long loaded from memory.
+instruct andL_Reg_mem(iRegL dst, memory src, flagsReg cr) %{
+  match(Set dst (AndL dst (LoadL src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "NG $dst, $src\t # long" %}
+  opcode(NG_ZOPC, NG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long And with a single-halfword mask. The four rules below differ only in
+// which 16-bit slice of the constant is extracted (shift 0/16/32/48) and which
+// halfword instruction (NILL/NILH/NIHL/NIHH) receives it.
+instruct andL_reg_uimmL_LL1(iRegL dst, uimmL_LL1 src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NILL $dst,$src\t # long" %}
+  ins_encode %{ __ z_nill($dst$$Register, $src$$constant & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct andL_reg_uimmL_LH1(iRegL dst, uimmL_LH1 src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NILH $dst,$src\t # long" %}
+  ins_encode %{ __ z_nilh($dst$$Register, ($src$$constant >> 16) & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct andL_reg_uimmL_HL1(iRegL dst, uimmL_HL1 src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NIHL $dst,$src\t # long" %}
+  ins_encode %{ __ z_nihl($dst$$Register, ($src$$constant >> 32) & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct andL_reg_uimmL_HH1(iRegL dst, uimmL_HH1 src, flagsReg cr) %{
+  match(Set dst (AndL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "NIHH $dst,$src\t # long" %}
+  ins_encode %{ __ z_nihh($dst$$Register, ($src$$constant >> 48) & 0xFFFF); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// OR
+
+// Or Instructions
+// Register Or
+instruct orI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (OrI dst src));
+  effect(KILL cr);
+  size(2);
+  format %{ "OR $dst,$src" %}
+  opcode(OR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Or with an int loaded from memory. Two opcodes (OY/O) are handed to the
+// encoder, so no fixed size is declared.
+instruct orI_Reg_mem(iRegI dst, memory src, flagsReg cr) %{
+  match(Set dst (OrI dst (LoadI src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "O(Y) $dst, $src\t # int" %}
+  opcode(OY_ZOPC, O_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Or
+instruct orI_reg_uimm16(iRegI dst, uimmI16 con, flagsReg cr) %{
+  match(Set dst (OrI dst con));
+  effect(KILL cr);
+  size(4);
+  format %{ "OILL $dst,$con" %}
+  opcode(OILL_ZOPC);
+  ins_encode(z_riform_unsigned(dst, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct orI_reg_uimm32(iRegI dst, uimmI con, flagsReg cr) %{
+  match(Set dst (OrI dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "OILF $dst,$con" %}
+  opcode(OILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Or Long
+instruct orL_reg_reg(iRegL dst, iRegL src, flagsReg cr) %{
+  match(Set dst (OrL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "OGR $dst,$src\t # long" %}
+  opcode(OGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Or with a long loaded from memory.
+instruct orL_Reg_mem(iRegL dst, memory src, flagsReg cr) %{
+  match(Set dst (OrL dst (LoadL src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "OG $dst, $src\t # long" %}
+  opcode(OG_ZOPC, OG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Or long
+instruct orL_reg_uimm16(iRegL dst, uimmL16 con, flagsReg cr) %{
+  match(Set dst (OrL dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "OILL $dst,$con\t # long" %}
+  opcode(OILL_ZOPC);
+  ins_encode(z_riform_unsigned(dst, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Long Or with an unsigned 32-bit immediate.
+// The destination is a long register and the rule matches OrL, consistent
+// with orL_reg_uimm16 and xorL_reg_uimm32. (It previously declared iRegI and
+// matched OrI against a long constant operand.)
+instruct orL_reg_uimm32(iRegL dst, uimmL32 con, flagsReg cr) %{
+  match(Set dst (OrL dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "OILF $dst,$con\t # long" %}
+  opcode(OILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// XOR
+
+// Register Xor
+instruct xorI_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (XorI dst src));
+  effect(KILL cr);
+  size(2);
+  format %{ "XR $dst,$src" %}
+  opcode(XR_ZOPC);
+  ins_encode(z_rrform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Xor with an int loaded from memory. Two opcodes (XY/X) are handed to the
+// encoder, so no fixed size is declared.
+instruct xorI_Reg_mem(iRegI dst, memory src, flagsReg cr) %{
+  match(Set dst (XorI dst (LoadI src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "X(Y) $dst, $src\t # int" %}
+  opcode(XY_ZOPC, X_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Xor
+instruct xorI_reg_uimm32(iRegI dst, uimmI src, flagsReg cr) %{
+  match(Set dst (XorI dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "XILF $dst,$src" %}
+  opcode(XILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Xor Long
+instruct xorL_reg_reg(iRegL dst, iRegL src, flagsReg cr) %{
+  match(Set dst (XorL dst src));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "XGR $dst,$src\t # long" %}
+  opcode(XGR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Xor with a long loaded from memory.
+instruct xorL_Reg_mem(iRegL dst, memory src, flagsReg cr) %{
+  match(Set dst (XorL dst (LoadL src)));
+  effect(KILL cr);
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "XG $dst, $src\t # long" %}
+  opcode(XG_ZOPC, XG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Immediate Xor Long
+instruct xorL_reg_uimm32(iRegL dst, uimmL32 con, flagsReg cr) %{
+  match(Set dst (XorL dst con));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST_HIGH);
+  size(6);
+  format %{ "XILF $dst,$con\t # long" %}
+  opcode(XILF_ZOPC);
+  ins_encode(z_rilform_unsigned(dst, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Convert to Boolean-------------------------------------------------
+
+// Convert integer to boolean.
+// Two-instruction, branch-free sequence: negate the absolute value, then
+// shift the sign bit down into bit 0.
+instruct convI2B(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (Conv2B src));
+  effect(KILL cr);
+  ins_cost(3 * DEFAULT_COST);
+  size(6);
+  format %{ "convI2B $dst,$src" %}
+  ins_encode %{
+    __ z_lnr($dst$$Register, $src$$Register);  // Rdst := -|Rsrc|, i.e. Rdst == 0 <=> Rsrc == 0
+    __ z_srl($dst$$Register, 31);              // Rdst := sign(Rdst)
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Convert pointer to boolean. Same scheme as convI2B using the 64-bit
+// instruction forms and a shift by 63.
+instruct convP2B(iRegI dst, iRegP_N2P src, flagsReg cr) %{
+  match(Set dst (Conv2B src));
+  effect(KILL cr);
+  ins_cost(3 * DEFAULT_COST);
+  size(10);
+  format %{ "convP2B $dst,$src" %}
+  ins_encode %{
+    __ z_lngr($dst$$Register, $src$$Register);       // Rdst := -|Rsrc| i.e. Rdst == 0 <=> Rsrc == 0
+    __ z_srlg($dst$$Register, $dst$$Register, 63);   // Rdst := sign(Rdst)
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// CmpLTMask: the 64-bit difference dst - src is arithmetically shifted right
+// by 63, leaving its sign replicated across the register.
+// Z_R0_scratch is used as a temporary.
+instruct cmpLTMask_reg_reg(iRegI dst, iRegI src, flagsReg cr) %{
+  match(Set dst (CmpLTMask dst src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST);
+  size(18);
+  format %{ "Set $dst CmpLTMask $dst,$src" %}
+  ins_encode %{
+    // Avoid signed 32 bit overflow: Do sign extend and sub 64 bit.
+    __ z_lgfr(Z_R0_scratch, $src$$Register);
+    __ z_lgfr($dst$$Register, $dst$$Register);
+    __ z_sgr($dst$$Register, Z_R0_scratch);
+    __ z_srag($dst$$Register, $dst$$Register, 63);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// CmpLTMask against zero: an arithmetic shift right by 31 is sufficient.
+instruct cmpLTMask_reg_zero(iRegI dst, immI_0 zero, flagsReg cr) %{
+  match(Set dst (CmpLTMask dst zero));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "Set $dst CmpLTMask $dst,$zero" %}
+  ins_encode %{ __ z_sra($dst$$Register, 31); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------Arithmetic Conversion Instructions---------------------------------
+// The conversions operations are all Alpha sorted. Please keep it that way!
+
+// Round double to float.
+instruct convD2F_reg(regF dst, regD src) %{
+  match(Set dst (ConvD2F src));
+  // CC remains unchanged.
+  size(4);
+  format %{ "LEDBR $dst,$src" %}
+  opcode(LEDBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// The four float/double -> int/long conversions below share one scheme:
+// preset the result with 0, compare the source with itself to detect an
+// unordered (NaN) argument, skip the conversion in that case, otherwise
+// convert with rounding mode "to zero".
+instruct convF2I_reg(iRegI dst, regF src, flagsReg cr) %{
+  match(Set dst (ConvF2I src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(16);
+  format %{ "convF2I $dst,$src" %}
+  ins_encode %{
+    Label done;
+    __ clear_reg($dst$$Register, false, false);            // Initialize with result for unordered: 0.
+    __ z_cebr($src$$FloatRegister, $src$$FloatRegister);   // Self-compare: sets CC for the unordered test below.
+    __ z_brno(done);                                       // Result is zero if unordered argument.
+    __ z_cfebr($dst$$Register, $src$$FloatRegister, Assembler::to_zero);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct convD2I_reg(iRegI dst, regD src, flagsReg cr) %{
+  match(Set dst (ConvD2I src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(16);
+  format %{ "convD2I $dst,$src" %}
+  ins_encode %{
+    Label done;
+    __ clear_reg($dst$$Register, false, false);            // Initialize with result for unordered: 0.
+    __ z_cdbr($src$$FloatRegister, $src$$FloatRegister);   // Self-compare: sets CC for the unordered test below.
+    __ z_brno(done);                                       // Result is zero if unordered argument.
+    __ z_cfdbr($dst$$Register, $src$$FloatRegister, Assembler::to_zero);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct convF2L_reg(iRegL dst, regF src, flagsReg cr) %{
+  match(Set dst (ConvF2L src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(16);
+  format %{ "convF2L $dst,$src" %}
+  ins_encode %{
+    Label done;
+    __ clear_reg($dst$$Register, true, false);             // Initialize with result for unordered: 0.
+    __ z_cebr($src$$FloatRegister, $src$$FloatRegister);   // Self-compare: sets CC for the unordered test below.
+    __ z_brno(done);                                       // Result is zero if unordered argument.
+    __ z_cgebr($dst$$Register, $src$$FloatRegister, Assembler::to_zero);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct convD2L_reg(iRegL dst, regD src, flagsReg cr) %{
+  match(Set dst (ConvD2L src));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  size(16);
+  format %{ "convD2L $dst,$src" %}
+  ins_encode %{
+    Label done;
+    __ clear_reg($dst$$Register, true, false);             // Initialize with result for unordered: 0.
+    __ z_cdbr($src$$FloatRegister, $src$$FloatRegister);   // Self-compare: sets CC for the unordered test below.
+    __ z_brno(done);                                       // Result is zero if unordered argument.
+    __ z_cgdbr($dst$$Register, $src$$FloatRegister, Assembler::to_zero);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Widen float to double, register source.
+instruct convF2D_reg(regD dst, regF src) %{
+  match(Set dst (ConvF2D src));
+  // CC remains unchanged.
+  size(4);
+  format %{ "LDEBR $dst,$src" %}
+  opcode(LDEBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Widen float to double, memory source.
+// NOTE(review): the match rule has no LoadF around the memory operand -
+// confirm this rule can actually be selected.
+instruct convF2D_mem(regD dst, memoryRX src) %{
+  match(Set dst (ConvF2D src));
+  // CC remains unchanged.
+  size(6);
+  format %{ "LDEB $dst,$src" %}
+  opcode(LDEB_ZOPC);
+  ins_encode(z_form_rt_memFP(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Convert int to double.
+instruct convI2D_reg(regD dst, iRegI src) %{
+  match(Set dst (ConvI2D src));
+  // CC remains unchanged.
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "CDFBR $dst,$src" %}
+  opcode(CDFBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Optimization that saves up to two memory operations for each conversion.
+instruct convI2F_ireg(regF dst, iRegI src) %{
+  match(Set dst (ConvI2F src));
+  // CC remains unchanged.
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "CEFBR $dst,$src\t # convert int to float" %}
+  opcode(CEFBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Sign-extend int to long.
+instruct convI2L_reg(iRegL dst, iRegI src) %{
+  match(Set dst (ConvI2L src));
+  size(4);
+  format %{ "LGFR $dst,$src\t # int->long" %}
+  opcode(LGFR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Zero-extend convert int to long.
+// Folds ConvI2L followed by an AndL with the 32-bit mask into one LLGFR.
+instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask) %{
+  match(Set dst (AndL (ConvI2L src) mask));
+  size(4);
+  format %{ "LLGFR $dst, $src \t # zero-extend int to long" %}
+  ins_encode %{ __ z_llgfr($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Zero-extend convert int to long, int loaded from memory.
+instruct convI2L_mem_zex(iRegL dst, memory src, immL_32bits mask) %{
+  match(Set dst (AndL (ConvI2L (LoadI src)) mask));
+  // Uses load_const_optimized, so size can vary.
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "LLGF $dst, $src \t # zero-extend int to long" %}
+  opcode(LLGF_ZOPC, LLGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Zero-extend long
+instruct zeroExtend_long(iRegL dst, iRegL src, immL_32bits mask) %{
+  match(Set dst (AndL src mask));
+  size(4);
+  format %{ "LLGFR $dst, $src \t # zero-extend long to long" %}
+  ins_encode %{ __ z_llgfr($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// (src << 16) >> 16 is a short -> int sign extension: one LHR.
+instruct rShiftI16_lShiftI16_reg(iRegI dst, iRegI src, immI_16 amount) %{
+  match(Set dst (RShiftI (LShiftI src amount) amount));
+  size(4);
+  format %{ "LHR $dst,$src\t short->int" %}
+  opcode(LHR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// (src << 24) >> 24 is a byte -> int sign extension: one LBR.
+instruct rShiftI24_lShiftI24_reg(iRegI dst, iRegI src, immI_24 amount) %{
+  match(Set dst (RShiftI (LShiftI src amount) amount));
+  size(4);
+  format %{ "LBR $dst,$src\t byte->int" %}
+  opcode(LBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Raw bit moves between the integer and floating-point register files.
+// The rules below go through a stack slot: "*_stack_reg" loads a register
+// from a slot, "*_reg_stack" stores a register into a slot.
+
+instruct MoveF2I_stack_reg(iRegI dst, stackSlotF src) %{
+  match(Set dst (MoveF2I src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "L $dst,$src\t # MoveF2I" %}
+  opcode(L_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// javax.imageio.stream.ImageInputStreamImpl.toFloats([B[FII)
+instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
+  match(Set dst (MoveI2F src));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LE $dst,$src\t # MoveI2F" %}
+  opcode(LE_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// The format string lists operands as "$dst,$src", matching the encoding
+// call and the other load rules in this group.
+instruct MoveD2L_stack_reg(iRegL dst, stackSlotD src) %{
+  match(Set dst (MoveD2L src));
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "LG $dst,$src\t # MoveD2L" %}
+  opcode(LG_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
+  match(Set dst (MoveL2D src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "LD $dst,$src\t # MoveL2D" %}
+  opcode(LD_ZOPC);
+  ins_encode(z_form_rt_mem(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
+  match(Set dst (MoveI2F src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "ST $src,$dst\t # MoveI2F" %}
+  opcode(ST_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
+  match(Set dst (MoveD2L src));
+  effect(DEF dst, USE src);
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STD $src,$dst\t # MoveD2L" %}
+  opcode(STD_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
+  match(Set dst (MoveL2D src));
+  ins_cost(MEMORY_REF_COST);
+  size(6);
+  format %{ "STG $src,$dst\t # MoveL2D" %}
+  opcode(STG_ZOPC);
+  ins_encode(z_form_rt_mem(src, dst));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Convert long to float.
+instruct convL2F_reg(regF dst, iRegL src) %{
+  match(Set dst (ConvL2F src));
+  // CC remains unchanged.
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "CEGBR $dst,$src" %}
+  opcode(CEGBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Convert long to double.
+instruct convL2D_reg(regD dst, iRegL src) %{
+  match(Set dst (ConvL2D src));
+  // CC remains unchanged.
+  ins_cost(DEFAULT_COST);
+  size(4);
+  format %{ "CDGBR $dst,$src" %}
+  opcode(CDGBR_ZOPC);
+  ins_encode(z_rreform(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Narrow long to int. Emitted through lr_if_needed, so no fixed size is
+// declared.
+instruct convL2I_reg(iRegI dst, iRegL src) %{
+  match(Set dst (ConvL2I src));
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "LR $dst,$src\t # long->int (if needed)" %}
+  ins_encode %{ __ lr_if_needed($dst$$Register, $src$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Register Shift Right Immediate
+// Folds ConvL2I of a long right shift by a constant in [32..63] (operand
+// class immI_32_63) into a single SRAG.
+instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt, flagsReg cr) %{
+  match(Set dst (ConvL2I (RShiftL src cnt)));
+  effect(KILL cr);
+  size(6);
+  format %{ "SRAG $dst,$src,$cnt" %}
+  opcode(SRAG_ZOPC);
+  ins_encode(z_rsyform_const(dst, src, cnt));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------TRAP based zero checks and range checks----------------------------
+
+// SIGTRAP based implicit range checks in compiled code.
+// In the ideal graph a range check appears in one of two shapes:
+//   - (If le (CmpU length index)), (IfTrue  throw exception)
+//   - (If lt (CmpU index length)), (IfFalse throw exception)
+//
+// Shape 'If le (CmpU length index)', index is a 16-bit unsigned constant.
+instruct rangeCheck_iReg_uimmI16(cmpOpT cmp, iRegI length, uimmI16 index, label labl) %{
+  match(If cmp (CmpU length index));
+  effect(USE labl);
+  predicate(TrapBasedRangeChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le &&
+            PROB_UNLIKELY(_leaf->as_If ()->_prob) >= PROB_ALWAYS &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  ins_cost(1);
+  // TODO: s390 port size(FIXED_SIZE);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "RangeCheck len=$length cmp=$cmp idx=$index => trap $labl" %}
+  ins_encode %{ __ z_clfit($length$$Register, $index$$constant, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+// Shape 'If lt (CmpU index length)', both operands in registers.
+instruct rangeCheck_iReg_iReg(cmpOpT cmp, iRegI index, iRegI length, label labl, flagsReg cr) %{
+  match(If cmp (CmpU index length));
+  effect(USE labl, KILL cr);
+  predicate(TrapBasedRangeChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
+            _leaf->as_If ()->_prob >= PROB_ALWAYS &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  ins_cost(1);
+  // TODO: s390 port size(FIXED_SIZE);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "RangeCheck idx=$index cmp=$cmp len=$length => trap $labl" %}
+  ins_encode %{ __ z_clrt($index$$Register, $length$$Register, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+// Shape 'If lt (CmpU index length)', length is a 16-bit unsigned constant.
+instruct rangeCheck_uimmI16_iReg(cmpOpT cmp, iRegI index, uimmI16 length, label labl) %{
+  match(If cmp (CmpU index length));
+  effect(USE labl);
+  predicate(TrapBasedRangeChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt &&
+            _leaf->as_If ()->_prob >= PROB_ALWAYS &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  ins_cost(1);
+  // TODO: s390 port size(FIXED_SIZE);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "RangeCheck idx=$index cmp=$cmp len= $length => trap $labl" %}
+  ins_encode %{ __ z_clfit($index$$Register, $length$$constant, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+// Trap-based null check of a pointer: 64-bit compare-immediate-and-trap
+// against 0. Only selected when TrapBasedNullChecks is on, the test is 'ne'
+// and the branch leads to an uncommon trap.
+instruct zeroCheckP_iReg_imm0(cmpOpT cmp, iRegP_N2P value, immP0 zero, label labl) %{
+  match(If cmp (CmpP value zero));
+  effect(USE labl);
+  predicate(TrapBasedNullChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
+            _leaf->as_If ()->_prob >= PROB_LIKELY_MAG(4) &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  size(6);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "ZeroCheckP value=$value cmp=$cmp zero=$zero => trap $labl" %}
+  ins_encode %{ __ z_cgit($value$$Register, 0, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+// Trap-based null check of a narrow oop: same conditions as above, using the
+// 32-bit compare-immediate-and-trap.
+instruct zeroCheckN_iReg_imm0(cmpOpT cmp, iRegN_P2N value, immN0 zero, label labl) %{
+  match(If cmp (CmpN value zero));
+  effect(USE labl);
+  predicate(TrapBasedNullChecks &&
+            _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne &&
+            _leaf->as_If ()->_prob >= PROB_LIKELY_MAG(4) &&
+            Matcher::branches_to_uncommon_trap(_leaf));
+  size(6);
+
+  ins_is_TrapBasedCheckNode(true);
+
+  format %{ "ZeroCheckN value=$value cmp=$cmp zero=$zero => trap $labl" %}
+  ins_encode %{ __ z_cit($value$$Register, 0, $cmp$$cmpcode); %}
+  ins_pipe(pipe_class_trap);
+%}
+
+//----------Compare instructions-----------------------------------------------
+// Every rule in this section produces the flags register (Set cr ...).
+
+// INT signed
+
+// Signed int compare, register with register.
+instruct compI_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+  match(Set cr (CmpI op1 op2));
+  size(2);
+  format %{ "CR $op1,$op2" %}
+  opcode(CR_ZOPC);
+  ins_encode(z_rrform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed int compare with a 32-bit immediate.
+instruct compI_reg_imm(flagsReg cr, iRegI op1, immI op2) %{
+  match(Set cr (CmpI op1 op2));
+  size(6);
+  format %{ "CFI $op1,$op2" %}
+  opcode(CFI_ZOPC);
+  ins_encode(z_rilform_signed(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed int compare with a 16-bit immediate.
+instruct compI_reg_imm16(flagsReg cr, iRegI op1, immI16 op2) %{
+  match(Set cr (CmpI op1 op2));
+  size(4);
+  format %{ "CHI $op1,$op2" %}
+  opcode(CHI_ZOPC);
+  ins_encode(z_riform_signed(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed int compare with zero: load-and-test of the register onto itself.
+instruct compI_reg_imm0(flagsReg cr, iRegI op1, immI_0 zero) %{
+  match(Set cr (CmpI op1 zero));
+  ins_cost(DEFAULT_COST_LOW);
+  size(2);
+  format %{ "LTR $op1,$op1" %}
+  opcode(LTR_ZOPC);
+  ins_encode(z_rrform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed int compare with an int loaded from memory (CY or C).
+instruct compI_reg_mem(flagsReg cr, iRegI op1, memory op2) %{
+  match(Set cr (CmpI op1 (LoadI op2)));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "C(Y) $op1, $op2\t # int" %}
+  opcode(CY_ZOPC, C_ZOPC);
+  ins_encode(z_form_rt_mem_opt(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// INT unsigned
+
+// Unsigned int compare, register with register.
+instruct compU_reg_reg(flagsReg cr, iRegI op1, iRegI op2) %{
+  match(Set cr (CmpU op1 op2));
+  size(2);
+  format %{ "CLR $op1,$op2\t # unsigned" %}
+  opcode(CLR_ZOPC);
+  ins_encode(z_rrform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Unsigned int compare with a 32-bit immediate.
+instruct compU_reg_uimm(flagsReg cr, iRegI op1, uimmI op2) %{
+  match(Set cr (CmpU op1 op2));
+  size(6);
+  format %{ "CLFI $op1,$op2\t # unsigned" %}
+  opcode(CLFI_ZOPC);
+  ins_encode(z_rilform_unsigned(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Unsigned int compare with zero.
+// NOTE(review): this uses the same LTR opcode as the signed compI_reg_imm0
+// rule - confirm that only sign-agnostic tests consume the result.
+instruct compU_reg_imm0(flagsReg cr, iRegI op1, immI_0 zero) %{
+  match(Set cr (CmpU op1 zero));
+  ins_cost(DEFAULT_COST_LOW);
+  size(2);
+  format %{ "LTR $op1,$op1\t # unsigned" %}
+  opcode(LTR_ZOPC);
+  ins_encode(z_rrform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Unsigned int compare with an int loaded from memory (CLY or CL).
+instruct compU_reg_mem(flagsReg cr, iRegI op1, memory op2) %{
+  match(Set cr (CmpU op1 (LoadI op2)));
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CL(Y) $op1, $op2\t # unsigned" %}
+  opcode(CLY_ZOPC, CL_ZOPC);
+  ins_encode(z_form_rt_mem_opt(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// LONG signed
+
+// Signed long compare, register with register.
+instruct compL_reg_reg(flagsReg cr, iRegL op1, iRegL op2) %{
+  match(Set cr (CmpL op1 op2));
+  size(4);
+  format %{ "CGR $op1,$op2\t # long" %}
+  opcode(CGR_ZOPC);
+  ins_encode(z_rreform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare against an int operand; the ConvI2L is folded in.
+instruct compL_reg_regI(flagsReg cr, iRegL op1, iRegI op2) %{
+  match(Set cr (CmpL op1 (ConvI2L op2)));
+  size(4);
+  format %{ "CGFR $op1,$op2\t # long/int" %}
+  opcode(CGFR_ZOPC);
+  ins_encode(z_rreform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare with a 32-bit immediate.
+instruct compL_reg_imm32(flagsReg cr, iRegL op1, immL32 con) %{
+  match(Set cr (CmpL op1 con));
+  size(6);
+  format %{ "CGFI $op1,$con" %}
+  opcode(CGFI_ZOPC);
+  ins_encode(z_rilform_signed(op1, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare with a 16-bit immediate.
+instruct compL_reg_imm16(flagsReg cr, iRegL op1, immL16 con) %{
+  match(Set cr (CmpL op1 con));
+  size(4);
+  format %{ "CGHI $op1,$con" %}
+  opcode(CGHI_ZOPC);
+  ins_encode(z_riform_signed(op1, con));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare with zero: load-and-test of the register onto itself.
+instruct compL_reg_imm0(flagsReg cr, iRegL op1, immL_0 con) %{
+  match(Set cr (CmpL op1 con));
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LTGR $op1,$op1" %}
+  opcode(LTGR_ZOPC);
+  ins_encode(z_rreform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Compare a widened int with long zero; the ConvI2L is folded in.
+instruct compL_conv_reg_imm0(flagsReg cr, iRegI op1, immL_0 con) %{
+  match(Set cr (CmpL (ConvI2L op1) con));
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LTGFR $op1,$op1" %}
+  opcode(LTGFR_ZOPC);
+  ins_encode(z_rreform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare with a long loaded from memory.
+instruct compL_reg_mem(iRegL dst, memory src, flagsReg cr) %{
+  match(Set cr (CmpL dst (LoadL src)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "CG $dst, $src\t # long" %}
+  opcode(CG_ZOPC, CG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Signed long compare with an int loaded from memory; the ConvI2L is folded in.
+instruct compL_reg_memI(iRegL dst, memory src, flagsReg cr) %{
+  match(Set cr (CmpL dst (ConvI2L (LoadI src))));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "CGF $dst, $src\t # long/int" %}
+  opcode(CGF_ZOPC, CGF_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// LONG unsigned
+
+// PTR unsigned
+
+// Pointer compare, register with register.
+instruct compP_reg_reg(flagsReg cr, iRegP_N2P op1, iRegP_N2P op2) %{
+  match(Set cr (CmpP op1 op2));
+  size(4);
+  format %{ "CLGR $op1,$op2\t # ptr" %}
+  opcode(CLGR_ZOPC);
+  ins_encode(z_rreform(op1, op2));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Pointer compare with null: load-and-test of the register onto itself.
+instruct compP_reg_imm0(flagsReg cr, iRegP_N2P op1, immP0 op2) %{
+  match(Set cr (CmpP op1 op2));
+  ins_cost(DEFAULT_COST_LOW);
+  size(4);
+  format %{ "LTGR $op1, $op1\t # ptr" %}
+  opcode(LTGR_ZOPC);
+  ins_encode(z_rreform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Null test of a decoded narrow oop, done directly on the narrow value.
+// Only selected when the narrow oop base is NULL and the shift is 0.
+// Don't use LTGFR which performs sign extend.
+instruct compP_decode_reg_imm0(flagsReg cr, iRegN op1, immP0 op2) %{
+  match(Set cr (CmpP (DecodeN op1) op2));
+  predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0);
+  ins_cost(DEFAULT_COST_LOW);
+  size(2);
+  format %{ "LTR $op1, $op1\t # ptr" %}
+  opcode(LTR_ZOPC);
+  ins_encode(z_rrform(op1, op1));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Pointer compare with a pointer loaded from memory.
+instruct compP_reg_mem(iRegP dst, memory src, flagsReg cr) %{
+  match(Set cr (CmpP dst (LoadP src)));
+  ins_cost(MEMORY_REF_COST);
+  size(Z_DISP3_SIZE);
+  format %{ "CLG $dst, $src\t # ptr" %}
+  opcode(CLG_ZOPC, CLG_ZOPC);
+  ins_encode(z_form_rt_mem_opt(dst, src));
+  ins_pipe(pipe_class_dummy);
+%}
+
+//----------Max and Min--------------------------------------------------------
+
+// Min Register with Register, branch-free variant built on conditional
+// register loads. Requires VM_Version::has_LoadStoreConditional().
+instruct z196_minI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI (z196 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+
+    if (Rsrc1 == Rsrc2) {
+      // Both inputs are the same register: at most a move is needed.
+      if (Rdst != Rsrc1) {
+        __ z_lgfr(Rdst, Rsrc1);
+      }
+    } else if (Rdst == Rsrc1) {   // Rdst preset with src1.
+      __ z_cr(Rsrc1, Rsrc2);      // Move src2 only if src1 is NotLow.
+      __ z_locr(Rdst, Rsrc2, Assembler::bcondNotLow);
+    } else if (Rdst == Rsrc2) {   // Rdst preset with src2.
+      __ z_cr(Rsrc2, Rsrc1);      // Move src1 only if src2 is NotLow.
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondNotLow);
+    } else {
+      // Rdst is disjoint from operands, move in either case.
+      __ z_cr(Rsrc1, Rsrc2);
+      __ z_locr(Rdst, Rsrc2, Assembler::bcondNotLow);
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondLow);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min Register with Register, using the fused compare-and-branch instruction.
+// Requires VM_Version::has_CompareBranch().
+instruct z10_minI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI (z10 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+    Label done;
+
+    if (Rsrc1 == Rsrc2) {
+      if (Rdst != Rsrc1) {
+        __ z_lgfr(Rdst, Rsrc1);
+      }
+    } else if (Rdst == Rsrc1) {
+      // Rdst already holds src1: keep it if it is the lower value.
+      __ z_crj(Rsrc1, Rsrc2, Assembler::bcondLow, done);
+      __ z_lgfr(Rdst, Rsrc2);
+    } else if (Rdst == Rsrc2) {
+      // Rdst already holds src2: keep it if it is the lower value.
+      __ z_crj(Rsrc2, Rsrc1, Assembler::bcondLow, done);
+      __ z_lgfr(Rdst, Rsrc1);
+    } else {
+      // Preset Rdst with src1, overwrite with src2 unless src1 is lower.
+      __ z_lgfr(Rdst, Rsrc1);
+      __ z_crj(Rsrc1, Rsrc2, Assembler::bcondLow, done);
+      __ z_lgfr(Rdst, Rsrc2);
+    }
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min Register with Register, separate compare and branch.
+// Selected when VM_Version::has_CompareBranch() is false.
+instruct minI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(!VM_Version::has_CompareBranch());
+  ins_cost(3 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    Register Rsrc2 = $src2$$Register;
+    Label done;
+
+    if (Rsrc1 == Rsrc2) {
+      if (Rdst != Rsrc1) {
+        __ z_lgfr(Rdst, Rsrc1);
+      }
+    } else if (Rdst == Rsrc1) {
+      __ z_cr(Rsrc1, Rsrc2);
+      __ z_brl(done);
+      __ z_lgfr(Rdst, Rsrc2);
+    } else if (Rdst == Rsrc2) {
+      __ z_cr(Rsrc2, Rsrc1);
+      __ z_brl(done);
+      __ z_lgfr(Rdst, Rsrc1);
+    } else {
+      __ z_lgfr(Rdst, Rsrc1);
+      __ z_cr(Rsrc1, Rsrc2);
+      __ z_brl(done);
+      __ z_lgfr(Rdst, Rsrc2);
+    }
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min of a register and a 32-bit constant, conditional-load variant.
+// Z_R0_scratch holds the constant when dst and src1 are the same register.
+instruct z196_minI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const32 (z196 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    int Isrc2 = $src2$$constant;
+
+    if (Rdst == Rsrc1) {
+      __ load_const_optimized(Z_R0_scratch, Isrc2);
+      __ z_cfi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotLow);
+    } else {
+      __ load_const_optimized(Rdst, Isrc2);
+      __ z_cfi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondLow);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min of a register and a 32-bit constant, compare-and-branch variant
+// (no predicate).
+instruct minI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const32" %}
+  ins_encode %{
+    Label done;
+    if ($dst$$Register != $src1$$Register) {
+      __ z_lgfr($dst$$Register, $src1$$Register);
+    }
+    __ z_cfi($src1$$Register, $src2$$constant);
+    __ z_brl(done);
+    __ z_lgfi($dst$$Register, $src2$$constant);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min of a register and a 16-bit constant, conditional-load variant.
+instruct z196_minI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const16 (z196 only)" %}
+  ins_encode %{
+    Register Rdst = $dst$$Register;
+    Register Rsrc1 = $src1$$Register;
+    int Isrc2 = $src2$$constant;
+
+    if (Rdst == Rsrc1) {
+      __ load_const_optimized(Z_R0_scratch, Isrc2);
+      __ z_chi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotLow);
+    } else {
+      __ load_const_optimized(Rdst, Isrc2);
+      __ z_chi(Rsrc1, Isrc2);
+      __ z_locr(Rdst, Rsrc1, Assembler::bcondLow);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min of a register and a 16-bit constant, compare-and-branch variant
+// (no predicate).
+instruct minI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  ins_cost(2 * DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const16" %}
+  ins_encode %{
+    Label done;
+    if ($dst$$Register != $src1$$Register) {
+      __ z_lgfr($dst$$Register, $src1$$Register);
+    }
+    __ z_chi($src1$$Register, $src2$$constant);
+    __ z_brl(done);
+    __ z_lghi($dst$$Register, $src2$$constant);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Min of a register and an 8-bit constant, fused compare-immediate-and-branch.
+// Requires VM_Version::has_CompareBranch().
+instruct z10_minI_reg_imm8(iRegI dst, iRegI src1, immI8 src2, flagsReg cr) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(DEFAULT_COST + BRANCH_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "MinI $dst $src1,$src2\t MinI const8 (z10 only)" %}
+  ins_encode %{
+    Label done;
+    if ($dst$$Register != $src1$$Register) {
+      __ z_lgfr($dst$$Register, $src1$$Register);
+    }
+    __ z_cij($src1$$Register, $src2$$constant, Assembler::bcondLow, done);
+    __ z_lghi($dst$$Register, $src2$$constant);
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Max Register with Register
+instruct z196_maxI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr);
+  predicate(VM_Version::has_LoadStoreConditional());
+  ins_cost(3 * DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE); + format %{ "MaxI $dst $src1,$src2\t MaxI (z196 only)" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc1 = $src1$$Register; + Register Rsrc2 = $src2$$Register; + + if (Rsrc1 == Rsrc2) { + if (Rdst != Rsrc1) { + __ z_lgfr(Rdst, Rsrc1); + } + } else if (Rdst == Rsrc1) { // Rdst preset with src1. + __ z_cr(Rsrc1, Rsrc2); // Move src2 only if src1 is NotHigh. + __ z_locr(Rdst, Rsrc2, Assembler::bcondNotHigh); + } else if (Rdst == Rsrc2) { // Rdst preset with src2. + __ z_cr(Rsrc2, Rsrc1); // Move src1 only if src2 is NotHigh. + __ z_locr(Rdst, Rsrc1, Assembler::bcondNotHigh); + } else { // Rdst is disjoint from operands, move in either case. + __ z_cr(Rsrc1, Rsrc2); + __ z_locr(Rdst, Rsrc2, Assembler::bcondNotHigh); + __ z_locr(Rdst, Rsrc1, Assembler::bcondHigh); + } + %} + ins_pipe(pipe_class_dummy); +%} + +// Max Register with Register +instruct z10_maxI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{ + match(Set dst (MaxI src1 src2)); + effect(KILL cr); + predicate(VM_Version::has_CompareBranch()); + ins_cost(2 * DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "MaxI $dst $src1,$src2\t MaxI (z10 only)" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc1 = $src1$$Register; + Register Rsrc2 = $src2$$Register; + Label done; + + if (Rsrc1 == Rsrc2) { + if (Rdst != Rsrc1) { + __ z_lgfr(Rdst, Rsrc1); + } + } else if (Rdst == Rsrc1) { + __ z_crj(Rsrc1, Rsrc2, Assembler::bcondHigh, done); + __ z_lgfr(Rdst, Rsrc2); + } else if (Rdst == Rsrc2) { + __ z_crj(Rsrc2, Rsrc1, Assembler::bcondHigh, done); + __ z_lgfr(Rdst, Rsrc1); + } else { + __ z_lgfr(Rdst, Rsrc1); + __ z_crj(Rsrc1, Rsrc2, Assembler::bcondHigh, done); + __ z_lgfr(Rdst, Rsrc2); + } + __ bind(done); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct maxI_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{ + match(Set dst (MaxI src1 src2)); + effect(KILL cr); + 
predicate(!VM_Version::has_CompareBranch()); + ins_cost(3 * DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "MaxI $dst $src1,$src2\t MaxI" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc1 = $src1$$Register; + Register Rsrc2 = $src2$$Register; + Label done; + + if (Rsrc1 == Rsrc2) { + if (Rdst != Rsrc1) { + __ z_lgfr(Rdst, Rsrc1); + } + } else if (Rdst == Rsrc1) { + __ z_cr(Rsrc1, Rsrc2); + __ z_brh(done); + __ z_lgfr(Rdst, Rsrc2); + } else if (Rdst == Rsrc2) { + __ z_cr(Rsrc2, Rsrc1); + __ z_brh(done); + __ z_lgfr(Rdst, Rsrc1); + } else { + __ z_lgfr(Rdst, Rsrc1); + __ z_cr(Rsrc1, Rsrc2); + __ z_brh(done); + __ z_lgfr(Rdst, Rsrc2); + } + + __ bind(done); + %} + + ins_pipe(pipe_class_dummy); +%} + +instruct z196_maxI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{ + match(Set dst (MaxI src1 src2)); + effect(KILL cr); + predicate(VM_Version::has_LoadStoreConditional()); + ins_cost(3 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "MaxI $dst $src1,$src2\t MaxI const32 (z196 only)" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc1 = $src1$$Register; + int Isrc2 = $src2$$constant; + + if (Rdst == Rsrc1) { + __ load_const_optimized(Z_R0_scratch, Isrc2); + __ z_cfi(Rsrc1, Isrc2); + __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotHigh); + } else { + __ load_const_optimized(Rdst, Isrc2); + __ z_cfi(Rsrc1, Isrc2); + __ z_locr(Rdst, Rsrc1, Assembler::bcondHigh); + } + %} + ins_pipe(pipe_class_dummy); +%} + +instruct maxI_reg_imm32(iRegI dst, iRegI src1, immI src2, flagsReg cr) %{ + match(Set dst (MaxI src1 src2)); + effect(KILL cr); + ins_cost(2 * DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "MaxI $dst $src1,$src2\t MaxI const32" %} + ins_encode %{ + Label done; + if ($dst$$Register != $src1$$Register) { + __ z_lgfr($dst$$Register, $src1$$Register); + } + __ z_cfi($src1$$Register, $src2$$constant); + __ z_brh(done); + __ 
z_lgfi($dst$$Register, $src2$$constant); + __ bind(done); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct z196_maxI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{ + match(Set dst (MaxI src1 src2)); + effect(KILL cr); + predicate(VM_Version::has_LoadStoreConditional()); + ins_cost(3 * DEFAULT_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "MaxI $dst $src1,$src2\t MaxI const16 (z196 only)" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc1 = $src1$$Register; + int Isrc2 = $src2$$constant; + if (Rdst == Rsrc1) { + __ load_const_optimized(Z_R0_scratch, Isrc2); + __ z_chi(Rsrc1, Isrc2); + __ z_locr(Rdst, Z_R0_scratch, Assembler::bcondNotHigh); + } else { + __ load_const_optimized(Rdst, Isrc2); + __ z_chi(Rsrc1, Isrc2); + __ z_locr(Rdst, Rsrc1, Assembler::bcondHigh); + } + %} + ins_pipe(pipe_class_dummy); +%} + +instruct maxI_reg_imm16(iRegI dst, iRegI src1, immI16 src2, flagsReg cr) %{ + match(Set dst (MaxI src1 src2)); + effect(KILL cr); + ins_cost(2 * DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "MaxI $dst $src1,$src2\t MaxI const16" %} + ins_encode %{ + Label done; + if ($dst$$Register != $src1$$Register) { + __ z_lgfr($dst$$Register, $src1$$Register); + } + __ z_chi($src1$$Register, $src2$$constant); + __ z_brh(done); + __ z_lghi($dst$$Register, $src2$$constant); + __ bind(done); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct z10_maxI_reg_imm8(iRegI dst, iRegI src1, immI8 src2, flagsReg cr) %{ + match(Set dst (MaxI src1 src2)); + effect(KILL cr); + predicate(VM_Version::has_CompareBranch()); + ins_cost(DEFAULT_COST + BRANCH_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "MaxI $dst $src1,$src2\t MaxI const8" %} + ins_encode %{ + Label done; + if ($dst$$Register != $src1$$Register) { + __ z_lgfr($dst$$Register, $src1$$Register); + } + __ z_cij($src1$$Register, $src2$$constant, Assembler::bcondHigh, done); + __ z_lghi($dst$$Register, $src2$$constant); + __ 
bind(done); + %} + ins_pipe(pipe_class_dummy); +%} + +//----------Abs--------------------------------------------------------------- + +instruct absI_reg(iRegI dst, iRegI src, flagsReg cr) %{ + match(Set dst (AbsI src)); + effect(KILL cr); + ins_cost(DEFAULT_COST_LOW); + // TODO: s390 port size(FIXED_SIZE); + format %{ "LPR $dst, $src" %} + opcode(LPR_ZOPC); + ins_encode(z_rrform(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +instruct negabsI_reg(iRegI dst, iRegI src, immI_0 zero, flagsReg cr) %{ + match(Set dst (SubI zero (AbsI src))); + effect(KILL cr); + ins_cost(DEFAULT_COST_LOW); + // TODO: s390 port size(FIXED_SIZE); + format %{ "LNR $dst, $src" %} + opcode(LNR_ZOPC); + ins_encode(z_rrform(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +//----------Float Compares---------------------------------------------------- + +// Compare floating, generate condition code. +instruct cmpF_cc(flagsReg cr, regF src1, regF src2) %{ + match(Set cr (CmpF src1 src2)); + ins_cost(ALU_REG_COST); + size(4); + format %{ "FCMPcc $src1,$src2\t # float" %} + ins_encode %{ __ z_cebr($src1$$FloatRegister, $src2$$FloatRegister); %} + ins_pipe(pipe_class_dummy); +%} + +instruct cmpD_cc(flagsReg cr, regD src1, regD src2) %{ + match(Set cr (CmpD src1 src2)); + ins_cost(ALU_REG_COST); + size(4); + format %{ "FCMPcc $src1,$src2 \t # double" %} + ins_encode %{ __ z_cdbr($src1$$FloatRegister, $src2$$FloatRegister); %} + ins_pipe(pipe_class_dummy); +%} + +instruct cmpF_cc_mem(flagsReg cr, regF src1, memoryRX src2) %{ + match(Set cr (CmpF src1 (LoadF src2))); + ins_cost(ALU_MEMORY_COST); + size(6); + format %{ "FCMPcc_mem $src1,$src2\t # floatMemory" %} + opcode(CEB_ZOPC); + ins_encode(z_form_rt_memFP(src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +instruct cmpD_cc_mem(flagsReg cr, regD src1, memoryRX src2) %{ + match(Set cr (CmpD src1 (LoadD src2))); + ins_cost(ALU_MEMORY_COST); + size(6); + format %{ "DCMPcc_mem $src1,$src2\t # doubleMemory" %} + opcode(CDB_ZOPC); + 
ins_encode(z_form_rt_memFP(src1, src2)); + ins_pipe(pipe_class_dummy); +%} + +// Compare floating, generate condition code +instruct cmpF0_cc(flagsReg cr, regF src1, immFpm0 src2) %{ + match(Set cr (CmpF src1 src2)); + ins_cost(DEFAULT_COST); + size(4); + format %{ "LTEBR $src1,$src1\t # float" %} + opcode(LTEBR_ZOPC); + ins_encode(z_rreform(src1, src1)); + ins_pipe(pipe_class_dummy); +%} + +instruct cmpD0_cc(flagsReg cr, regD src1, immDpm0 src2) %{ + match(Set cr (CmpD src1 src2)); + ins_cost(DEFAULT_COST); + size(4); + format %{ "LTDBR $src1,$src1 \t # double" %} + opcode(LTDBR_ZOPC); + ins_encode(z_rreform(src1, src1)); + ins_pipe(pipe_class_dummy); +%} + +// Compare floating, generate -1,0,1 +instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsReg cr) %{ + match(Set dst (CmpF3 src1 src2)); + effect(KILL cr); + ins_cost(DEFAULT_COST * 5 + BRANCH_COST); + size(24); + format %{ "CmpF3 $dst,$src1,$src2" %} + ins_encode %{ + // compare registers + __ z_cebr($src1$$FloatRegister, $src2$$FloatRegister); + // Convert condition code into -1,0,1, where + // -1 means unordered or less + // 0 means equal + // 1 means greater. 
+ if (VM_Version::has_LoadStoreConditional()) { + Register one = Z_R0_scratch; + Register minus_one = Z_R1_scratch; + __ z_lghi(minus_one, -1); + __ z_lghi(one, 1); + __ z_lghi( $dst$$Register, 0); + __ z_locgr($dst$$Register, one, Assembler::bcondHigh); + __ z_locgr($dst$$Register, minus_one, Assembler::bcondLowOrNotOrdered); + } else { + Label done; + __ clear_reg($dst$$Register, true, false); + __ z_bre(done); + __ z_lhi($dst$$Register, 1); + __ z_brh(done); + __ z_lhi($dst$$Register, -1); + __ bind(done); + } + %} + ins_pipe(pipe_class_dummy); +%} + +instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsReg cr) %{ + match(Set dst (CmpD3 src1 src2)); + effect(KILL cr); + ins_cost(DEFAULT_COST * 5 + BRANCH_COST); + size(24); + format %{ "CmpD3 $dst,$src1,$src2" %} + ins_encode %{ + // compare registers + __ z_cdbr($src1$$FloatRegister, $src2$$FloatRegister); + // Convert condition code into -1,0,1, where + // -1 means unordered or less + // 0 means equal + // 1 means greater. + if (VM_Version::has_LoadStoreConditional()) { + Register one = Z_R0_scratch; + Register minus_one = Z_R1_scratch; + __ z_lghi(minus_one, -1); + __ z_lghi(one, 1); + __ z_lghi( $dst$$Register, 0); + __ z_locgr($dst$$Register, one, Assembler::bcondHigh); + __ z_locgr($dst$$Register, minus_one, Assembler::bcondLowOrNotOrdered); + } else { + Label done; + // indicate unused result + (void) __ clear_reg($dst$$Register, true, false); + __ z_bre(done); + __ z_lhi($dst$$Register, 1); + __ z_brh(done); + __ z_lhi($dst$$Register, -1); + __ bind(done); + } + %} + ins_pipe(pipe_class_dummy); +%} + +//----------Branches--------------------------------------------------------- +// Jump + +// Direct Branch. +instruct branch(label labl) %{ + match(Goto); + effect(USE labl); + ins_cost(BRANCH_COST); + size(4); + format %{ "BRU $labl" %} + ins_encode(z_enc_bru(labl)); + ins_pipe(pipe_class_dummy); + // If set to 1 this indicates that the current instruction is a + // short variant of a long branch. 
This avoids using this + // instruction in first-pass matching. It will then only be used in + // the `Shorten_branches' pass. + ins_short_branch(1); +%} + +// Direct Branch. +instruct branchFar(label labl) %{ + match(Goto); + effect(USE labl); + ins_cost(BRANCH_COST); + size(6); + format %{ "BRUL $labl" %} + ins_encode(z_enc_brul(labl)); + ins_pipe(pipe_class_dummy); + // This is not a short variant of a branch, but the long variant. + ins_short_branch(0); +%} + +// Conditional Near Branch +instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{ + // Same match rule as `branchConFar'. + match(If cmp cr); + effect(USE lbl); + ins_cost(BRANCH_COST); + size(4); + format %{ "branch_con_short,$cmp $cr, $lbl" %} + ins_encode(z_enc_branch_con_short(cmp, lbl)); + ins_pipe(pipe_class_dummy); + // If set to 1 this indicates that the current instruction is a + // short variant of a long branch. This avoids using this + // instruction in first-pass matching. It will then only be used in + // the `Shorten_branches' pass. + ins_short_branch(1); +%} + +// This is for cases when the z/Architecture conditional branch instruction +// does not reach far enough. So we emit a far branch here, which is +// more expensive. +// +// Conditional Far Branch +instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{ + // Same match rule as `branchCon'. + match(If cmp cr); + effect(USE cr, USE lbl); + // Make more expensive to prefer compare_and_branch over separate instructions. + ins_cost(2 * BRANCH_COST); + size(6); + format %{ "branch_con_far,$cmp $cr, $lbl" %} + ins_encode(z_enc_branch_con_far(cmp, lbl)); + ins_pipe(pipe_class_dummy); + // This is not a short variant of a branch, but the long variant.. 
+ ins_short_branch(0); +%} + +instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{ + match(CountedLoopEnd cmp cr); + effect(USE labl); + ins_cost(BRANCH_COST); + size(4); + format %{ "branch_con_short,$cmp $labl\t # counted loop end" %} + ins_encode(z_enc_branch_con_short(cmp, labl)); + ins_pipe(pipe_class_dummy); + // If set to 1 this indicates that the current instruction is a + // short variant of a long branch. This avoids using this + // instruction in first-pass matching. It will then only be used in + // the `Shorten_branches' pass. + ins_short_branch(1); +%} + +instruct branchLoopEndFar(cmpOp cmp, flagsReg cr, label labl) %{ + match(CountedLoopEnd cmp cr); + effect(USE labl); + ins_cost(BRANCH_COST); + size(6); + format %{ "branch_con_far,$cmp $labl\t # counted loop end" %} + ins_encode(z_enc_branch_con_far(cmp, labl)); + ins_pipe(pipe_class_dummy); + // This is not a short variant of a branch, but the long variant. + ins_short_branch(0); +%} + +//----------Compare and Branch (short distance)------------------------------ + +// INT REG operands for loop counter processing. +instruct testAndBranchLoopEnd_Reg(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{ + match(CountedLoopEnd boolnode (CmpI src1 src2)); + effect(USE labl, KILL cr); + predicate(VM_Version::has_CompareBranch()); + ins_cost(BRANCH_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "test_and_branch_loop_end,$boolnode $src1,$src2,$labl\t # counted loop end SHORT" %} + opcode(CRJ_ZOPC); + ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode)); + ins_pipe(pipe_class_dummy); + ins_short_branch(1); +%} + +// INT REG operands. 
+// Short compare-and-branch on two signed int registers.
+// Matches an If whose condition is (CmpI src1 src2). The rule is only
+// eligible when VM_Version::has_CompareBranch() is true and is encoded by
+// z_enc_cmpb_regreg using opcode CRJ_ZOPC (z/Architecture CRJ, compare and
+// branch relative).
+instruct cmpb_RegI(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpI src1 src2));
+  // The branch target is only used; the condition code register is declared killed.
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  // 1 marks this rule as the short variant of a long branch: it is not used
+  // in first-pass matching and only comes into play in the
+  // `Shorten_branches' pass.
+  ins_short_branch(1);
+%}
+
+// Unsigned INT REG operands
+// Same shape as cmpb_RegI, but matches (CmpU src1 src2) and uses opcode
+// CLRJ_ZOPC (CLRJ, compare logical and branch relative).
+instruct cmpbU_RegI(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpU src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  // Short variant; see the note on ins_short_branch(1) in cmpb_RegI.
+  ins_short_branch(1);
+%}
+
+// LONG REG operands
+// Matches (CmpL src1 src2) on two long registers and uses opcode CGRJ_ZOPC
+// (CGRJ, the 64-bit form of compare and branch relative).
+instruct cmpb_RegL(cmpOpT boolnode, iRegL src1, iRegL src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpL src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CGRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CGRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  // Short variant; see the note on ins_short_branch(1) in cmpb_RegI.
+  ins_short_branch(1);
+%}
+
+// PTR REG operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+// Short compare-and-branch on two pointer registers.
+// Matches (CmpP src1 src2); eligible only when
+// VM_Version::has_CompareBranch() is true. Encoded by z_enc_cmpb_regreg with
+// opcode CLGRJ_ZOPC (CLGRJ, 64-bit compare logical and branch relative).
+instruct cmpb_RegPP(cmpOpT boolnode, iRegP src1, iRegP src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP src1 src2));
+  // The branch target is only used; the condition code register is declared killed.
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  // 1 marks this rule as the short variant of a long branch: it is not used
+  // in first-pass matching and only comes into play in the
+  // `Shorten_branches' pass.
+  ins_short_branch(1);
+%}
+
+// Narrow-oop counterpart of cmpb_RegPP: both CmpP inputs are DecodeN nodes
+// in the match tree, while the encoding is handed the iRegN registers
+// themselves (same opcode CLGRJ_ZOPC).
+instruct cmpb_RegNN(cmpOpT boolnode, iRegN src1, iRegN src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) (DecodeN src2)));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGRJ_ZOPC);
+  ins_encode(z_enc_cmpb_regreg(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);   // Short variant, used by the `Shorten_branches' pass only.
+%}
+
+// INT REG/IMM operands for loop counter processing
+// Matches a CountedLoopEnd (rather than an If) over (CmpI src1 src2) where
+// src2 is an immI8 constant; encoded by z_enc_cmpb_regimm with opcode
+// CIJ_ZOPC (CIJ, compare immediate and branch relative).
+instruct testAndBranchLoopEnd_Imm(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+  match(CountedLoopEnd boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "test_and_branch_loop_end,$boolnode $src1,$src2,$labl\t # counted loop end SHORT" %}
+  opcode(CIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);   // Short variant, used by the `Shorten_branches' pass only.
+%}
+
+// INT REG/IMM operands
+// Signed int register against an immI8 constant; opcode CIJ_ZOPC.
+instruct cmpb_RegI_imm(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);   // Short variant, used by the `Shorten_branches' pass only.
+%}
+
+// Unsigned INT REG/IMM operands
+// Matches (CmpU src1 src2) with a uimmI8 constant; opcode CLIJ_ZOPC
+// (CLIJ, compare logical immediate and branch relative).
+instruct cmpbU_RegI_imm(cmpOpT boolnode, iRegI src1, uimmI8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpU src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);   // Short variant, used by the `Shorten_branches' pass only.
+%}
+
+// LONG REG/IMM operands
+// Long register against an immL8 constant; opcode CGIJ_ZOPC (CGIJ, the
+// 64-bit form of compare immediate and branch relative).
+instruct cmpb_RegL_imm(cmpOpT boolnode, iRegL src1, immL8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpL src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CGIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);   // Short variant, used by the `Shorten_branches' pass only.
+%}
+
+// PTR REG-imm operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+// Pointer register against an immP8 constant; opcode CLGIJ_ZOPC (CLGIJ,
+// 64-bit compare logical immediate and branch relative).
+instruct cmpb_RegP_immP(cmpOpT boolnode, iRegP src1, immP8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);   // Short variant, used by the `Shorten_branches' pass only.
+%}
+
+// Compare against zero only, do not mix N and P oops (encode/decode required).
+// Short compare-and-branch of a narrow-oop register against the immP0
+// constant. The match tree is (CmpP (DecodeN src1) src2); the encoding
+// (z_enc_cmpb_regimm, opcode CLGIJ_ZOPC) is handed the iRegN register itself.
+instruct cmpb_RegN_immP0(cmpOpT boolnode, iRegN src1, immP0 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) src2));
+  // The branch target is only used; the condition code register is declared killed.
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  // 1 marks this rule as the short variant of a long branch: it is not used
+  // in first-pass matching and only comes into play in the
+  // `Shorten_branches' pass.
+  ins_short_branch(1);
+%}
+
+// Narrow-oop register against an immN8 constant; both CmpP inputs appear
+// under DecodeN in the match tree. Same encoding and opcode as above.
+instruct cmpb_RegN_imm(cmpOpT boolnode, iRegN src1, immN8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) (DecodeN src2)));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # SHORT" %}
+  opcode(CLGIJ_ZOPC);
+  ins_encode(z_enc_cmpb_regimm(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(1);   // Short variant, used by the `Shorten_branches' pass only.
+%}
+
+
+//----------Compare and Branch (far distance)------------------------------
+//
+// The far rules below repeat the match rules of the short compare-and-branch
+// rules, but carry ins_short_branch(0), cost BRANCH_COST+DEFAULT_COST, and
+// name two opcodes: a plain compare plus BRCL_ZOPC (BRCL, branch relative on
+// condition long). Their format strings still print the short mnemonic,
+// tagged "FAR(substituted)".
+// NOTE(review): presumably the *Far enc_classes emit the compare followed by
+// the BRCL - confirm in the enc_class definitions.
+
+// INT REG operands for loop counter processing
+instruct testAndBranchLoopEnd_RegFar(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(CountedLoopEnd boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "test_and_branch_loop_end,$boolnode $src1,$src2,$labl\t # counted loop end FAR" %}
+  opcode(CR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// INT REG operands
+// Far form of the signed int reg-reg compare-and-branch (CR_ZOPC + BRCL_ZOPC).
+instruct cmpb_RegI_Far(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// Unsigned INT REG operands
+// Far form for (CmpU src1 src2): CLR_ZOPC (compare logical) + BRCL_ZOPC.
+instruct cmpbU_RegI_Far(cmpOpT boolnode, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpU src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// LONG REG operands
+// Far form for (CmpL src1 src2): CGR_ZOPC (64-bit compare) + BRCL_ZOPC.
+instruct cmpb_RegL_Far(cmpOpT boolnode, iRegL src1, iRegL src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpL src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CGRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CGR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// PTR REG operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+// Far compare-and-branch on two pointer registers.
+// Matches (CmpP src1 src2); eligible only when
+// VM_Version::has_CompareBranch() is true. Names two opcodes, CLGR_ZOPC
+// (64-bit compare logical) and BRCL_ZOPC (branch relative on condition
+// long), and is encoded by z_enc_cmpb_regregFar. The format string prints
+// the short mnemonic tagged "FAR(substituted)".
+instruct cmpb_RegPP_Far(cmpOpT boolnode, iRegP src1, iRegP src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP src1 src2));
+  // The branch target is only used; the condition code register is declared killed.
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  // One DEFAULT_COST more than the BRANCH_COST charged by the short rules.
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// Narrow-oop counterpart of cmpb_RegPP_Far: both CmpP inputs are DecodeN
+// nodes in the match tree, while the encoding is handed the iRegN registers.
+instruct cmpb_RegNN_Far(cmpOpT boolnode, iRegN src1, iRegN src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) (DecodeN src2)));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGRJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGR_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regregFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// INT REG/IMM operands for loop counter processing
+// Far CountedLoopEnd rule for an int register against an immI8 constant:
+// CHI_ZOPC (compare halfword immediate) + BRCL_ZOPC via z_enc_cmpb_regimmFar.
+instruct testAndBranchLoopEnd_ImmFar(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+  match(CountedLoopEnd boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "test_and_branch_loop_end,$boolnode $src1,$src2,$labl\t # counted loop end FAR" %}
+  opcode(CHI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// INT REG/IMM operands
+// Far form for a signed int register against an immI8 constant
+// (CHI_ZOPC + BRCL_ZOPC).
+instruct cmpb_RegI_imm_Far(cmpOpT boolnode, iRegI src1, immI8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpI src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CHI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// Unsigned INT REG/IMM operands
+// Far form for (CmpU src1 src2) with a uimmI8 constant: CLFI_ZOPC
+// (compare logical immediate) + BRCL_ZOPC.
+instruct cmpbU_RegI_imm_Far(cmpOpT boolnode, iRegI src1, uimmI8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpU src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLFI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// LONG REG/IMM operands
+// Far form for a long register against an immL8 constant: CGHI_ZOPC
+// (64-bit compare halfword immediate) + BRCL_ZOPC.
+instruct cmpb_RegL_imm_Far(cmpOpT boolnode, iRegL src1, immL8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpL src1 src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CGIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CGHI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// PTR REG-imm operands
+
+// Separate rules for regular and narrow oops. ADLC can't recognize
+// rules with polymorphic operands to be sisters -> shorten_branches
+// will not shorten.
+
+// Far compare-and-branch of a pointer register against an immP8 constant.
+// Names CLGFI_ZOPC (64-bit compare logical immediate) and BRCL_ZOPC (branch
+// relative on condition long); encoded by z_enc_cmpb_regimmFar. Eligible
+// only when VM_Version::has_CompareBranch() is true.
+instruct cmpb_RegP_immP_Far(cmpOpT boolnode, iRegP src1, immP8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP src1 src2));
+  // The branch target is only used; the condition code register is declared killed.
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGFI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// Compare against zero only, do not mix N and P oops (encode/decode required).
+// Far form: narrow-oop register (under DecodeN in the match tree) against
+// the immP0 constant.
+instruct cmpb_RegN_immP0_Far(cmpOpT boolnode, iRegN src1, immP0 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) src2));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGFI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// Far form: narrow-oop register against an immN8 constant, both under
+// DecodeN in the match tree.
+instruct cmpb_RegN_immN_Far(cmpOpT boolnode, iRegN src1, immN8 src2, label labl, flagsReg cr) %{
+  match(If boolnode (CmpP (DecodeN src1) (DecodeN src2)));
+  effect(USE labl, KILL cr);
+  predicate(VM_Version::has_CompareBranch());
+  ins_cost(BRANCH_COST+DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CLGIJ,$boolnode $src1,$src2,$labl\t # FAR(substituted)" %}
+  opcode(CLGFI_ZOPC, BRCL_ZOPC);
+  ins_encode(z_enc_cmpb_regimmFar(src1, src2, labl, boolnode));
+  ins_pipe(pipe_class_dummy);
+  ins_short_branch(0);   // Long variant, not a short branch.
+%}
+
+// ============================================================================
+// Long Compare
+
+// Due to a shortcoming in the ADLC, it mixes up expressions like:
+// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
+// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
+// are collapsed internally in the ADLC's dfa-gen code. The match for
+// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
+// foo match ends up with the wrong leaf. One fix is to not match both
+// reg-reg and reg-zero forms of long-compare. This is unfortunate because
+// both forms beat the trinary form of long-compare and both are very useful
+// on platforms which have few registers.
+
+// Manifest a CmpL3 result in an integer register. Very painful.
+// This is the test to avoid.
+//
+// Compares the two long registers with z_cgr and turns the resulting
+// condition code into -1, 0 or 1 in dst. Two code shapes are generated,
+// chosen by VM_Version::has_LoadStoreConditional(); the rule declares a
+// fixed size of 24 bytes.
+instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr) %{
+  match(Set dst (CmpL3 src1 src2));
+  effect(KILL cr);
+  ins_cost(DEFAULT_COST * 5 + BRANCH_COST);
+  size(24);
+  format %{ "CmpL3 $dst,$src1,$src2" %}
+  ins_encode %{
+    Label done;
+    // compare registers
+    __ z_cgr($src1$$Register, $src2$$Register);
+    // Convert condition code into -1,0,1, where
+    // -1 means less
+    // 0 means equal
+    // 1 means greater.
+    if (VM_Version::has_LoadStoreConditional()) {
+      // Branch-free shape: preload both non-zero results into the scratch
+      // registers, default dst to 0, then conditionally overwrite it.
+      Register one = Z_R0_scratch;
+      Register minus_one = Z_R1_scratch;
+      __ z_lghi(minus_one, -1);
+      __ z_lghi(one, 1);
+      __ z_lghi( $dst$$Register, 0);
+      __ z_locgr($dst$$Register, one, Assembler::bcondHigh);
+      __ z_locgr($dst$$Register, minus_one, Assembler::bcondLow);
+    } else {
+      // Branching shape: dst starts at 0 and is overwritten unless the
+      // equal / high branch is taken first.
+      // NOTE(review): the branches test the condition code set by z_cgr, so
+      // presumably clear_reg(..., true, false) and z_lhi leave it intact -
+      // confirm against the assembler.
+      __ clear_reg($dst$$Register, true, false);
+      __ z_bre(done);
+      __ z_lhi($dst$$Register, 1);
+      __ z_brh(done);
+      __ z_lhi($dst$$Register, -1);
+    }
+    __ bind(done);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// Safepoint Instruction
+
+// Placeholder for a SafePoint without a poll operand. predicate(false)
+// keeps the rule from ever being selected; its encoding is
+// enc_unimplemented().
+instruct safePoint() %{
+  match(SafePoint);
+  predicate(false);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "UNIMPLEMENTED Safepoint_ " %}
+  ins_encode(enc_unimplemented());
+  ins_pipe(pipe_class_dummy);
+%}
+
+// SafePoint with the polling page address in a pointer register: records a
+// relocInfo::poll_type relocation and then emits the polling-page load via
+// load_from_polling_page.
+instruct safePoint_poll(iRegP poll, flagsReg cr) %{
+  match(SafePoint poll);
+  effect(USE poll, KILL cr); // R0 is killed, too.
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "TM #0[,$poll],#111\t # Safepoint: poll for GC" %}
+  ins_encode %{
+    // Mark the code position where the load from the safepoint
+    // polling page was emitted as relocInfo::poll_type.
+    __ relocate(relocInfo::poll_type);
+    __ load_from_polling_page($poll$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+
+// Call Instructions
+
+// Call Java Static Instruction
+// Matches CallStaticJava; encoded by z_enc_java_static_call.
+instruct CallStaticJavaDirect_dynTOC(method meth) %{
+  match(CallStaticJava);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CALL,static dynTOC $meth; ==> " %}
+  ins_encode( z_enc_java_static_call(meth) );
+  ins_pipe(pipe_class_dummy);
+  ins_alignment(2);
+%}
+
+// Call Java Dynamic Instruction
+// Matches CallDynamicJava; encoded by z_enc_java_dynamic_call.
+instruct CallDynamicJavaDirect_dynTOC(method meth) %{
+  match(CallDynamicJava);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  format %{ "CALL,dynamic dynTOC $meth; ==> " %}
+  ins_encode(z_enc_java_dynamic_call(meth));
+  ins_pipe(pipe_class_dummy);
+  ins_alignment(2);
+%}
+
+// Call Runtime Instruction
+// Matches CallRuntime; encoded by z_enc_java_to_runtime_call, the same
+// encoding class the two leaf-call rules below use.
+instruct CallRuntimeDirect(method meth) %{
+  match(CallRuntime);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  ins_num_consts(1);
+  ins_alignment(2);
+  format %{ "CALL,runtime" %}
+  ins_encode( z_enc_java_to_runtime_call(meth) );
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Call runtime without safepoint - same as CallRuntime
+instruct CallLeafDirect(method meth) %{
+  match(CallLeaf);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  ins_num_consts(1);
+  ins_alignment(2);
+  format %{ "CALL,runtime leaf $meth" %}
+  ins_encode( z_enc_java_to_runtime_call(meth) );
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Call runtime without safepoint - same as CallLeaf
+instruct CallLeafNoFPDirect(method meth) %{
+  match(CallLeafNoFP);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  ins_num_consts(1);
+  format %{ "CALL,runtime leaf nofp $meth" %}
+  ins_encode( z_enc_java_to_runtime_call(meth) );
+  ins_pipe(pipe_class_dummy);
+  ins_alignment(2);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+// The encoding is a single z_br to the register holding jump_target;
+// method_oop is only constrained by its operand class (inline_cache_regP).
+instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
+  match(TailCall jump_target method_oop);
+  ins_cost(CALL_COST);
+  size(2);
+  format %{ "Jmp $jump_target\t# $method_oop holds method oop" %}
+  ins_encode %{ __ z_br($jump_target$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Return Instruction
+// Branches to the address in Z_R14, which the format string calls the link
+// register.
+instruct Ret() %{
+  match(Return);
+  size(2);
+  format %{ "BR(Z_R14) // branch to link register" %}
+  ins_encode %{ __ z_br(Z_R14); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Tail Jump; remove the return address; jump to target.
+// TailCall above leaves the return address around.
+// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
+// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
+// "restore" before this instruction (in Epilogue), we need to materialize it
+// in %i0.
+// NOTE(review): %o0, %i0 and "restore" look like SPARC terminology, presumably
+// carried over from another port - confirm and reword. In this rule ex_oop is
+// constrained by its operand class rarg1RegP, and the encoding loads the
+// _z_abi(return_pc) slot addressed off Z_SP into Z_ARG2 (the "issuing pc")
+// before branching to jump_target.
+instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{
+  match(TailJump jump_target ex_oop);
+  ins_cost(CALL_COST);
+  size(8);
+  format %{ "TailJump $jump_target" %}
+  ins_encode %{
+    __ z_lg(Z_ARG2/* issuing pc */, _z_abi(return_pc), Z_SP);
+    __ z_br($jump_target$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler. No code emitted.
+// Pure placeholder: the exception oop is already in the first argument
+// register when the handler is entered, so zero bytes are emitted.
+instruct CreateException(rarg1RegP ex_oop) %{
+  match(Set ex_oop (CreateEx));
+  size(0);
+  ins_cost(0);
+  format %{ "# exception oop; no code emitted" %}
+  ins_encode(/*empty*/);
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Rethrow: the exception oop arrives in the first argument position.
+// Control is transferred with a plain branch (no call, no return address)
+// to the shared rethrow stub.
+instruct RethrowException() %{
+  match(Rethrow);
+  // TODO: s390 port size(VARIABLE_SIZE);
+  ins_cost(CALL_COST);
+  format %{ "Jmp rethrow_stub" %}
+  ins_encode %{
+    const Register Rtarget = Z_R1_scratch;
+    cbuf.set_insts_mark();
+    // Materialize the stub entry in the scratch register, then jump through it.
+    __ load_const_optimized(Rtarget, (address)OptoRuntime::rethrow_stub());
+    __ z_br(Rtarget);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Halt node: plant an illegal-instruction trap. Execution must never continue
+// past this point.
+instruct ShouldNotReachHere() %{
+  match(Halt);
+  size(2);
+  ins_cost(CALL_COST);
+  format %{ "ILLTRAP; ShouldNotReachHere" %}
+  ins_encode %{
+    __ z_illtrap();
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
+// array for an instance of the superklass. Set a hidden internal cache on a
+// hit (cache is checked with exposed code in gen_subtype_check()). Return
+// not zero for a miss or zero for a hit. The encoding ALSO sets flags.
+instruct partialSubtypeCheck(rarg1RegP index, rarg2RegP sub, rarg3RegP super, flagsReg pcc,
+                             rarg4RegP scratch1, rarg5RegP scratch2) %{
+  match(Set index (PartialSubtypeCheck sub super));
+  effect(KILL pcc, KILL scratch1, KILL scratch2);
+  ins_cost(10 * DEFAULT_COST);
+  size(12);
+  format %{ " CALL PartialSubtypeCheck\n" %}
+  ins_encode %{
+    // Call the shared stub through Z_ARG4; Z_R14 receives the return address.
+    AddressLiteral stub_address(StubRoutines::zarch::partial_subtype_check());
+    __ load_const_optimized(Z_ARG4, stub_address);
+    __ z_basr(Z_R14, Z_ARG4);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Same stub call, but matched when only the condition code of the check
+// (compare of the result against zero) is consumed; index is merely killed.
+instruct partialSubtypeCheck_vs_zero(flagsReg pcc, rarg2RegP sub, rarg3RegP super, immP0 zero,
+                                     rarg1RegP index, rarg4RegP scratch1, rarg5RegP scratch2) %{
+  match(Set pcc (CmpI (PartialSubtypeCheck sub super) zero));
+  effect(KILL scratch1, KILL scratch2, KILL index);
+  ins_cost(10 * DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "CALL PartialSubtypeCheck_vs_zero\n" %}
+  ins_encode %{
+    AddressLiteral stub_address(StubRoutines::zarch::partial_subtype_check());
+    __ load_const_optimized(Z_ARG4, stub_address);
+    __ z_basr(Z_R14, Z_ARG4);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// inlined locking and unlocking
+
+// NOTE(review): the format texts of both locking rules mention Z_ARG4/Z_ARG5,
+// while the rules only declare tmp1/tmp2 as TEMP - confirm which is intended.
+instruct cmpFastLock(flagsReg pcc, iRegP_N2P oop, iRegP_N2P box, iRegP tmp1, iRegP tmp2) %{
+  match(Set pcc (FastLock oop box));
+  effect(TEMP tmp1, TEMP tmp2);
+  ins_cost(100);
+  // TODO: s390 port size(VARIABLE_SIZE); // Uses load_const_optimized.
+  format %{ "FASTLOCK $oop, $box; KILL Z_ARG4, Z_ARG5" %}
+  ins_encode %{ __ compiler_fast_lock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register,
+                                             UseBiasedLocking && !UseOptoBiasInlining); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct cmpFastUnlock(flagsReg pcc, iRegP_N2P oop, iRegP_N2P box, iRegP tmp1, iRegP tmp2) %{
+  match(Set pcc (FastUnlock oop box));
+  effect(TEMP tmp1, TEMP tmp2);
+  ins_cost(100);
+  // TODO: s390 port size(FIXED_SIZE); // emitted code depends on UseBiasedLocking being on/off.
+  format %{ "FASTUNLOCK $oop, $box; KILL Z_ARG4, Z_ARG5" %}
+  ins_encode %{ __ compiler_fast_unlock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register,
+                                               UseBiasedLocking && !UseOptoBiasInlining); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ClearArray comes in three flavours, selected by the operand class of cnt and
+// by rising ins_cost: small constant (SSlenDW), any long constant (immL), and
+// a length held in a register.
+instruct inlineCallClearArrayConst(SSlenDW cnt, iRegP_N2P base, Universe dummy, flagsReg cr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(KILL cr);
+  ins_cost(100);
+  // TODO: s390 port size(VARIABLE_SIZE); // Variable in size due to varying #instructions.
+  format %{ "ClearArrayConst $cnt,$base" %}
+  ins_encode %{ __ Clear_Array_Const($cnt$$constant, $base$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
+  ins_cost(200);
+  // TODO: s390 port size(VARIABLE_SIZE); // Variable in size due to optimized constant loader.
+  format %{ "ClearArrayConstBig $cnt,$base" %}
+  ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $srcA$$Register, $srcL$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
+  match(Set dummy (ClearArray cnt base));
+  effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  // TODO: s390 port size(FIXED_SIZE); // z/Architecture: emitted code depends on PreferLAoverADD being on/off.
+  format %{ "ClearArrayVar $cnt,$base" %}
+  ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $srcA$$Register, $srcL$$Register); %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// ============================================================================
+// CompactStrings
+//
+// The rules below are selected by the node's encoding():
+// LL = both operands byte[], UU = both char[], LU/UL = mixed,
+// none = presumably the non-compact (pure char[]) case - TODO confirm.
+
+// String equals
+instruct string_equalsL(iRegP str1, iRegP str2, iRegI cnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(300);
+  format %{ "String Equals byte[] $str1,$str2,$cnt -> $result" %}
+  ins_encode %{
+    __ array_equals(false, $str1$$Register, $str2$$Register,
+                    $cnt$$Register, $oddReg$$Register, $evenReg$$Register,
+                    $result$$Register, true /* byte */);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct string_equalsU(iRegP str1, iRegP str2, iRegI cnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(300);
+  format %{ "String Equals char[] $str1,$str2,$cnt -> $result" %}
+  ins_encode %{
+    __ array_equals(false, $str1$$Register, $str2$$Register,
+                    $cnt$$Register, $oddReg$$Register, $evenReg$$Register,
+                    $result$$Register, false /* byte */);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Constant length (uimmI8): a single CLC compares cnt bytes, then the
+// condition code is turned into 1 (equal) or 0 (not equal). A zero length
+// skips the compare and yields 1.
+instruct string_equals_imm(iRegP str1, iRegP str2, uimmI8 cnt, iRegI result, flagsReg cr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(KILL cr); // R0 is killed, too.
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL || ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
+  ins_cost(100);
+  format %{ "String Equals byte[] $str1,$str2,$cnt -> $result" %}
+  ins_encode %{
+    const int cnt_imm = $cnt$$constant;
+    if (cnt_imm) { __ z_clc(0, cnt_imm - 1, $str1$$Register, 0, $str2$$Register); }
+    __ z_lhi($result$$Register, 1);
+    if (cnt_imm) {
+      if (VM_Version::has_LoadStoreConditional()) {
+        __ z_lhi(Z_R0_scratch, 0);
+        __ z_locr($result$$Register, Z_R0_scratch, Assembler::bcondNotEqual);
+      } else {
+        Label Lskip;
+        __ z_bre(Lskip);
+        __ clear_reg($result$$Register);
+        __ bind(Lskip);
+      }
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Same as above for encoding 'none'; here the count is doubled (cnt << 1) to
+// obtain the byte length handed to CLC.
+instruct string_equalsC_imm(iRegP str1, iRegP str2, immI8 cnt, iRegI result, flagsReg cr) %{
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(KILL cr); // R0 is killed, too.
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(100);
+  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
+  ins_encode %{
+    const int cnt_imm = $cnt$$constant; // positive immI8 (7 bits used)
+    if (cnt_imm) { __ z_clc(0, (cnt_imm << 1) - 1, $str1$$Register, 0, $str2$$Register); }
+    __ z_lhi($result$$Register, 1);
+    if (cnt_imm) {
+      if (VM_Version::has_LoadStoreConditional()) {
+        __ z_lhi(Z_R0_scratch, 0);
+        __ z_locr($result$$Register, Z_R0_scratch, Assembler::bcondNotEqual);
+      } else {
+        Label Lskip;
+        __ z_bre(Lskip);
+        __ clear_reg($result$$Register);
+        __ bind(Lskip);
+      }
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Array equals
+instruct array_equalsB(iRegP ary1, iRegP ary2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (AryEq ary1 ary2));
+  effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(300);
+  format %{ "Array Equals $ary1,$ary2 -> $result" %}
+  ins_encode %{
+    __ array_equals(true, $ary1$$Register, $ary2$$Register,
+                    noreg, $oddReg$$Register, $evenReg$$Register,
+                    $result$$Register, true /* byte */);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct array_equalsC(iRegP ary1, iRegP ary2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (AryEq ary1 ary2));
+  effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+  ins_cost(300);
+  format %{ "Array Equals $ary1,$ary2 -> $result" %}
+  ins_encode %{
+    __ array_equals(true, $ary1$$Register, $ary2$$Register,
+                    noreg, $oddReg$$Register, $evenReg$$Register,
+                    $result$$Register, false /* byte */);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String CompareTo
+instruct string_compareL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(300);
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $oddReg$$Register, $evenReg$$Register,
+                      $result$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct string_compareU(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrCompNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(300);
+  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $oddReg$$Register, $evenReg$$Register,
+                      $result$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct string_compareLU(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
+  ins_cost(300);
+  format %{ "String Compare byte[],char[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $oddReg$$Register, $evenReg$$Register,
+                      $result$$Register, StrIntrinsicNode::LU);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Note: for UL the two strings (and their counts) are handed to
+// string_compare() in swapped order.
+instruct string_compareUL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL cnt1, USE_KILL cnt2, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(300);
+  format %{ "String Compare char[],byte[] $str1,$cnt1,$str2,$cnt2 -> $result" %}
+  ins_encode %{
+    __ string_compare($str2$$Register, $str1$$Register,
+                      $cnt2$$Register, $cnt1$$Register,
+                      $oddReg$$Register, $evenReg$$Register,
+                      $result$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String IndexOfChar
+instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  predicate(CompactStrings);
+  match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
+  effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+  ins_cost(200);
+  format %{ "String IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
+  ins_encode %{
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           $ch$$Register, 0 /* unused, ch is in register */,
+                           $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// IndexOf with a constant needle of length 1 (immI_1): the single needle
+// character is read from the constant array at compile time and passed to
+// string_indexof_char() as an immediate.
+instruct indexOf_imm1_U(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(200);
+  format %{ "String IndexOf U [0..$haycnt]($haystack), [0]($needle) -> $result" %}
+  ins_encode %{
+    immPOper *needleOper = (immPOper *)$needle;
+    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+    ciTypeArray* needle_values = t->const_oop()->as_type_array(); // Pointer to live char *
+    jchar chr;
+#ifdef VM_LITTLE_ENDIAN
+    Unimplemented();
+#else
+    // Assemble the 16-bit character from two consecutive bytes, high byte first.
+    chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
+#endif
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           noreg, chr,
+                           $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct indexOf_imm1_L(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(200);
+  format %{ "String IndexOf L [0..$haycnt]($haystack), [0]($needle) -> $result" %}
+  ins_encode %{
+    immPOper *needleOper = (immPOper *)$needle;
+    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+    ciTypeArray* needle_values = t->const_oop()->as_type_array(); // Pointer to live char *
+    // The needle byte is signed; zero-extend it so that values >= 0x80 do not
+    // turn into 0xFFxx when widened to jchar.
+    jchar chr = (jchar)(unsigned char)needle_values->element_value(0).as_byte();
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           noreg, chr,
+                           $oddReg$$Register, $evenReg$$Register, true /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct indexOf_imm1_UL(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(200);
+  format %{ "String IndexOf UL [0..$haycnt]($haystack), [0]($needle) -> $result" %}
+  ins_encode %{
+    immPOper *needleOper = (immPOper *)$needle;
+    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+    ciTypeArray* needle_values = t->const_oop()->as_type_array(); // Pointer to live char *
+    // The single-byte needle is searched in a 16-bit haystack (is_byte == false),
+    // so it must be zero-extended; a sign-extended 0xFFxx could never match.
+    jchar chr = (jchar)(unsigned char)needle_values->element_value(0).as_byte();
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           noreg, chr,
+                           $oddReg$$Register, $evenReg$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// String IndexOf
+// *_imm_* rules: the needle length is a compile-time constant (immI16) and is
+// passed as an immediate, with noreg in place of the length register.
+instruct indexOf_imm_U(iRegP haystack, rarg2RegI haycnt, iRegP needle, immI16 needlecntImm, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(TEMP_DEF result, USE_KILL haycnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(250);
+  format %{ "String IndexOf U [0..$needlecntImm]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, noreg, $needlecntImm$$constant,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct indexOf_imm_L(iRegP haystack, rarg2RegI haycnt, iRegP needle, immI16 needlecntImm, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(TEMP_DEF result, USE_KILL haycnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(250);
+  format %{ "String IndexOf L [0..$needlecntImm]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, noreg, $needlecntImm$$constant,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct indexOf_imm_UL(iRegP haystack, rarg2RegI haycnt, iRegP needle, immI16 needlecntImm, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(TEMP_DEF result, USE_KILL haycnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(250);
+  format %{ "String IndexOf UL [0..$needlecntImm]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, noreg, $needlecntImm$$constant,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// General rules: the needle length lives in a register and the immediate
+// length argument is passed as 0.
+instruct indexOf_U(iRegP haystack, rarg2RegI haycnt, iRegP needle, rarg5RegI needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, USE_KILL haycnt, USE_KILL needlecnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU || ((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::none);
+  ins_cost(300);
+  format %{ "String IndexOf U [0..$needlecnt]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, $needlecnt$$Register, 0,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct indexOf_L(iRegP haystack, rarg2RegI haycnt, iRegP needle, rarg5RegI needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, USE_KILL haycnt, USE_KILL needlecnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(300);
+  format %{ "String IndexOf L [0..$needlecnt]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, $needlecnt$$Register, 0,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+instruct indexOf_UL(iRegP haystack, rarg2RegI haycnt, iRegP needle, rarg5RegI needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(TEMP_DEF result, USE_KILL haycnt, USE_KILL needlecnt, TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(300);
+  format %{ "String IndexOf UL [0..$needlecnt]($needle) .in. [0..$haycnt]($haystack) -> $result" %}
+  ins_encode %{
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, $needlecnt$$Register, 0,
+                      $oddReg$$Register, $evenReg$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// char[] to byte[] compression
+instruct string_compress(iRegP src, rarg5RegP dst, iRegI result, roddRegI len, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
+  match(Set result (StrCompressedCopy src (Binary dst len)));
+  effect(TEMP_DEF result, USE_KILL dst, USE_KILL len, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  format %{ "String Compress $src->$dst($len) -> $result" %}
+  ins_encode %{
+    __ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
+                       $evenReg$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// byte[] to char[] inflation. trot implementation is shorter, but slower than the unrolled icm(h) loop.
+//instruct string_inflate_trot(Universe dummy, iRegP src, revenRegP dst, roddRegI len, iRegI tmp, flagsReg cr) %{
+//  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+//  effect(USE_KILL dst, USE_KILL len, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+//  predicate(VM_Version::has_ETF2Enhancements());
+//  ins_cost(300);
+//  format %{ "String Inflate (trot) $dst,$src($len)" %}
+//  ins_encode %{
+//    __ string_inflate_trot($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register);
+//  %}
+//  ins_pipe(pipe_class_dummy);
+//%}
+
+// byte[] to char[] inflation
+// Produces no value (result operand is the Universe dummy); src and len are
+// consumed (USE_KILL).
+instruct string_inflate(Universe dummy, rarg5RegP src, iRegP dst, roddRegI len, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
+  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+  effect(USE_KILL src, USE_KILL len, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  format %{ "String Inflate $src->$dst($len)" %}
+  ins_encode %{
+    __ string_inflate($src$$Register, $dst$$Register, $len$$Register, $evenReg$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// StringCoding.java intrinsics
+instruct has_negatives(rarg5RegP ary1, iRegI len, iRegI result, roddRegI oddReg, revenRegI evenReg, iRegI tmp, flagsReg cr) %{
+  match(Set result (HasNegatives ary1 len));
+  effect(TEMP_DEF result, USE_KILL ary1, TEMP oddReg, TEMP evenReg, TEMP tmp, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  format %{ "has negatives byte[] $ary1($len) -> $result" %}
+  ins_encode %{
+    __ has_negatives($result$$Register, $ary1$$Register, $len$$Register,
+                     $oddReg$$Register, $evenReg$$Register, $tmp$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// encode char[] to byte[] in ISO_8859_1
+// Emitted through the same string_compress() assembler routine as the
+// StrCompressedCopy rule, here with one additional temp register (tmp2).
+// NOTE(review): presumably the extra argument selects a different overload or
+// mode of string_compress() - confirm against the macro assembler.
+instruct encode_iso_array(rarg5RegP src, iRegP dst, iRegI result, roddRegI len, revenRegI evenReg, iRegI tmp, iRegI tmp2, flagsReg cr) %{
+  match(Set result (EncodeISOArray src (Binary dst len)));
+  effect(TEMP_DEF result, USE_KILL src, USE_KILL len, TEMP evenReg, TEMP tmp, TEMP tmp2, KILL cr); // R0, R1 are killed, too.
+  ins_cost(300);
+  format %{ "Encode array $src->$dst($len) -> $result" %}
+  ins_encode %{
+    __ string_compress($result$$Register, $src$$Register, $dst$$Register, $len$$Register,
+                       $evenReg$$Register, $tmp$$Register, $tmp2$$Register);
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+
+//----------PEEPHOLE RULES-----------------------------------------------------
+// These must follow all instruction definitions as they use the names
+// defined in the instructions definitions.
+//
+// peepmatch (root_instr_name [preceding_instruction]*);
+//
+// peepconstraint %{
+// (instruction_number.operand_name relational_op instruction_number.operand_name
+// [, ...]);
+// // instruction numbers are zero-based using left to right order in peepmatch
+//
+// peepreplace (instr_name([instruction_number.operand_name]*));
+// // provide an instruction_number.operand_name for each operand that appears
+// // in the replacement instruction's match rule
+//
+// ---------VM FLAGS---------------------------------------------------------
+//
+// All peephole optimizations can be turned off using -XX:-OptoPeephole
+//
+// Each peephole rule is given an identifying number starting with zero and
+// increasing by one in the order seen by the parser. An individual peephole
+// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
+// on the command-line.
+//
+// ---------CURRENT LIMITATIONS----------------------------------------------
+//
+// Only match adjacent instructions in same basic block
+// Only equality constraints
+// Only constraints between operands, not (0.dest_reg == EAX_enc)
+// Only one replacement instruction
+//
+// ---------EXAMPLE----------------------------------------------------------
+//
+// // pertinent parts of existing instructions in architecture description
+// instruct movI(eRegI dst, eRegI src) %{
+//   match(Set dst (CopyI src));
+// %}
+//
+// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+//   match(Set dst (AddI dst src));
+//   effect(KILL cr);
+// %}
+//
+// // Change (inc mov) to lea
+// peephole %{
+//   // increment preceded by register-register move
+//   peepmatch (incI_eReg movI);
+//   // require that the destination register of the increment
+//   // match the destination register of the move
+//   peepconstraint (0.dst == 1.dst);
+//   // construct a replacement instruction that sets
+//   // the destination to (move's source register + one)
+//   peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// Implementation no longer uses movX instructions since
+// machine-independent system no longer uses CopyX nodes.
+//
+// peephole %{
+//   peepmatch (incI_eReg movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// peephole %{
+//   peepmatch (decI_eReg movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// peephole %{
+//   peepmatch (addI_eReg_imm movI);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaI_eReg_immI(0.dst 1.src 0.src));
+// %}
+//
+// peephole %{
+//   peepmatch (addP_eReg_imm movP);
+//   peepconstraint (0.dst == 1.dst);
+//   peepreplace (leaP_eReg_immI(0.dst 1.src 0.src));
+// %}
+
+
+// This peephole rule does not work, probably because ADLC can't handle two effects:
+// Effect 1 is defining 0.op1 and effect 2 is setting CC
+// condense a load from memory and subsequent test for zero
+// into a single, more efficient ICM instruction.
+// peephole %{
+//   peepmatch (compI_iReg_imm0 loadI);
+//   peepconstraint (1.dst == 0.op1);
+//   peepreplace (loadtest15_iReg_mem(0.op1 0.op1 1.mem));
+// %}
+
+// // Change load of spilled value to only a spill
+// instruct storeI(memory mem, eRegI src) %{
+//   match(Set mem (StoreI mem src));
+// %}
+//
+// instruct loadI(eRegI dst, memory mem) %{
+//   match(Set dst (LoadI mem));
+// %}
+//
+// Active rules: a load (instruction 0) that directly follows a store
+// (instruction 1) of the same register to the same memory operand is replaced
+// by the store alone.
+peephole %{
+  peepmatch (loadI storeI);
+  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
+  peepreplace (storeI(1.mem 1.mem 1.src));
+%}
+
+peephole %{
+  peepmatch (loadL storeL);
+  peepconstraint (1.src == 0.dst, 1.mem == 0.mem);
+  peepreplace (storeL(1.mem 1.mem 1.src));
+%}
+
+// Same pattern for pointers; here the store's memory operand is referred to
+// as 'dst'.
+peephole %{
+  peepmatch (loadP storeP);
+  peepconstraint (1.src == 0.dst, 1.dst == 0.mem);
+  peepreplace (storeP(1.dst 1.dst 1.src));
+%}
+
+//----------SUPERWORD RULES---------------------------------------------------
+
+// Expand rules for special cases
+
+// Store a float register to a stack slot (STE). Never matched directly.
+instruct expand_storeF(stackSlotF mem, regF src) %{
+  // No match rule, false predicate, for expand only.
+  effect(DEF mem, USE src);
+  predicate(false);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "STE $src,$mem\t # replicate(float2stack)" %}
+  opcode(STE_ZOPC, STE_ZOPC);
+  ins_encode(z_form_rt_mem(src, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Load a float stack slot into a long register with LLGF ("unsigned" per the
+// format text). Never matched directly.
+instruct expand_LoadLogical_I2L(iRegL dst, stackSlotF mem) %{
+  // No match rule, false predicate, for expand only.
+  effect(DEF dst, USE mem);
+  predicate(false);
+  ins_cost(MEMORY_REF_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "LLGF $dst,$mem\t # replicate(stack2reg(unsigned))" %}
+  opcode(LLGF_ZOPC, LLGF_ZOPC);
+  ins_encode(z_form_rt_mem(dst, mem));
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replicate scalar int to packed int values (8 Bytes)
+// The source is shifted left by 32 bits and OR-ed with the unshifted value.
+// When dst and src are the same register, the shifted copy is built in
+// Z_R0_scratch so the original value is still available for the OR.
+instruct expand_Repl2I_reg(iRegL dst, iRegL src) %{
+  // Dummy match rule, false predicate, for expand only.
+  match(Set dst (ConvI2L src));
+  predicate(false);
+  ins_cost(DEFAULT_COST);
+  // TODO: s390 port size(FIXED_SIZE);
+  format %{ "REPLIC2F $dst,$src\t # replicate(pack2F)" %}
+  ins_encode %{
+    if ($dst$$Register == $src$$Register) {
+      __ z_sllg(Z_R0_scratch, $src$$Register, 64-32);
+      __ z_ogr($dst$$Register, Z_R0_scratch);
+    } else {
+      __ z_sllg($dst$$Register, $src$$Register, 64-32);
+      __ z_ogr( $dst$$Register, $src$$Register);
+    }
+  %}
+  ins_pipe(pipe_class_dummy);
+%}
+
+// Replication
+
+// Exploit rotate_then_insert, if available
+// Replicate scalar byte to packed byte values (8 Bytes).
+instruct Repl8B_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{ + match(Set dst (ReplicateB src)); + effect(KILL cr); + predicate((n->as_Vector()->length() == 8)); + format %{ "REPLIC8B $dst,$src\t # pack8B" %} + ins_encode %{ + if ($dst$$Register != $src$$Register) { + __ z_lgr($dst$$Register, $src$$Register); + } + __ rotate_then_insert($dst$$Register, $dst$$Register, 48, 55, 8, false); + __ rotate_then_insert($dst$$Register, $dst$$Register, 32, 47, 16, false); + __ rotate_then_insert($dst$$Register, $dst$$Register, 0, 31, 32, false); + %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar byte to packed byte values (8 Bytes). +instruct Repl8B_imm(iRegL dst, immB_n0m1 src) %{ + match(Set dst (ReplicateB src)); + predicate(n->as_Vector()->length() == 8); + ins_should_rematerialize(true); + format %{ "REPLIC8B $dst,$src\t # pack8B imm" %} + ins_encode %{ + int64_t Isrc8 = $src$$constant & 0x000000ff; + int64_t Isrc16 = Isrc8 << 8 | Isrc8; + int64_t Isrc32 = Isrc16 << 16 | Isrc16; + assert(Isrc8 != 0x000000ff && Isrc8 != 0, "should be handled by other match rules."); + + __ z_llilf($dst$$Register, Isrc32); + __ z_iihf($dst$$Register, Isrc32); + %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar byte to packed byte values (8 Bytes). +instruct Repl8B_imm0(iRegL dst, immI_0 src) %{ + match(Set dst (ReplicateB src)); + predicate(n->as_Vector()->length() == 8); + ins_should_rematerialize(true); + format %{ "REPLIC8B $dst,$src\t # pack8B imm0" %} + ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar byte to packed byte values (8 Bytes). 
+instruct Repl8B_immm1(iRegL dst, immB_minus1 src) %{ + match(Set dst (ReplicateB src)); + predicate(n->as_Vector()->length() == 8); + ins_should_rematerialize(true); + format %{ "REPLIC8B $dst,$src\t # pack8B immm1" %} + ins_encode %{ __ z_lghi($dst$$Register, -1); %} + ins_pipe(pipe_class_dummy); +%} + +// Exploit rotate_then_insert, if available +// Replicate scalar short to packed short values (8 Bytes). +instruct Repl4S_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{ + match(Set dst (ReplicateS src)); + effect(KILL cr); + predicate((n->as_Vector()->length() == 4)); + format %{ "REPLIC4S $dst,$src\t # pack4S" %} + ins_encode %{ + if ($dst$$Register != $src$$Register) { + __ z_lgr($dst$$Register, $src$$Register); + } + __ rotate_then_insert($dst$$Register, $dst$$Register, 32, 47, 16, false); + __ rotate_then_insert($dst$$Register, $dst$$Register, 0, 31, 32, false); + %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar short to packed short values (8 Bytes). +instruct Repl4S_imm(iRegL dst, immS_n0m1 src) %{ + match(Set dst (ReplicateS src)); + predicate(n->as_Vector()->length() == 4); + ins_should_rematerialize(true); + format %{ "REPLIC4S $dst,$src\t # pack4S imm" %} + ins_encode %{ + int64_t Isrc16 = $src$$constant & 0x0000ffff; + int64_t Isrc32 = Isrc16 << 16 | Isrc16; + assert(Isrc16 != 0x0000ffff && Isrc16 != 0, "Repl4S_imm: (src == " INT64_FORMAT + ") should be handled by other match rules.", $src$$constant); + + __ z_llilf($dst$$Register, Isrc32); + __ z_iihf($dst$$Register, Isrc32); + %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar short to packed short values (8 Bytes). +instruct Repl4S_imm0(iRegL dst, immI_0 src) %{ + match(Set dst (ReplicateS src)); + predicate(n->as_Vector()->length() == 4); + ins_should_rematerialize(true); + format %{ "REPLIC4S $dst,$src\t # pack4S imm0" %} + ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar short to packed short values (8 Bytes). 
+instruct Repl4S_immm1(iRegL dst, immS_minus1 src) %{ + match(Set dst (ReplicateS src)); + predicate(n->as_Vector()->length() == 4); + ins_should_rematerialize(true); + format %{ "REPLIC4S $dst,$src\t # pack4S immm1" %} + ins_encode %{ __ z_lghi($dst$$Register, -1); %} + ins_pipe(pipe_class_dummy); +%} + +// Exploit rotate_then_insert, if available. +// Replicate scalar int to packed int values (8 Bytes). +instruct Repl2I_reg_risbg(iRegL dst, iRegI src, flagsReg cr) %{ + match(Set dst (ReplicateI src)); + effect(KILL cr); + predicate((n->as_Vector()->length() == 2)); + format %{ "REPLIC2I $dst,$src\t # pack2I" %} + ins_encode %{ + if ($dst$$Register != $src$$Register) { + __ z_lgr($dst$$Register, $src$$Register); + } + __ rotate_then_insert($dst$$Register, $dst$$Register, 0, 31, 32, false); + %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar int to packed int values (8 Bytes). +instruct Repl2I_imm(iRegL dst, immI_n0m1 src) %{ + match(Set dst (ReplicateI src)); + predicate(n->as_Vector()->length() == 2); + ins_should_rematerialize(true); + format %{ "REPLIC2I $dst,$src\t # pack2I imm" %} + ins_encode %{ + int64_t Isrc32 = $src$$constant; + assert(Isrc32 != -1 && Isrc32 != 0, "should be handled by other match rules."); + + __ z_llilf($dst$$Register, Isrc32); + __ z_iihf($dst$$Register, Isrc32); + %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar int to packed int values (8 Bytes). +instruct Repl2I_imm0(iRegL dst, immI_0 src) %{ + match(Set dst (ReplicateI src)); + predicate(n->as_Vector()->length() == 2); + ins_should_rematerialize(true); + format %{ "REPLIC2I $dst,$src\t # pack2I imm0" %} + ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar int to packed int values (8 Bytes). 
+instruct Repl2I_immm1(iRegL dst, immI_minus1 src) %{ + match(Set dst (ReplicateI src)); + predicate(n->as_Vector()->length() == 2); + ins_should_rematerialize(true); + format %{ "REPLIC2I $dst,$src\t # pack2I immm1" %} + ins_encode %{ __ z_lghi($dst$$Register, -1); %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar float to packed float values in GREG (8 Bytes) via a stack slot; selected when FP support enhancements are not available. + +instruct Repl2F_reg_indirect(iRegL dst, regF src, flagsReg cr) %{ + match(Set dst (ReplicateF src)); + effect(KILL cr); + predicate(!VM_Version::has_FPSupportEnhancements() && n->as_Vector()->length() == 2); + format %{ "REPLIC2F $dst,$src\t # pack2F indirect" %} + expand %{ + stackSlotF tmp; + iRegL tmp2; + expand_storeF(tmp, src); + expand_LoadLogical_I2L(tmp2, tmp); + expand_Repl2I_reg(dst, tmp2); + %} +%} + +// Replicate scalar float to packed float values in GREG (8 Bytes). +instruct Repl2F_reg_direct(iRegL dst, regF src, flagsReg cr) %{ + match(Set dst (ReplicateF src)); + effect(KILL cr); + predicate(VM_Version::has_FPSupportEnhancements() && n->as_Vector()->length() == 2); + format %{ "REPLIC2F $dst,$src\t # pack2F direct" %} + ins_encode %{ + assert(VM_Version::has_FPSupportEnhancements(), "encoder should never be called on old H/W"); + __ z_lgdr($dst$$Register, $src$$FloatRegister); + + __ z_srlg(Z_R0_scratch, $dst$$Register, 32); // Floats are left-justified in 64bit reg. + __ z_iilf($dst$$Register, 0); // Save a "result not ready" stall. + __ z_ogr($dst$$Register, Z_R0_scratch); + %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar float immediate to packed float values in GREG (8 Bytes). 
+instruct Repl2F_imm(iRegL dst, immF src) %{ + match(Set dst (ReplicateF src)); + predicate(n->as_Vector()->length() == 2); + ins_should_rematerialize(true); + format %{ "REPLIC2F $dst,$src\t # pack2F imm" %} + ins_encode %{ + union { + int Isrc32; + float Fsrc32; + }; + Fsrc32 = $src$$constant; + __ z_llilf($dst$$Register, Isrc32); + __ z_iihf($dst$$Register, Isrc32); + %} + ins_pipe(pipe_class_dummy); +%} + +// Replicate scalar float immediate zeroes to packed float values in GREG (8 Bytes). +// Do this only for 'real' zeroes, especially don't lose the sign of negative zeroes. +instruct Repl2F_imm0(iRegL dst, immFp0 src) %{ + match(Set dst (ReplicateF src)); + predicate(n->as_Vector()->length() == 2); + ins_should_rematerialize(true); + format %{ "REPLIC2F $dst,$src\t # pack2F imm0" %} + ins_encode %{ __ z_laz($dst$$Register, 0, Z_R0); %} + ins_pipe(pipe_class_dummy); +%} + +// Store + +// Store Aligned Packed Byte register to memory (8 Bytes). +instruct storeA8B(memory mem, iRegL src) %{ + match(Set mem (StoreVector mem src)); + predicate(n->as_StoreVector()->memory_size() == 8); + ins_cost(MEMORY_REF_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "STG $src,$mem\t # ST(packed8B)" %} + opcode(STG_ZOPC, STG_ZOPC); + ins_encode(z_form_rt_mem_opt(src, mem)); + ins_pipe(pipe_class_dummy); +%} + +// Load + +instruct loadV8(iRegL dst, memory mem) %{ + match(Set dst (LoadVector mem)); + predicate(n->as_LoadVector()->memory_size() == 8); + ins_cost(MEMORY_REF_COST); + // TODO: s390 port size(VARIABLE_SIZE); + format %{ "LG $dst,$mem\t # L(packed8B)" %} + opcode(LG_ZOPC, LG_ZOPC); + ins_encode(z_form_rt_mem_opt(dst, mem)); + ins_pipe(pipe_class_dummy); +%} + +//----------POPULATION COUNT RULES-------------------------------------------- + +// Byte reverse + +instruct bytes_reverse_int(iRegI dst, iRegI src) %{ + match(Set dst (ReverseBytesI src)); + predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported + ins_cost(DEFAULT_COST); + size(4); + 
format %{ "LRVR $dst,$src\t# byte reverse int" %} + opcode(LRVR_ZOPC); + ins_encode(z_rreform(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +instruct bytes_reverse_long(iRegL dst, iRegL src) %{ + match(Set dst (ReverseBytesL src)); + predicate(UseByteReverseInstruction); // See Matcher::match_rule_supported + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "LRVGR $dst,$src\t# byte reverse long" %} + opcode(LRVGR_ZOPC); + ins_encode(z_rreform(dst, src)); + ins_pipe(pipe_class_dummy); +%} + +// Leading zeroes + +// The instruction FLOGR (Find Leftmost One in Grande (64bit) Register) +// returns the bit position of the leftmost 1 in the 64bit source register. +// As the bits are numbered from left to right (0..63), the returned +// position index is equivalent to the number of leading zeroes. +// If no 1-bit is found (i.e. the register contains zero), the instruction +// returns position 64. That's exactly what we need. + +instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{ + match(Set dst (CountLeadingZerosI src)); + effect(KILL tmp, KILL cr); + predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported + ins_cost(3 * DEFAULT_COST); + size(14); + format %{ "SLLG $dst,$src,32\t# no need to always count 32 zeroes first\n\t" + "IILH $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t" + "FLOGR $dst,$dst" + %} + ins_encode %{ + // Performance experiments indicate that "FLOGR" is using some kind of + // iteration to find the leftmost "1" bit. + // + // The prior implementation zero-extended the 32-bit argument to 64 bit, + // thus forcing "FLOGR" to count 32 bits of which we know they are zero. + // We could gain measurable speedup in micro benchmark: + // + // leading trailing + // z10: int 2.04 1.68 + // long 1.00 1.02 + // z196: int 0.99 1.23 + // long 1.00 1.11 + // + // By shifting the argument into the high-word instead of zero-extending it. 
+ // The add'l branch on condition (taken for a zero argument, very infrequent, + // good prediction) is well compensated for by the savings. + // + // We leave the previous implementation in for some time in the future when + // the "FLOGR" instruction may become less iterative. + + // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original + __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first. + __ z_iilh($dst$$Register, 0x8000); // Insert "stop bit" to force result 32 for zero src. + __ z_flogr($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{ + match(Set dst (CountLeadingZerosL src)); + effect(KILL tmp, KILL cr); + predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported + ins_cost(DEFAULT_COST); + size(4); + format %{ "FLOGR $dst,$src \t# count leading zeros (long)\n\t" %} + ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %} + ins_pipe(pipe_class_dummy); +%} + +// trailing zeroes + +// We transform the trailing zeroes problem to a leading zeroes problem +// such that can use the FLOGR instruction to our advantage. + +// With +// tmp1 = src - 1 +// we flip all trailing zeroes to ones and the rightmost one to zero. +// All other bits remain unchanged. +// With the complement +// tmp2 = ~src +// we get all ones in the trailing zeroes positions. Thus, +// tmp3 = tmp1 & tmp2 +// yields ones in the trailing zeroes positions and zeroes elsewhere. +// Now we can apply FLOGR and get 64-(trailing zeroes). 
+instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{ + match(Set dst (CountTrailingZerosI src)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported + ins_cost(8 * DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off. + format %{ "LLGFR $dst,$src \t# clear upper 32 bits (we are dealing with int)\n\t" + "LCGFR $tmp,$src \t# load 2's complement (32->64 bit)\n\t" + "AGHI $dst,-1 \t# tmp1 = src-1\n\t" + "AGHI $tmp,-1 \t# tmp2 = -src-1 = ~src\n\t" + "NGR $dst,$tmp \t# tmp3 = tmp1&tmp2\n\t" + "FLOGR $dst,$dst \t# count trailing zeros (int)\n\t" + "AHI $dst,-64 \t# tmp4 = 64-(trailing zeroes)-64\n\t" + "LCR $dst,$dst \t# res = -tmp4" + %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + // Rtmp only needed for the zero-argument shortcut. With kill effect in + // match rule Rsrc = roddReg would be possible, saving one register. + Register Rtmp = $tmp$$Register; + + assert_different_registers(Rdst, Rsrc, Rtmp); + + // Algorithm: + // - Isolate the least significant (rightmost) set bit using (src & (-src)). + // All other bits in the result are zero. + // - Find the "leftmost one" bit position in the single-bit result from previous step. + // - 63-("leftmost one" bit position) gives the # of trailing zeros. + + // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original. + Label done; + __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32. + __ z_lcgfr(Rtmp, Rsrc); + __ z_bre(done); // Taken very infrequently, good prediction, no BHT entry. + + __ z_nr(Rtmp, Rsrc); // (src) & (-src) leaves nothing but least significant bit. + __ z_ahi(Rtmp, -1); // Subtract one to fill all trailing zero positions with ones. + // Use 32bit op to prevent borrow propagation (case Rdst = 0x80000000) + // into upper half of reg. 
Not relevant with sllg below. + __ z_sllg(Rdst, Rtmp, 32); // Shift interesting contents to upper half of register. + __ z_bre(done); // Shortcut for argument = 1, result will be 0. + // Depends on CC set by ahi above. + // Taken very infrequently, good prediction, no BHT entry. + // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit) + // after SLLG Rdst == 0(64bit)). + __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst. + __ add2reg(Rdst, -32); // 32-pos(leftmost1) is #trailing zeros + __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost. + __ bind(done); + %} + ins_pipe(pipe_class_dummy); +%} + +instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{ + match(Set dst (CountTrailingZerosL src)); + effect(TEMP_DEF dst, KILL tmp, KILL cr); + predicate(UseCountLeadingZerosInstruction); // See Matcher::match_rule_supported + ins_cost(8 * DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); // Emitted code depends on PreferLAoverADD being on/off. + format %{ "LCGR $dst,$src \t# preserve src\n\t" + "NGR $dst,$src \t#\n\t" + "AGHI $dst,-1 \t# tmp1 = src-1\n\t" + "FLOGR $dst,$dst \t# count trailing zeros (long), kill $tmp\n\t" + "AHI $dst,-64 \t# tmp4 = 64-(trailing zeroes)-64\n\t" + "LCR $dst,$dst \t#" + %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed. + + // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original. + __ z_lcgr(Rdst, Rsrc); + __ z_ngr(Rdst, Rsrc); + __ add2reg(Rdst, -1); + __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst. + __ add2reg(Rdst, -64); + __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost. 
+ %} + ins_pipe(pipe_class_dummy); +%} + + +// bit count + +instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{ + match(Set dst (PopCountI src)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + predicate(UsePopCountInstruction && VM_Version::has_PopCount()); + ins_cost(DEFAULT_COST); + size(24); + format %{ "POPCNT $dst,$src\t# pop count int" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + + // Prefer compile-time assertion over run-time SIGILL. + assert(VM_Version::has_PopCount(), "bad predicate for popCountI"); + assert_different_registers(Rdst, Rtmp); + + // Version 2: shows 10%(z196) improvement over original. + __ z_popcnt(Rdst, Rsrc); + __ z_srlg(Rtmp, Rdst, 16); // calc byte4+byte6 and byte5+byte7 + __ z_alr(Rdst, Rtmp); // into byte6 and byte7 + __ z_srlg(Rtmp, Rdst, 8); // calc (byte4+byte6) + (byte5+byte7) + __ z_alr(Rdst, Rtmp); // into byte7 + __ z_llgcr(Rdst, Rdst); // zero-extend sum + %} + ins_pipe(pipe_class_dummy); +%} + +instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{ + match(Set dst (PopCountL src)); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + predicate(UsePopCountInstruction && VM_Version::has_PopCount()); + ins_cost(DEFAULT_COST); + // TODO: s390 port size(FIXED_SIZE); + format %{ "POPCNT $dst,$src\t# pop count long" %} + ins_encode %{ + Register Rdst = $dst$$Register; + Register Rsrc = $src$$Register; + Register Rtmp = $tmp$$Register; + + // Prefer compile-time assertion over run-time SIGILL. + assert(VM_Version::has_PopCount(), "bad predicate for popCountL"); + assert_different_registers(Rdst, Rtmp); + + // Original version. Using LA instead of algr seems to be a really bad idea (-35%). 
+ __ z_popcnt(Rdst, Rsrc); + __ z_ahhlr(Rdst, Rdst, Rdst); + __ z_sllg(Rtmp, Rdst, 16); + __ z_algr(Rdst, Rtmp); + __ z_sllg(Rtmp, Rdst, 8); + __ z_algr(Rdst, Rtmp); + __ z_srlg(Rdst, Rdst, 56); + %} + ins_pipe(pipe_class_dummy); +%} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. + +// ============================================================================ +// TYPE PROFILING RULES + diff --git a/hotspot/src/cpu/s390/vm/sharedRuntime_s390.cpp b/hotspot/src/cpu/s390/vm/sharedRuntime_s390.cpp new file mode 100644 index 00000000000..e28d2ef7289 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/sharedRuntime_s390.cpp @@ -0,0 +1,3552 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interp_masm.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "registerSaver_s390.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_s390.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif +#ifdef COMPILER2 +#include "opto/ad.hpp" +#include "opto/runtime.hpp" +#endif + +#ifdef PRODUCT +#define __ masm-> +#else +#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)-> +#endif + +#define BLOCK_COMMENT(str) __ block_comment(str) +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +#define RegisterSaver_LiveIntReg(regname) \ + { RegisterSaver::int_reg, regname->encoding(), regname->as_VMReg() } + +#define RegisterSaver_LiveFloatReg(regname) \ + { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() } + +// Registers which are not saved/restored, but still they have got a frame slot. +// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2 +#define RegisterSaver_ExcludedIntReg(regname) \ + { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() } + +// Registers which are not saved/restored, but still they have got a frame slot. +// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2. +#define RegisterSaver_ExcludedFloatReg(regname) \ + { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() } + +static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = { + // Live registers which get spilled to the stack. Register positions + // in this array correspond directly to the stack layout. 
+ // + // live float registers: + // + RegisterSaver_LiveFloatReg(Z_F0 ), + // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1) + RegisterSaver_LiveFloatReg(Z_F2 ), + RegisterSaver_LiveFloatReg(Z_F3 ), + RegisterSaver_LiveFloatReg(Z_F4 ), + RegisterSaver_LiveFloatReg(Z_F5 ), + RegisterSaver_LiveFloatReg(Z_F6 ), + RegisterSaver_LiveFloatReg(Z_F7 ), + RegisterSaver_LiveFloatReg(Z_F8 ), + RegisterSaver_LiveFloatReg(Z_F9 ), + RegisterSaver_LiveFloatReg(Z_F10), + RegisterSaver_LiveFloatReg(Z_F11), + RegisterSaver_LiveFloatReg(Z_F12), + RegisterSaver_LiveFloatReg(Z_F13), + RegisterSaver_LiveFloatReg(Z_F14), + RegisterSaver_LiveFloatReg(Z_F15), + // + // RegisterSaver_ExcludedIntReg(Z_R0), // scratch + // RegisterSaver_ExcludedIntReg(Z_R1), // scratch + RegisterSaver_LiveIntReg(Z_R2 ), + RegisterSaver_LiveIntReg(Z_R3 ), + RegisterSaver_LiveIntReg(Z_R4 ), + RegisterSaver_LiveIntReg(Z_R5 ), + RegisterSaver_LiveIntReg(Z_R6 ), + RegisterSaver_LiveIntReg(Z_R7 ), + RegisterSaver_LiveIntReg(Z_R8 ), + RegisterSaver_LiveIntReg(Z_R9 ), + RegisterSaver_LiveIntReg(Z_R10), + RegisterSaver_LiveIntReg(Z_R11), + RegisterSaver_LiveIntReg(Z_R12), + RegisterSaver_LiveIntReg(Z_R13), + // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.) + // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer +}; + +static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = { + // Live registers which get spilled to the stack. Register positions + // in this array correspond directly to the stack layout. + // + // live float registers: All excluded, but still they get a stack slot to get same frame size. 
+ // + RegisterSaver_ExcludedFloatReg(Z_F0 ), + // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1) + RegisterSaver_ExcludedFloatReg(Z_F2 ), + RegisterSaver_ExcludedFloatReg(Z_F3 ), + RegisterSaver_ExcludedFloatReg(Z_F4 ), + RegisterSaver_ExcludedFloatReg(Z_F5 ), + RegisterSaver_ExcludedFloatReg(Z_F6 ), + RegisterSaver_ExcludedFloatReg(Z_F7 ), + RegisterSaver_ExcludedFloatReg(Z_F8 ), + RegisterSaver_ExcludedFloatReg(Z_F9 ), + RegisterSaver_ExcludedFloatReg(Z_F10), + RegisterSaver_ExcludedFloatReg(Z_F11), + RegisterSaver_ExcludedFloatReg(Z_F12), + RegisterSaver_ExcludedFloatReg(Z_F13), + RegisterSaver_ExcludedFloatReg(Z_F14), + RegisterSaver_ExcludedFloatReg(Z_F15), + // + // RegisterSaver_ExcludedIntReg(Z_R0), // scratch + // RegisterSaver_ExcludedIntReg(Z_R1), // scratch + RegisterSaver_LiveIntReg(Z_R2 ), + RegisterSaver_LiveIntReg(Z_R3 ), + RegisterSaver_LiveIntReg(Z_R4 ), + RegisterSaver_LiveIntReg(Z_R5 ), + RegisterSaver_LiveIntReg(Z_R6 ), + RegisterSaver_LiveIntReg(Z_R7 ), + RegisterSaver_LiveIntReg(Z_R8 ), + RegisterSaver_LiveIntReg(Z_R9 ), + RegisterSaver_LiveIntReg(Z_R10), + RegisterSaver_LiveIntReg(Z_R11), + RegisterSaver_LiveIntReg(Z_R12), + RegisterSaver_LiveIntReg(Z_R13), + // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.) + // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer +}; + +static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = { + // Live registers which get spilled to the stack. Register positions + // in this array correspond directly to the stack layout. 
+ // + // live float registers: + // + RegisterSaver_LiveFloatReg(Z_F0 ), + // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1) + RegisterSaver_LiveFloatReg(Z_F2 ), + RegisterSaver_LiveFloatReg(Z_F3 ), + RegisterSaver_LiveFloatReg(Z_F4 ), + RegisterSaver_LiveFloatReg(Z_F5 ), + RegisterSaver_LiveFloatReg(Z_F6 ), + RegisterSaver_LiveFloatReg(Z_F7 ), + RegisterSaver_LiveFloatReg(Z_F8 ), + RegisterSaver_LiveFloatReg(Z_F9 ), + RegisterSaver_LiveFloatReg(Z_F10), + RegisterSaver_LiveFloatReg(Z_F11), + RegisterSaver_LiveFloatReg(Z_F12), + RegisterSaver_LiveFloatReg(Z_F13), + RegisterSaver_LiveFloatReg(Z_F14), + RegisterSaver_LiveFloatReg(Z_F15), + // + // RegisterSaver_ExcludedIntReg(Z_R0), // scratch + // RegisterSaver_ExcludedIntReg(Z_R1), // scratch + RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2. + RegisterSaver_LiveIntReg(Z_R3 ), + RegisterSaver_LiveIntReg(Z_R4 ), + RegisterSaver_LiveIntReg(Z_R5 ), + RegisterSaver_LiveIntReg(Z_R6 ), + RegisterSaver_LiveIntReg(Z_R7 ), + RegisterSaver_LiveIntReg(Z_R8 ), + RegisterSaver_LiveIntReg(Z_R9 ), + RegisterSaver_LiveIntReg(Z_R10), + RegisterSaver_LiveIntReg(Z_R11), + RegisterSaver_LiveIntReg(Z_R12), + RegisterSaver_LiveIntReg(Z_R13), + // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.) + // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer +}; + +// Live argument registers which get spilled to the stack. +static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = { + RegisterSaver_LiveFloatReg(Z_FARG1), + RegisterSaver_LiveFloatReg(Z_FARG2), + RegisterSaver_LiveFloatReg(Z_FARG3), + RegisterSaver_LiveFloatReg(Z_FARG4), + RegisterSaver_LiveIntReg(Z_ARG1), + RegisterSaver_LiveIntReg(Z_ARG2), + RegisterSaver_LiveIntReg(Z_ARG3), + RegisterSaver_LiveIntReg(Z_ARG4), + RegisterSaver_LiveIntReg(Z_ARG5) +}; + +static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = { + // Live registers which get spilled to the stack. 
Register positions + // in this array correspond directly to the stack layout. + // + // live float registers: + // + RegisterSaver_LiveFloatReg(Z_F0 ), + // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1) + RegisterSaver_LiveFloatReg(Z_F2 ), + RegisterSaver_LiveFloatReg(Z_F3 ), + RegisterSaver_LiveFloatReg(Z_F4 ), + RegisterSaver_LiveFloatReg(Z_F5 ), + RegisterSaver_LiveFloatReg(Z_F6 ), + RegisterSaver_LiveFloatReg(Z_F7 ), + // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile + // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile + // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile + // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile + // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile + // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile + // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile + // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile + // + // RegisterSaver_ExcludedIntReg(Z_R0), // scratch + // RegisterSaver_ExcludedIntReg(Z_R1), // scratch + RegisterSaver_LiveIntReg(Z_R2 ), + RegisterSaver_LiveIntReg(Z_R3 ), + RegisterSaver_LiveIntReg(Z_R4 ), + RegisterSaver_LiveIntReg(Z_R5 ), + // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile + // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile + // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile + // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile + // RegisterSaver_LiveIntReg(Z_R10), // non-volatile + // RegisterSaver_LiveIntReg(Z_R11), // non-volatile + // RegisterSaver_LiveIntReg(Z_R12), // non-volatile + // RegisterSaver_LiveIntReg(Z_R13), // non-volatile + // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.) 
+ // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer +}; + +int RegisterSaver::live_reg_save_size(RegisterSet reg_set) { + int reg_space = -1; + switch (reg_set) { + case all_registers: reg_space = sizeof(RegisterSaver_LiveRegs); break; + case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break; + case all_integer_registers: reg_space = sizeof(RegisterSaver_LiveIntRegs); break; + case all_volatile_registers: reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break; + case arg_registers: reg_space = sizeof(RegisterSaver_LiveArgRegs); break; + default: ShouldNotReachHere(); + } + return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size; +} + + +int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) { + return live_reg_save_size(reg_set) + frame::z_abi_160_size; +} + + +// return_pc: Specify the register that should be stored as the return pc in the current frame. +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) { + // Record volatile registers as callee-save values in an OopMap so + // their save locations will be propagated to the caller frame's + // RegisterMap during StackFrameStream construction (needed for + // deoptimization; see compiledVFrame::create_stack_value). + + // Calculate frame size. + const int frame_size_in_bytes = live_reg_frame_size(reg_set); + const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); + const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set); + + // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words. 
+ OopMap* map = new OopMap(frame_size_in_slots, 0); + + int regstosave_num = 0; + const RegisterSaver::LiveRegType* live_regs = NULL; + + switch (reg_set) { + case all_registers: + regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType); + live_regs = RegisterSaver_LiveRegs; + break; + case all_registers_except_r2: + regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);; + live_regs = RegisterSaver_LiveRegsWithoutR2; + break; + case all_integer_registers: + regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType); + live_regs = RegisterSaver_LiveIntRegs; + break; + case all_volatile_registers: + regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType); + live_regs = RegisterSaver_LiveVolatileRegs; + break; + case arg_registers: + regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);; + live_regs = RegisterSaver_LiveArgRegs; + break; + default: ShouldNotReachHere(); + } + + // Save return pc in old frame. + __ save_return_pc(return_pc); + + // Push a new frame (includes stack linkage). + __ push_frame(frame_size_in_bytes); + + // Register save area in new frame starts above z_abi_160 area. 
+ int offset = register_save_offset; + + Register first = noreg; + Register last = noreg; + int first_offset = -1; + bool float_spilled = false; + + for (int i = 0; i < regstosave_num; i++, offset += reg_size) { + int reg_num = live_regs[i].reg_num; + int reg_type = live_regs[i].reg_type; + + switch (reg_type) { + case RegisterSaver::int_reg: { + Register reg = as_Register(reg_num); + if (last != reg->predecessor()) { + if (first != noreg) { + __ z_stmg(first, last, first_offset, Z_SP); + } + first = reg; + first_offset = offset; + DEBUG_ONLY(float_spilled = false); + } + last = reg; + assert(last != Z_R0, "r0 would require special treatment"); + assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]"); + break; + } + + case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot. + continue; // Continue with next loop iteration. + + case RegisterSaver::float_reg: { + FloatRegister freg = as_FloatRegister(reg_num); + __ z_std(freg, offset, Z_SP); + DEBUG_ONLY(float_spilled = true); + break; + } + + default: + ShouldNotReachHere(); + break; + } + + // Second set_callee_saved is really a waste but we'll keep things as they were for now + map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg); + map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next()); + } + assert(first != noreg, "Should spill at least one int reg."); + __ z_stmg(first, last, first_offset, Z_SP); + + // And we're done. + return map; +} + + +// Generate the OopMap (again, regs where saved before). +OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) { + // Calculate frame size. 
+ const int frame_size_in_bytes = live_reg_frame_size(reg_set); + const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); + const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set); + + // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words. + OopMap* map = new OopMap(frame_size_in_slots, 0); + + int regstosave_num = 0; + const RegisterSaver::LiveRegType* live_regs = NULL; + + switch (reg_set) { + case all_registers: + regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType); + live_regs = RegisterSaver_LiveRegs; + break; + case all_registers_except_r2: + regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);; + live_regs = RegisterSaver_LiveRegsWithoutR2; + break; + case all_integer_registers: + regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType); + live_regs = RegisterSaver_LiveIntRegs; + break; + case all_volatile_registers: + regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType); + live_regs = RegisterSaver_LiveVolatileRegs; + break; + case arg_registers: + regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);; + live_regs = RegisterSaver_LiveArgRegs; + break; + default: ShouldNotReachHere(); + } + + // Register save area in new frame starts above z_abi_160 area. + int offset = register_save_offset; + for (int i = 0; i < regstosave_num; i++) { + if (live_regs[i].reg_type < RegisterSaver::excluded_reg) { + map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg); + map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next()); + } + offset += reg_size; + } + return map; +} + + +// Pop the current frame and restore all the registers that we saved. 
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) { + int offset; + const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set); + + Register first = noreg; + Register last = noreg; + int first_offset = -1; + bool float_spilled = false; + + int regstosave_num = 0; + const RegisterSaver::LiveRegType* live_regs = NULL; + + switch (reg_set) { + case all_registers: + regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);; + live_regs = RegisterSaver_LiveRegs; + break; + case all_registers_except_r2: + regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);; + live_regs = RegisterSaver_LiveRegsWithoutR2; + break; + case all_integer_registers: + regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType); + live_regs = RegisterSaver_LiveIntRegs; + break; + case all_volatile_registers: + regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);; + live_regs = RegisterSaver_LiveVolatileRegs; + break; + case arg_registers: + regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);; + live_regs = RegisterSaver_LiveArgRegs; + break; + default: ShouldNotReachHere(); + } + + // Restore all registers (ints and floats). + + // Register save area in new frame starts above z_abi_160 area. + offset = register_save_offset; + + for (int i = 0; i < regstosave_num; i++, offset += reg_size) { + int reg_num = live_regs[i].reg_num; + int reg_type = live_regs[i].reg_type; + + switch (reg_type) { + case RegisterSaver::excluded_reg: + continue; // Continue with next loop iteration. 
+ + case RegisterSaver::int_reg: { + Register reg = as_Register(reg_num); + if (last != reg->predecessor()) { + if (first != noreg) { + __ z_lmg(first, last, first_offset, Z_SP); + } + first = reg; + first_offset = offset; + DEBUG_ONLY(float_spilled = false); + } + last = reg; + assert(last != Z_R0, "r0 would require special treatment"); + assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]"); + break; + } + + case RegisterSaver::float_reg: { + FloatRegister freg = as_FloatRegister(reg_num); + __ z_ld(freg, offset, Z_SP); + DEBUG_ONLY(float_spilled = true); + break; + } + + default: + ShouldNotReachHere(); + } + } + assert(first != noreg, "Should spill at least one int reg."); + __ z_lmg(first, last, first_offset, Z_SP); + + // Pop the frame. + __ pop_frame(); + + // Restore the flags. + __ restore_return_pc(); +} + + +// Pop the current frame and restore the registers that might be holding a result. +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + int i; + int offset; + const int regstosave_num = sizeof(RegisterSaver_LiveRegs) / + sizeof(RegisterSaver::LiveRegType); + const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers); + + // Restore all result registers (ints and floats). + offset = register_save_offset; + for (int i = 0; i < regstosave_num; i++, offset += reg_size) { + int reg_num = RegisterSaver_LiveRegs[i].reg_num; + int reg_type = RegisterSaver_LiveRegs[i].reg_type; + switch (reg_type) { + case RegisterSaver::excluded_reg: + continue; // Continue with next loop iteration. 
+ case RegisterSaver::int_reg: { + if (as_Register(reg_num) == Z_RET) { // int result_reg + __ z_lg(as_Register(reg_num), offset, Z_SP); + } + break; + } + case RegisterSaver::float_reg: { + if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg + __ z_ld(as_FloatRegister(reg_num), offset, Z_SP); + } + break; + } + default: + ShouldNotReachHere(); + } + } +} + +#if INCLUDE_CDS +size_t SharedRuntime::trampoline_size() { + return MacroAssembler::load_const_size() + 2; +} + +void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { + // Think about using pc-relative branch. + __ load_const(Z_R1_scratch, destination); + __ z_br(Z_R1_scratch); +} +#endif + +// --------------------------------------------------------------------------- +void SharedRuntime::save_native_result(MacroAssembler * masm, + BasicType ret_type, + int frame_slots) { + Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size); + + switch (ret_type) { + case T_BOOLEAN: // Save shorter types as int. Do we need sign extension at restore?? + case T_BYTE: + case T_CHAR: + case T_SHORT: + case T_INT: + __ reg2mem_opt(Z_RET, memaddr, false); + break; + case T_OBJECT: // Save pointer types as long. + case T_ARRAY: + case T_ADDRESS: + case T_VOID: + case T_LONG: + __ reg2mem_opt(Z_RET, memaddr); + break; + case T_FLOAT: + __ freg2mem_opt(Z_FRET, memaddr, false); + break; + case T_DOUBLE: + __ freg2mem_opt(Z_FRET, memaddr); + break; + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, + BasicType ret_type, + int frame_slots) { + Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size); + + switch (ret_type) { + case T_BOOLEAN: // Restore shorter types as int. Do we need sign extension at restore?? + case T_BYTE: + case T_CHAR: + case T_SHORT: + case T_INT: + __ mem2reg_opt(Z_RET, memaddr, false); + break; + case T_OBJECT: // Restore pointer types as long. 
+ case T_ARRAY: + case T_ADDRESS: + case T_VOID: + case T_LONG: + __ mem2reg_opt(Z_RET, memaddr); + break; + case T_FLOAT: + __ mem2freg_opt(Z_FRET, memaddr, false); + break; + case T_DOUBLE: + __ mem2freg_opt(Z_FRET, memaddr); + break; + } +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than VMRegImpl::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Registers +// up to RegisterImpl::number_of_registers are the 64-bit integer registers. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. + +// The Java calling convention is a "shifted" version of the C ABI. +// By skipping the first C ABI register we can call non-static jni methods +// with small numbers of arguments without having to shuffle the arguments +// at all. Since we control the java ABI we ought to at least get some +// advantage out of it. +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + // c2c calling conventions for compiled-compiled calls. + + // An int/float occupies 1 slot here. + const int inc_stk_for_intfloat = 1; // 1 slots for ints and floats. + const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles. 
+ + const VMReg z_iarg_reg[5] = { + Z_R2->as_VMReg(), + Z_R3->as_VMReg(), + Z_R4->as_VMReg(), + Z_R5->as_VMReg(), + Z_R6->as_VMReg() + }; + const VMReg z_farg_reg[4] = { + Z_F0->as_VMReg(), + Z_F2->as_VMReg(), + Z_F4->as_VMReg(), + Z_F6->as_VMReg() + }; + const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]); + const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]); + + assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch"); + assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch"); + + int i; + int stk = 0; + int ireg = 0; + int freg = 0; + + for (int i = 0; i < total_args_passed; ++i) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (ireg < z_num_iarg_registers) { + // Put int/ptr in register. + regs[i].set1(z_iarg_reg[ireg]); + ++ireg; + } else { + // Put int/ptr on stack. + regs[i].set1(VMRegImpl::stack2reg(stk)); + stk += inc_stk_for_intfloat; + } + break; + case T_LONG: + assert(sig_bt[i+1] == T_VOID, "expecting half"); + if (ireg < z_num_iarg_registers) { + // Put long in register. + regs[i].set2(z_iarg_reg[ireg]); + ++ireg; + } else { + // Put long on stack and align to 2 slots. + if (stk & 0x1) { ++stk; } + regs[i].set2(VMRegImpl::stack2reg(stk)); + stk += inc_stk_for_longdouble; + } + break; + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (ireg < z_num_iarg_registers) { + // Put ptr in register. + regs[i].set2(z_iarg_reg[ireg]); + ++ireg; + } else { + // Put ptr on stack and align to 2 slots, because + // "64-bit pointers record oop-ishness on 2 aligned adjacent + // registers." (see OopFlow::build_oop_map). + if (stk & 0x1) { ++stk; } + regs[i].set2(VMRegImpl::stack2reg(stk)); + stk += inc_stk_for_longdouble; + } + break; + case T_FLOAT: + if (freg < z_num_farg_registers) { + // Put float in register. 
+ regs[i].set1(z_farg_reg[freg]); + ++freg; + } else { + // Put float on stack. + regs[i].set1(VMRegImpl::stack2reg(stk)); + stk += inc_stk_for_intfloat; + } + break; + case T_DOUBLE: + assert(sig_bt[i+1] == T_VOID, "expecting half"); + if (freg < z_num_farg_registers) { + // Put double in register. + regs[i].set2(z_farg_reg[freg]); + ++freg; + } else { + // Put double on stack and align to 2 slots. + if (stk & 0x1) { ++stk; } + regs[i].set2(VMRegImpl::stack2reg(stk)); + stk += inc_stk_for_longdouble; + } + break; + case T_VOID: + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + // Do not count halves. + regs[i].set_bad(); + break; + default: + ShouldNotReachHere(); + } + } + return round_to(stk, 2); +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "second VMRegPair array not used on this platform"); + + // Calling conventions for C runtime calls and calls to JNI native methods. + const VMReg z_iarg_reg[5] = { + Z_R2->as_VMReg(), + Z_R3->as_VMReg(), + Z_R4->as_VMReg(), + Z_R5->as_VMReg(), + Z_R6->as_VMReg() + }; + const VMReg z_farg_reg[4] = { + Z_F0->as_VMReg(), + Z_F2->as_VMReg(), + Z_F4->as_VMReg(), + Z_F6->as_VMReg() + }; + const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]); + const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]); + + // Check calling conventions consistency. + assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch"); + assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch"); + + // Avoid passing C arguments in the wrong stack slots. + + // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy + // 2 such slots, like 64 bit values do. + const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats. 
+ const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles. + + int i; + // Leave room for C-compatible ABI + int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size; + int freg = 0; + int ireg = 0; + + // We put the first 5 arguments into registers and the rest on the + // stack. Float arguments are already in their argument registers + // due to c2c calling conventions (see calling_convention). + for (int i = 0; i < total_args_passed; ++i) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + // Fall through, handle as long. + case T_LONG: + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + // Oops are already boxed if required (JNI). + if (ireg < z_num_iarg_registers) { + regs[i].set2(z_iarg_reg[ireg]); + ++ireg; + } else { + regs[i].set2(VMRegImpl::stack2reg(stk)); + stk += inc_stk_for_longdouble; + } + break; + case T_FLOAT: + if (freg < z_num_farg_registers) { + regs[i].set1(z_farg_reg[freg]); + ++freg; + } else { + regs[i].set1(VMRegImpl::stack2reg(stk+1)); + stk += inc_stk_for_intfloat; + } + break; + case T_DOUBLE: + assert(sig_bt[i+1] == T_VOID, "expecting half"); + if (freg < z_num_farg_registers) { + regs[i].set2(z_farg_reg[freg]); + ++freg; + } else { + // Put double on stack. + regs[i].set2(VMRegImpl::stack2reg(stk)); + stk += inc_stk_for_longdouble; + } + break; + case T_VOID: + // Do not count halves. + regs[i].set_bad(); + break; + default: + ShouldNotReachHere(); + } + } + return round_to(stk, 2); +} + +//////////////////////////////////////////////////////////////////////// +// +// Argument shufflers +// +//////////////////////////////////////////////////////////////////////// + +//---------------------------------------------------------------------- +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. 
Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. +//---------------------------------------------------------------------- +static int reg2slot(VMReg r) { + return r->reg2stack() + SharedRuntime::out_preserve_stack_slots(); +} + +static int reg2offset(VMReg r) { + return reg2slot(r) * VMRegImpl::stack_slot_size; +} + +static void verify_oop_args(MacroAssembler *masm, + int total_args_passed, + const BasicType *sig_bt, + const VMRegPair *regs) { + if (!VerifyOops) { return; } + + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + + if (r->is_stack()) { + __ z_lg(Z_R0_scratch, + Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); + __ verify_oop(Z_R0_scratch); + } else { + __ verify_oop(r->as_Register()); + } + } + } +} + +static void gen_special_dispatch(MacroAssembler *masm, + int total_args_passed, + vmIntrinsics::ID special_dispatch, + const BasicType *sig_bt, + const VMRegPair *regs) { + verify_oop_args(masm, total_args_passed, sig_bt, regs); + + // Now write the args into the outgoing interpreter space. + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch); + + if (ref_kind != 0) { + member_arg_pos = total_args_passed - 1; // trailing MemberName argument + member_reg = Z_R9; // Known to be free at this point. + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else { + guarantee(special_dispatch == vmIntrinsics::_invokeBasic, "special_dispatch=%d", special_dispatch); + has_receiver = true; + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. 
+ assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); + assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); + + VMReg r = regs[member_arg_pos].first(); + assert(r->is_valid(), "bad member arg"); + + if (r->is_stack()) { + __ z_lg(member_reg, Address(Z_SP, reg2offset(r))); + } else { + // No data motion is needed. + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(total_args_passed > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + assert(false, "receiver always in a register"); + receiver_reg = Z_R13; // Known to be free at this point. + __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r))); + } else { + // No data motion is needed. + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, special_dispatch, + receiver_reg, member_reg, + /*for_compiler_entry:*/ true); +} + +//////////////////////////////////////////////////////////////////////// +// +// Argument shufflers +// +//////////////////////////////////////////////////////////////////////// + +// Is the size of a vector size (in bytes) bigger than a size saved by default? +// 8 bytes registers are saved by default on z/Architecture. +bool SharedRuntime::is_wide_vector(int size) { + // Note, MaxVectorSize == 8 on this platform. + assert(size <= 8, "%d bytes vectors are not supported", size); + return size > 8; +} + +//---------------------------------------------------------------------- +// An oop arg. 
Must pass a handle not the oop itself +//---------------------------------------------------------------------- +static void object_move(MacroAssembler *masm, + OopMap *map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int *receiver_offset) { + int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size; + + assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please."); + + // Must pass a handle. First figure out the location we use as a handle. + + if (src.first()->is_stack()) { + // Oop is already on the stack, put handle on stack or in register + // If handle will be on the stack, use temp reg to calculate it. + Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register(); + Label skip; + int slot_in_older_frame = reg2slot(src.first()); + + guarantee(!is_receiver, "expecting receiver in register"); + map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots)); + + __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP); + __ load_and_test_long(Z_R0, Address(rHandle)); + __ z_brne(skip); + // Use a NULL handle if oop is NULL. + __ clear_reg(rHandle, true, false); + __ bind(skip); + + // Copy handle to the right place (register or stack). + if (dst.first()->is_stack()) { + __ z_stg(rHandle, reg2offset(dst.first()), Z_SP); + } // else + // nothing to do. rHandle uses the correct register + } else { + // Oop is passed in an input register. We must flush it to the stack. + const Register rOop = src.first()->as_Register(); + const Register rHandle = dst.first()->is_stack() ? 
Z_R1 : dst.first()->as_Register(); + int oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; + int oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size; + NearLabel skip; + + if (is_receiver) { + *receiver_offset = oop_slot_offset; + } + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + + // Flush Oop to stack, calculate handle. + __ z_stg(rOop, oop_slot_offset, Z_SP); + __ add2reg(rHandle, oop_slot_offset, Z_SP); + + // If Oop == NULL, use a NULL handle. + __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip); + __ clear_reg(rHandle, true, false); + __ bind(skip); + + // Copy handle to the right place (register or stack). + if (dst.first()->is_stack()) { + __ z_stg(rHandle, reg2offset(dst.first()), Z_SP); + } // else + // nothing to do here, since rHandle = dst.first()->as_Register in this case. + } +} + +//---------------------------------------------------------------------- +// A float arg. May have to do float reg to int reg conversion +//---------------------------------------------------------------------- +static void float_move(MacroAssembler *masm, + VMRegPair src, + VMRegPair dst, + int framesize_in_slots, + int workspace_slot_offset) { + int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size; + int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size; + + // We do not accept an argument in a VMRegPair to be spread over two slots, + // no matter what physical location (reg or stack) the slots may have. + // We just check for the unaccepted slot to be invalid. + assert(!src.second()->is_valid(), "float in arg spread over two slots"); + assert(!dst.second()->is_valid(), "float out arg spread over two slots"); + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack -> stack. The easiest of the bunch. 
+ __ z_mvc(Address(Z_SP, reg2offset(dst.first())), + Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float)); + } else { + // stack to reg + Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset); + if (dst.first()->is_Register()) { + __ mem2reg_opt(dst.first()->as_Register(), memaddr, false); + } else { + __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false); + } + } + } else if (src.first()->is_Register()) { + if (dst.first()->is_stack()) { + // gpr -> stack + __ reg2mem_opt(src.first()->as_Register(), + Address(Z_SP, reg2offset(dst.first()), false )); + } else { + if (dst.first()->is_Register()) { + // gpr -> gpr + __ move_reg_if_needed(dst.first()->as_Register(), T_INT, + src.first()->as_Register(), T_INT); + } else { + if (VM_Version::has_FPSupportEnhancements()) { + // gpr -> fpr. Exploit z10 capability of direct transfer. + __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register()); + } else { + // gpr -> fpr. Use work space on stack to transfer data. + Address stackaddr(Z_SP, workspace_offset); + + __ reg2mem_opt(src.first()->as_Register(), stackaddr, false); + __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false); + } + } + } + } else { + if (dst.first()->is_stack()) { + // fpr -> stack + __ freg2mem_opt(src.first()->as_FloatRegister(), + Address(Z_SP, reg2offset(dst.first())), false); + } else { + if (dst.first()->is_Register()) { + if (VM_Version::has_FPSupportEnhancements()) { + // fpr -> gpr. + __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister()); + } else { + // fpr -> gpr. Use work space on stack to transfer data. 
+ Address stackaddr(Z_SP, workspace_offset); + + __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false); + __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false); + } + } else { + // fpr -> fpr + __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT, + src.first()->as_FloatRegister(), T_FLOAT); + } + } + } +} + +//---------------------------------------------------------------------- +// A double arg. May have to do double reg to long reg conversion +//---------------------------------------------------------------------- +static void double_move(MacroAssembler *masm, + VMRegPair src, + VMRegPair dst, + int framesize_in_slots, + int workspace_slot_offset) { + int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size; + int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size; + + // Since src is always a java calling convention we know that the + // src pair is always either all registers or all stack (and aligned?) + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack -> stack. The easiest of the bunch. + __ z_mvc(Address(Z_SP, reg2offset(dst.first())), + Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double)); + } else { + // stack to reg + Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset); + + if (dst.first()->is_Register()) { + __ mem2reg_opt(dst.first()->as_Register(), stackaddr); + } else { + __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr); + } + } + } else if (src.first()->is_Register()) { + if (dst.first()->is_stack()) { + // gpr -> stack + __ reg2mem_opt(src.first()->as_Register(), + Address(Z_SP, reg2offset(dst.first()))); + } else { + if (dst.first()->is_Register()) { + // gpr -> gpr + __ move_reg_if_needed(dst.first()->as_Register(), T_LONG, + src.first()->as_Register(), T_LONG); + } else { + if (VM_Version::has_FPSupportEnhancements()) { + // gpr -> fpr. Exploit z10 capability of direct transfer. 
+ __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register()); + } else { + // gpr -> fpr. Use work space on stack to transfer data. + Address stackaddr(Z_SP, workspace_offset); + __ reg2mem_opt(src.first()->as_Register(), stackaddr); + __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr); + } + } + } + } else { + if (dst.first()->is_stack()) { + // fpr -> stack + __ freg2mem_opt(src.first()->as_FloatRegister(), + Address(Z_SP, reg2offset(dst.first()))); + } else { + if (dst.first()->is_Register()) { + if (VM_Version::has_FPSupportEnhancements()) { + // fpr -> gpr. Exploit z10 capability of direct transfer. + __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister()); + } else { + // fpr -> gpr. Use work space on stack to transfer data. + Address stackaddr(Z_SP, workspace_offset); + + __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr); + __ mem2reg_opt(dst.first()->as_Register(), stackaddr); + } + } else { + // fpr -> fpr + // In theory these overlap but the ordering is such that this is likely a nop. + __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE, + src.first()->as_FloatRegister(), T_DOUBLE); + } + } + } +} + +//---------------------------------------------------------------------- +// A long arg. +//---------------------------------------------------------------------- +static void long_move(MacroAssembler *masm, + VMRegPair src, + VMRegPair dst, + int framesize_in_slots) { + int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size; + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack -> stack. The easiest of the bunch. 
+ __ z_mvc(Address(Z_SP, reg2offset(dst.first())), + Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long)); + } else { + // stack to reg + assert(dst.first()->is_Register(), "long dst value must be in GPR"); + __ mem2reg_opt(dst.first()->as_Register(), + Address(Z_SP, reg2offset(src.first()) + frame_offset)); + } + } else { + // reg to reg + assert(src.first()->is_Register(), "long src value must be in GPR"); + if (dst.first()->is_stack()) { + // reg -> stack + __ reg2mem_opt(src.first()->as_Register(), + Address(Z_SP, reg2offset(dst.first()))); + } else { + // reg -> reg + assert(dst.first()->is_Register(), "long dst value must be in GPR"); + __ move_reg_if_needed(dst.first()->as_Register(), + T_LONG, src.first()->as_Register(), T_LONG); + } + } +} + + +//---------------------------------------------------------------------- +// A int-like arg. +//---------------------------------------------------------------------- +// On z/Architecture we will store integer like items to the stack as 64 bit +// items, according to the z/Architecture ABI, even though Java would only store +// 32 bits for a parameter. +// We do sign extension for all base types. That is ok since the only +// unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int. +// Sign extension 32->64 bit will thus not affect the value. +//---------------------------------------------------------------------- +static void move32_64(MacroAssembler *masm, + VMRegPair src, + VMRegPair dst, + int framesize_in_slots) { + int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size; + + if (src.first()->is_stack()) { + Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset); + if (dst.first()->is_stack()) { + // stack -> stack. MVC not posible due to sign extension. 
+      Address firstaddr(Z_SP, reg2offset(dst.first()));
+      __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
+      __ reg2mem_opt(Z_R0_scratch, firstaddr);
+    } else {
+      // stack -> reg, sign extended
+      __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
+    }
+  } else {
+    if (dst.first()->is_stack()) {
+      // reg -> stack, sign extended
+      Address firstaddr(Z_SP, reg2offset(dst.first()));
+      __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
+      __ reg2mem_opt(src.first()->as_Register(), firstaddr);
+    } else {
+      // reg -> reg, sign extended
+      __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
+    }
+  }
+}
+
+// Emit code that saves the register-based arguments into the argument save
+// area (map != NULL) or reloads them from it (map == NULL).
+//
+//   stack_slots    - frame size in stack slots; bounds the slot index in the
+//                    asserts and biases the oop map entry of array arguments
+//                    that were passed on the stack.
+//   total_in_args  - number of entries in in_regs / in_sig_bt.
+//   arg_save_area  - first stack slot (relative to Z_SP) of the save area.
+//   map            - when saving, set_oop() is called for every slot holding
+//                    an array argument; NULL selects the restore direction.
+//   in_regs        - location of every incoming argument.
+//   in_sig_bt      - basic type of every incoming argument.
+//
+// Both directions must walk the arguments in the same order and advance
+// 'slot' by the same amount, otherwise a value is reloaded from an offset it
+// was never stored to.
+static void save_or_restore_arguments(MacroAssembler *masm,
+                                      const int stack_slots,
+                                      const int total_in_args,
+                                      const int arg_save_area,
+                                      OopMap *map,
+                                      VMRegPair *in_regs,
+                                      BasicType *in_sig_bt) {
+
+  // If map is non-NULL then the code should store the values,
+  // otherwise it should load them.
+  int slot = arg_save_area;
+  // Handle double words first.
+  for (int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
+      int offset = slot * VMRegImpl::stack_slot_size;
+      slot += VMRegImpl::slots_per_word;
+      assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
+      const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
+      Address stackaddr(Z_SP, offset);
+      if (map != NULL) {
+        __ freg2mem_opt(freg, stackaddr);
+      } else {
+        __ mem2freg_opt(freg, stackaddr);
+      }
+    } else if (in_regs[i].first()->is_Register()) {
+      switch (in_sig_bt[i]) {
+        case T_BOOLEAN:
+        case T_BYTE:
+        case T_SHORT:
+        case T_CHAR:
+        case T_INT:
+          // generate_native_wrapper reserves a double slot for every int-like
+          // register argument of a critical native, and the register does not
+          // survive the runtime call (nor the StressCriticalJNINatives path,
+          // which clears it). Save the full 64-bit register like a long.
+          // Fall through.
+        case T_LONG:
+        case T_ARRAY: {
+          int offset = slot * VMRegImpl::stack_slot_size;
+          const Register reg = in_regs[i].first()->as_Register();
+          if (map != NULL) {
+            __ z_stg(reg, offset, Z_SP);
+            if (in_sig_bt[i] == T_ARRAY) {
+              // The saved array reference is an oop: describe its slot.
+              map->set_oop(VMRegImpl::stack2reg(slot));
+            }
+          } else {
+            __ z_lg(reg, offset, Z_SP);
+          }
+          // Advance in BOTH directions so that save and restore agree on the
+          // offset of every following argument.
+          slot += VMRegImpl::slots_per_word;
+          assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
+          break;
+        }
+        default:
+          // Other types in a general purpose register are not handled here.
+          break;
+      }
+    }
+  }
+
+  // Save or restore single word registers.
+  for (int i = 0; i < total_in_args; i++) {
+    if (in_regs[i].first()->is_FloatRegister()) {
+      if (in_sig_bt[i] == T_FLOAT) {
+        int offset = slot * VMRegImpl::stack_slot_size;
+        slot++;
+        assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
+        const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
+        Address stackaddr(Z_SP, offset);
+        if (map != NULL) {
+          __ freg2mem_opt(freg, stackaddr, false);
+        } else {
+          __ mem2freg_opt(freg, stackaddr, false);
+        }
+      }
+    } else if (in_regs[i].first()->is_stack() &&
+               in_sig_bt[i] == T_ARRAY && map != NULL) {
+      // Array argument passed on the stack: nothing is copied, only its slot
+      // (biased by out_preserve_stack_slots and by stack_slots) is recorded
+      // in the oop map.
+      int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
+      map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
+    }
+  }
+}
+
+// Check GCLocker::needs_gc and enter the runtime if it's true. This
+// keeps a new JNI critical region from starting until a GC has been
+// forced. Save down any oops in registers and describe them in an OopMap.
+static void check_needs_gc_for_critical_native(MacroAssembler *masm,
+                                                const int stack_slots,
+                                                const int total_in_args,
+                                                const int arg_save_area,
+                                                OopMapSet *oop_maps,
+                                                VMRegPair *in_regs,
+                                                BasicType *in_sig_bt) {
+  __ block_comment("check GCLocker::needs_gc");
+  Label cont;
+
+  // Check GCLocker::_needs_gc flag.
+  __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
+  __ z_cli(0, Z_R1_scratch, 0);
+  __ z_bre(cont);
+
+  // Save down any values that are live in registers and call into the
+  // runtime to halt for a GC.
+  OopMap *gc_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+
+  // Store direction: a non-NULL map makes the helper save the arguments and
+  // mark the slots of array arguments as oops.
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, gc_map, in_regs, in_sig_bt);
+  // NOTE(review): the_pc is not referenced again in this function.
+  address the_pc = __ pc();
+  __ set_last_Java_frame(Z_SP, noreg);
+
+  __ block_comment("block_for_jni_critical");
+  __ z_lgr(Z_ARG1, Z_thread);
+
+  address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
+  __ call_c(runtime_entry);
+  // The map is registered at the code offset right behind the call.
+  oop_maps->add_gc_map(__ offset(), gc_map);
+
+  __ reset_last_Java_frame();
+
+  // Reload all the register arguments.
+  save_or_restore_arguments(masm, stack_slots, total_in_args,
+                            arg_save_area, NULL, in_regs, in_sig_bt);
+
+  __ bind(cont);
+
+  if (StressCriticalJNINatives) {
+    // Stress register saving
+    OopMap *stress_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, stress_map, in_regs, in_sig_bt);
+
+    // Destroy argument registers.
+    for (int arg = 0; arg < total_in_args; arg++) {
+      if (in_regs[arg].first()->is_Register()) {
+        // Don't set CC.
+        __ clear_reg(in_regs[arg].first()->as_Register(), true, false);
+      } else if (in_regs[arg].first()->is_FloatRegister()) {
+        FloatRegister victim = in_regs[arg].first()->as_FloatRegister();
+        __ z_lcdbr(victim, victim);
+      }
+    }
+
+    // Everything destroyed above must come back from the save area.
+    save_or_restore_arguments(masm, stack_slots, total_in_args,
+                              arg_save_area, NULL, in_regs, in_sig_bt);
+  }
+}
+
+// Emit code that moves one value from src to dst, where each side is either
+// a general purpose register or a stack slot.
+// A source stack slot is addressed at Z_SP + its offset + the frame size
+// (framesize_in_slots converted to bytes); a destination stack slot is
+// addressed at Z_SP + its offset. Stack-to-stack moves go through
+// Z_R0_scratch.
+static void move_ptr(MacroAssembler *masm,
+                     VMRegPair src,
+                     VMRegPair dst,
+                     int framesize_in_slots) {
+  const bool dst_on_stack = dst.first()->is_stack();
+
+  if (!src.first()->is_stack()) {
+    // Source is a register.
+    if (dst_on_stack) {
+      // reg to stack
+      __ reg2mem_opt(src.first()->as_Register(), Address(Z_SP, reg2offset(dst.first())));
+    } else {
+      // reg to reg
+      __ lgr_if_needed(dst.first()->as_Register(), src.first()->as_Register());
+    }
+    return;
+  }
+
+  // Source is a stack slot.
+  const int src_bias = framesize_in_slots * VMRegImpl::stack_slot_size;
+  Address src_addr(Z_SP, reg2offset(src.first()) + src_bias);
+
+  if (dst_on_stack) {
+    // stack to stack
+    __ mem2reg_opt(Z_R0_scratch, src_addr);
+    __ reg2mem_opt(Z_R0_scratch, Address(Z_SP, reg2offset(dst.first())));
+  } else {
+    // stack to reg
+    __ mem2reg_opt(dst.first()->as_Register(), src_addr);
+  }
+}
+
+// Unpack an array argument into a pointer to the body and the length
+// if the array is non-null, otherwise pass 0 for both.
+//
+//   reg                - location of the incoming array reference.
+//   in_elem_type       - element type; selects the base offset of the body.
+//   body_arg           - outgoing location of the body pointer.
+//   length_arg         - outgoing location of the length.
+//   framesize_in_slots - forwarded to move_ptr / move32_64.
+static void unpack_array_argument(MacroAssembler *masm,
+                                  VMRegPair reg,
+                                  BasicType in_elem_type,
+                                  VMRegPair body_arg,
+                                  VMRegPair length_arg,
+                                  int framesize_in_slots) {
+  Register tmp_reg = Z_tmp_2;   // Receives the body address.
+  Register tmp2_reg = Z_tmp_1;  // Receives the length.
+
+  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
+         "possible collision");
+
+  // Pass the length, ptr pair.
+  NearLabel set_out_args;
+  VMRegPair body_tmp, length_tmp;
+
+  // Wrap the temporaries so the generic move helpers can take them.
+  body_tmp.set_ptr(tmp_reg->as_VMReg());
+  length_tmp.set_ptr(tmp2_reg->as_VMReg());
+
+  if (reg.first()->is_stack()) {
+    // Load the arg up from the stack.
+    move_ptr(masm, reg, body_tmp, framesize_in_slots);
+    reg = body_tmp;
+  }
+
+  const Register array = reg.first()->as_Register();
+
+  // Preset both results with zero; that is the answer for a null array.
+  // Don't set CC, indicate unused result.
+  (void) __ clear_reg(tmp2_reg, true, false);
+  if (array != tmp_reg) {
+    // Only when tmp_reg does not hold the array reference itself.
+    __ clear_reg(tmp_reg, true, false);  // Don't set CC.
+  }
+  __ compare64_and_branch(array, (RegisterOrConstant)0L, Assembler::bcondEqual, set_out_args);
+
+  // Non-null array: fetch the length and compute the address of the body.
+  __ z_lgf(tmp2_reg, Address(array, arrayOopDesc::length_offset_in_bytes()));
+  __ add2reg(tmp_reg, arrayOopDesc::base_offset_in_bytes(in_elem_type), array);
+
+  __ bind(set_out_args);
+  move_ptr(masm, body_tmp, body_arg, framesize_in_slots);
+  move32_64(masm, length_tmp, length_arg, framesize_in_slots);
+}
+
+//----------------------------------------------------------------------
+// Wrap a JNI call.
+//----------------------------------------------------------------------
+#undef USE_RESIZE_FRAME
+nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
+                                                const methodHandle& method,
+                                                int compile_id,
+                                                BasicType *in_sig_bt,
+                                                VMRegPair *in_regs,
+                                                BasicType ret_type) {
+#ifdef COMPILER2
+  int total_in_args = method->size_of_parameters();
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t) __ pc();
+    int vep_offset = ((intptr_t) __ pc()) - start;
+
+    gen_special_dispatch(masm, total_in_args,
+                         method->intrinsic_id(), in_sig_bt, in_regs);
+
+    int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
+
+    __ flush();
+
+    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
+ + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet *) NULL); + } + + + /////////////////////////////////////////////////////////////////////// + // + // Precalculations before generating any code + // + /////////////////////////////////////////////////////////////////////// + + bool is_critical_native = true; + address native_func = method->critical_native_function(); + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + //--------------------------------------------------------------------- + // We have received a description of where all the java args are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method). + // + // The first hidden argument arg[0] is a pointer to the JNI environment. + // It is generated for every call. + // The second argument arg[1] to the JNI call, which is hidden for static + // methods, is the boxed lock object. For static calls, the lock object + // is the static method itself. The oop is constructed here. for instance + // calls, the lock is performed on the object itself, the pointer of + // which is passed as the first visible argument. + //--------------------------------------------------------------------- + + // Additionally, on z/Architecture we must convert integers + // to longs in the C signature. We do this in advance in order to have + // no trouble with indexes into the bt-arrays. + // So convert the signature and registers now, and adjust the total number + // of in-arguments accordingly. 
+ bool method_is_static = method->is_static(); + int total_c_args = total_in_args; + + if (!is_critical_native) { + int n_hidden_args = method_is_static ? 2 : 1; + total_c_args += n_hidden_args; + } else { + // No JNIEnv*, no this*, but unpacked arrays (base+length). + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args ++; + } + } + } + + BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + // Create the signature for the C call: + // 1) add the JNIEnv* + // 2) add the class if the method is static + // 3) copy the rest of the incoming signature (shifted by the number of + // hidden arguments) + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args; i++) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + int o = 0; + for (int i = 0; i < total_in_args; i++, o++) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as tuples (int, elem*). 
+ Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[o] = T_BYTE; break; + case 'C': in_elem_bt[o] = T_CHAR; break; + case 'D': in_elem_bt[o] = T_DOUBLE; break; + case 'F': in_elem_bt[o] = T_FLOAT; break; + case 'I': in_elem_bt[o] = T_INT; break; + case 'J': in_elem_bt[o] = T_LONG; break; + case 'S': in_elem_bt[o] = T_SHORT; break; + case 'Z': in_elem_bt[o] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + in_elem_bt[o] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + assert(total_in_args == o, "must match"); + + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as tuples (int, elem*). + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } + } + + /////////////////////////////////////////////////////////////////////// + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but providing space + // for storing the first five register arguments). + // It's weird, see int_stk_helper. + /////////////////////////////////////////////////////////////////////// + + //--------------------------------------------------------------------- + // Compute framesize for the wrapper. + // + // - We need to handlize all oops passed in registers. + // - We must create space for them here that is disjoint from the save area. + // - We always just allocate 5 words for storing down these object. + // This allows us to simply record the base and use the Ireg number to + // decide which slot to use. + // - Note that the reg number used to index the stack slot is the inbound + // number, not the outbound number. 
+ // - We must shuffle args to match the native convention, + // and to include var-args space. + //--------------------------------------------------------------------- + + //--------------------------------------------------------------------- + // Calculate the total number of stack slots we will need: + // - 1) abi requirements + // - 2) outgoing args + // - 3) space for inbound oop handle area + // - 4) space for handlizing a klass if static method + // - 5) space for a lock if synchronized method + // - 6) workspace (save rtn value, int<->float reg moves, ...) + // - 7) filler slots for alignment + //--------------------------------------------------------------------- + // Here is how the space we have allocated will look like. + // Since we use resize_frame, we do not create a new stack frame, + // but just extend the one we got with our own data area. + // + // If an offset or pointer name points to a separator line, it is + // assumed that addressing with offset 0 selects storage starting + // at the first byte above the separator line. + // + // + // ... ... + // | caller's frame | + // FP-> |---------------------| + // | filler slots, if any| + // 7| #slots == mult of 2 | + // |---------------------| + // | work space | + // 6| 2 slots = 8 bytes | + // |---------------------| + // 5| lock box (if sync) | + // |---------------------| <- lock_slot_offset + // 4| klass (if static) | + // |---------------------| <- klass_slot_offset + // 3| oopHandle area | + // | (save area for | + // | critical natives) | + // | | + // | | + // |---------------------| <- oop_handle_offset + // 2| outbound memory | + // ... ... + // | based arguments | + // |---------------------| + // | vararg | + // ... ... + // | area | + // |---------------------| <- out_arg_slot_offset + // 1| out_preserved_slots | + // ... ... + // | (z_abi spec) | + // SP-> |---------------------| <- FP_slot_offset (back chain) + // ... ... 
+ // + //--------------------------------------------------------------------- + + // *_slot_offset indicates offset from SP in #stack slots + // *_offset indicates offset from SP in #bytes + + int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2 + SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention + + // Now the space for the inbound oop handle area. + int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word; + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. + int double_slots = 0; + int single_slots = 0; + for (int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: + // Fall through. + case T_ARRAY: + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else { + if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } + } + } // for + total_save_slots = double_slots * 2 + round_to(single_slots, 2); // Round to even. 
+ } + + int oop_handle_slot_offset = stack_slots; + stack_slots += total_save_slots; // 3) + + int klass_slot_offset = 0; + int klass_offset = -1; + if (method_is_static && !is_critical_native) { // 4) + klass_slot_offset = stack_slots; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + stack_slots += VMRegImpl::slots_per_word; + } + + int lock_slot_offset = 0; + int lock_offset = -1; + if (method->is_synchronized()) { // 5) + lock_slot_offset = stack_slots; + lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size; + stack_slots += VMRegImpl::slots_per_word; + } + + int workspace_slot_offset= stack_slots; // 6) + stack_slots += 2; + + // Now compute actual number of stack words we need. + // Round to align stack properly. + stack_slots = round_to(stack_slots, // 7) + frame::alignment_in_bytes / VMRegImpl::stack_slot_size); + int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size; + + + /////////////////////////////////////////////////////////////////////// + // Now we can start generating code + /////////////////////////////////////////////////////////////////////// + + unsigned int wrapper_CodeStart = __ offset(); + unsigned int wrapper_UEPStart; + unsigned int wrapper_VEPStart; + unsigned int wrapper_FrameDone; + unsigned int wrapper_CRegsSet; + Label handle_pending_exception; + Label ic_miss; + + //--------------------------------------------------------------------- + // Unverified entry point (UEP) + //--------------------------------------------------------------------- + wrapper_UEPStart = __ offset(); + + // check ic: object class <-> cached class + if (!method_is_static) __ nmethod_UEP(ic_miss); + // Fill with nops (alignment of verified entry point). 
+ __ align(CodeEntryAlignment); + + //--------------------------------------------------------------------- + // Verified entry point (VEP) + //--------------------------------------------------------------------- + wrapper_VEPStart = __ offset(); + + __ save_return_pc(); + __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame. +#ifndef USE_RESIZE_FRAME + __ push_frame(frame_size_in_bytes); // Create a new frame for the wrapper. +#else + __ resize_frame(-frame_size_in_bytes, Z_R0_scratch); // No new frame for the wrapper. + // Just resize the existing one. +#endif + + wrapper_FrameDone = __ offset(); + + __ verify_thread(); + + // Native nmethod wrappers never take possession of the oop arguments. + // So the caller will gc the arguments. + // The only thing we need an oopMap for is if the call is static. + // + // An OopMap for lock (and class if static), and one for the VM call itself + OopMapSet *oop_maps = new OopMapSet(); + OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + + if (is_critical_native) { + check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, + oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt); + } + + + ////////////////////////////////////////////////////////////////////// + // + // The Grand Shuffle + // + ////////////////////////////////////////////////////////////////////// + // + // We immediately shuffle the arguments so that for any vm call we have + // to make from here on out (sync slow path, jvmti, etc.) we will have + // captured the oops from our caller and have a valid oopMap for them. + // + //-------------------------------------------------------------------- + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* + // (derived from JavaThread* which is in Z_thread) and, if static, + // the class mirror instead of a receiver. This pretty much guarantees that + // register layout will not match. 
We ignore these extra arguments during + // the shuffle. The shuffle is described by the two calling convention + // vectors we have in our possession. We simply walk the java vector to + // get the source locations and the c vector to get the destinations. + // + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the caller. + //-------------------------------------------------------------------- + + // Record sp-based slot for receiver on stack for non-static methods. + int receiver_offset = -1; + + //-------------------------------------------------------------------- + // We move the arguments backwards because the floating point registers + // destination will always be to a register with a greater or equal + // register number or the stack. + // jix is the index of the incoming Java arguments. + // cix is the index of the outgoing C arguments. 
+ //-------------------------------------------------------------------- + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for (int r = 0; r < RegisterImpl::number_of_registers; r++) { + reg_destroyed[r] = false; + } + for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) { + freg_destroyed[f] = false; + } +#endif // ASSERT + + for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) { +#ifdef ASSERT + if (in_regs[jix].first()->is_Register()) { + assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!"); + } else { + if (in_regs[jix].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!"); + } + } + if (out_regs[cix].first()->is_Register()) { + reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true; + } else { + if (out_regs[cix].first()->is_FloatRegister()) { + freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true; + } + } +#endif // ASSERT + + switch (in_sig_bt[jix]) { + // Due to casting, small integers should only occur in pairs with type T_LONG. + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + // Move int and do sign extension. + move32_64(masm, in_regs[jix], out_regs[cix], stack_slots); + break; + + case T_LONG : + long_move(masm, in_regs[jix], out_regs[cix], stack_slots); + break; + + case T_ARRAY: + if (is_critical_native) { + int body_arg = cix; + cix -= 2; // Point to length arg. 
+ unpack_array_argument(masm, in_regs[jix], in_elem_bt[jix], out_regs[body_arg], out_regs[cix], stack_slots); + break; + } + // else fallthrough + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix], + ((jix == 0) && (!method_is_static)), + &receiver_offset); + break; + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset); + break; + + case T_DOUBLE: + assert(jix+1 < total_in_args && in_sig_bt[jix+1] == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list"); + double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset); + break; + + case T_ADDRESS: + assert(false, "found T_ADDRESS in java args"); + break; + + default: + ShouldNotReachHere(); + } + } + + //-------------------------------------------------------------------- + // Pre-load a static method's oop into ARG2. + // Used both by locking code and the normal JNI call code. + //-------------------------------------------------------------------- + if (method_is_static && !is_critical_native) { + __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2); + + // Now handlize the static class mirror in ARG2. It's known not-null. + __ z_stg(Z_ARG2, klass_offset, Z_SP); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + __ add2reg(Z_ARG2, klass_offset, Z_SP); + } + + // Get JNIEnv* which is first argument to native. + if (!is_critical_native) { + __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread); + } + + ////////////////////////////////////////////////////////////////////// + // We have all of the arguments setup at this point. + // We MUST NOT touch any outgoing regs from this point on. + // So if we must call out we must push a new frame. 
+ ////////////////////////////////////////////////////////////////////// + + + // Calc the current pc into Z_R10 and into wrapper_CRegsSet. + // Both values represent the same position. + __ get_PC(Z_R10); // PC into register + wrapper_CRegsSet = __ offset(); // and into into variable. + + // Z_R10 now has the pc loaded that we will use when we finally call to native. + + // We use the same pc/oopMap repeatedly when we call out. + oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map); + + // Lock a synchronized method. + + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + // ATTENTION: args and Z_R10 must be preserved. + Register r_oop = Z_R11; + Register r_box = Z_R12; + Register r_tmp1 = Z_R13; + Register r_tmp2 = Z_R7; + Label done; + + // Load the oop for the object or class. R_carg2_classorobject contains + // either the handlized oop from the incoming arguments or the handlized + // class mirror (if the method is static). + __ z_lg(r_oop, 0, Z_ARG2); + + lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size); + // Get the lock box slot's address. + __ add2reg(r_box, lock_offset, Z_SP); + +#ifdef ASSERT + if (UseBiasedLocking) + // Making the box point to itself will make it clear it went unused + // but also be obviously invalid. + __ z_stg(r_box, 0, r_box); +#endif // ASSERT + + // Try fastpath for locking. + // Fast_lock kills r_temp_1, r_temp_2. (Don't use R1 as temp, won't work!) + __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2); + __ z_bre(done); + + //------------------------------------------------------------------------- + // None of the above fast optimizations worked so we have to get into the + // slow case of monitor enter. Inline a special case of call_VM that + // disallows any pending_exception. 
+ //------------------------------------------------------------------------- + + Register oldSP = Z_R11; + + __ z_lgr(oldSP, Z_SP); + + RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers); + + // Prepare arguments for call. + __ z_lg(Z_ARG1, 0, Z_ARG2); // Ynboxed class mirror or unboxed object. + __ add2reg(Z_ARG2, lock_offset, oldSP); + __ z_lgr(Z_ARG3, Z_thread); + + __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */); + + // Do the call. + __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C)); + __ call(Z_R1_scratch); + + __ reset_last_Java_frame(); + + RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers); +#ifdef ASSERT + { Label L; + __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset())); + __ z_bre(L); + __ stop("no pending exception allowed on exit from IR::monitorenter"); + __ bind(L); + } +#endif + __ bind(done); + } // lock for synchronized methods + + + ////////////////////////////////////////////////////////////////////// + // Finally just about ready to make the JNI call. + ////////////////////////////////////////////////////////////////////// + + // Use that pc we placed in Z_R10 a while back as the current frame anchor. + __ set_last_Java_frame(Z_SP, Z_R10); + + // Transition from _thread_in_Java to _thread_in_native. + __ set_thread_state(_thread_in_native); + + + ////////////////////////////////////////////////////////////////////// + // This is the JNI call. + ////////////////////////////////////////////////////////////////////// + + __ call_c(native_func); + + + ////////////////////////////////////////////////////////////////////// + // We have survived the call once we reach here. + ////////////////////////////////////////////////////////////////////// + + + //-------------------------------------------------------------------- + // Unpack native results. 
+ //-------------------------------------------------------------------- + // For int-types, we do any needed sign-extension required. + // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2 + // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for + // blocking or unlocking. + // An OOP result (handle) is done specially in the slow-path code. + //-------------------------------------------------------------------- + switch (ret_type) { //GLGLGL + case T_VOID: break; // Nothing to do! + case T_FLOAT: break; // Got it where we want it (unless slow-path) + case T_DOUBLE: break; // Got it where we want it (unless slow-path) + case T_LONG: break; // Got it where we want it (unless slow-path) + case T_OBJECT: break; // Really a handle. + // Cannot de-handlize until after reclaiming jvm_lock. + case T_ARRAY: break; + + case T_BOOLEAN: // 0 -> false(0); !0 -> true(1) + __ z_lngfr(Z_RET, Z_RET); // Force sign bit on except for zero. + __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos. + break; + case T_BYTE: __ z_lgbr(Z_RET, Z_RET); break; // sign extension + case T_CHAR: __ z_llghr(Z_RET, Z_RET); break; // unsigned result + case T_SHORT: __ z_lghr(Z_RET, Z_RET); break; // sign extension + case T_INT: __ z_lgfr(Z_RET, Z_RET); break; // sign-extend for beauty. + + default: + ShouldNotReachHere(); + break; + } + + + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // - VM thread changes sync state to synchronizing and suspends threads for GC. + // - Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization in progress, and escapes. 
+ + // Transition from _thread_in_native to _thread_in_native_trans. + __ set_thread_state(_thread_in_native_trans); + + // Safepoint synchronization + //-------------------------------------------------------------------- + // Must we block? + //-------------------------------------------------------------------- + // Block, if necessary, before resuming in _thread_in_Java state. + // In order for GC to work, don't clear the last_Java_sp until after blocking. + //-------------------------------------------------------------------- + Label after_transition; + { + Label no_block, sync; + + save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg. + + if (os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below. + __ z_fence(); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(Z_thread, Z_R1, Z_R2); + } + } + __ generate_safepoint_check(sync, Z_R1, true); + + __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset())); + __ z_bre(no_block); + + // Block. Save any potential method result value before the operation and + // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this + // lets us share the oopMap we used when we went native rather than create + // a distinct one for this pc. + // + __ bind(sync); + __ z_acquire(); + + address entry_point = is_critical_native ? 
CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition) + : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans); + + __ call_VM_leaf(entry_point, Z_thread); + + if (is_critical_native) { + restore_native_result(masm, ret_type, workspace_slot_offset); + __ z_bru(after_transition); // No thread state transition here. + } + __ bind(no_block); + restore_native_result(masm, ret_type, workspace_slot_offset); + } + + //-------------------------------------------------------------------- + // Thread state is thread_in_native_trans. Any safepoint blocking has + // already happened so we can now change state to _thread_in_Java. + //-------------------------------------------------------------------- + // Transition from _thread_in_native_trans to _thread_in_Java. + __ set_thread_state(_thread_in_Java); + __ bind(after_transition); + + + //-------------------------------------------------------------------- + // Reguard any pages if necessary. + // Protect native result from being destroyed. + //-------------------------------------------------------------------- + + Label no_reguard; + + __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(JavaThread::StackGuardState) - 1)), + JavaThread::stack_guard_yellow_reserved_disabled); + + __ z_bre(no_reguard); + + save_native_result(masm, ret_type, workspace_slot_offset); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method); + restore_native_result(masm, ret_type, workspace_slot_offset); + + __ bind(no_reguard); + + + // Synchronized methods (slow path only) + // No pending exceptions for now. + //-------------------------------------------------------------------- + // Handle possibly pending exception (will unlock if necessary). + // Native result is, if any is live, in Z_FRES or Z_RES. 
+ //-------------------------------------------------------------------- + // Unlock + //-------------------------------------------------------------------- + if (method->is_synchronized()) { + const Register r_oop = Z_R11; + const Register r_box = Z_R12; + const Register r_tmp1 = Z_R13; + const Register r_tmp2 = Z_R7; + Label done; + + // Get unboxed oop of class mirror or object ... + int offset = method_is_static ? klass_offset : receiver_offset; + + assert(offset != -1, ""); + __ z_lg(r_oop, offset, Z_SP); + + // ... and address of lock object box. + __ add2reg(r_box, lock_offset, Z_SP); + + // Try fastpath for unlocking. + __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp. + __ z_bre(done); + + // Slow path for unlocking. + // Save and restore any potential method result value around the unlocking operation. + const Register R_exc = Z_R11; + + save_native_result(masm, ret_type, workspace_slot_offset); + + // Must save pending exception around the slow-path VM call. Since it's a + // leaf call, the pending exception (if any) can be kept in a register. + __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset())); + assert(R_exc->is_nonvolatile(), "exception register must be non-volatile"); + + // Must clear pending-exception before re-entering the VM. Since this is + // a leaf call, pending-exception-oop can be safely kept in a register. + __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t)); + + // Inline a special case of call_VM that disallows any pending_exception. + + // Get locked oop from the handle we passed to jni. 
+ __ z_lg(Z_ARG1, offset, Z_SP); + __ add2reg(Z_ARG2, lock_offset, Z_SP); + __ z_lgr(Z_ARG3, Z_thread); + + __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + + __ call(Z_R1_scratch); + +#ifdef ASSERT + { + Label L; + __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset())); + __ z_bre(L); + __ stop("no pending exception allowed on exit from IR::monitorexit"); + __ bind(L); + } +#endif + + // Check_forward_pending_exception jump to forward_exception if any pending + // exception is set. The forward_exception routine expects to see the + // exception in pending_exception and not in a register. Kind of clumsy, + // since all folks who branch to forward_exception must have tested + // pending_exception first and hence have it in a register already. + __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset())); + restore_native_result(masm, ret_type, workspace_slot_offset); + __ z_bru(done); + __ z_illtrap(0x66); + + __ bind(done); + } + + + //-------------------------------------------------------------------- + // Clear "last Java frame" SP and PC. + //-------------------------------------------------------------------- + __ verify_thread(); // Z_thread must be correct. + + __ reset_last_Java_frame(); + + // Unpack oop result + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + NearLabel L; + __ compare64_and_branch(Z_RET, (RegisterOrConstant)0L, Assembler::bcondEqual, L); + __ z_lg(Z_RET, 0, Z_RET); + __ bind(L); + __ verify_oop(Z_RET); + } + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop)); + } + + // Reset handle block. + if (!is_critical_native) { + __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset())); + __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4); + + // Check for pending exceptions. 
+ __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset())); + __ z_brne(handle_pending_exception); + } + + + ////////////////////////////////////////////////////////////////////// + // Return + ////////////////////////////////////////////////////////////////////// + + +#ifndef USE_RESIZE_FRAME + __ pop_frame(); // Pop wrapper frame. +#else + __ resize_frame(frame_size_in_bytes, Z_R0_scratch); // Revert stack extension. +#endif + __ restore_return_pc(); // This is the way back to the caller. + __ z_br(Z_R14); + + + ////////////////////////////////////////////////////////////////////// + // Out-of-line calls to the runtime. + ////////////////////////////////////////////////////////////////////// + + + if (!is_critical_native) { + + //--------------------------------------------------------------------- + // Handler for pending exceptions (out-of-line). + //--------------------------------------------------------------------- + // Since this is a native call, we know the proper exception handler + // is the empty function. We just pop this frame and then jump to + // forward_exception_entry. Z_R14 will contain the native caller's + // return PC. 
+ __ bind(handle_pending_exception); + __ pop_frame(); + __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry()); + __ restore_return_pc(); + __ z_br(Z_R1_scratch); + + //--------------------------------------------------------------------- + // Handler for a cache miss (out-of-line) + //--------------------------------------------------------------------- + __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch); + } + __ flush(); + + + ////////////////////////////////////////////////////////////////////// + // end of code generation + ////////////////////////////////////////////////////////////////////// + + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + (int)(wrapper_VEPStart-wrapper_CodeStart), + (int)(wrapper_FrameDone-wrapper_CodeStart), + stack_slots / VMRegImpl::slots_per_word, + (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_offset), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + + return nm; +#else + ShouldNotReachHere(); + return NULL; +#endif // COMPILER2 +} + +static address gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label &skip_fixup) { + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + + // These two defs MUST MATCH code in gen_i2c2i_adapter! + const Register ientry = Z_R11; + const Register code = Z_R11; + + address c2i_entrypoint; + Label patch_callsite; + + // Regular (verified) c2i entry point. + c2i_entrypoint = __ pc(); + + // Call patching needed? 
+ __ load_and_test_long(Z_R0_scratch, method_(code)); + __ z_lg(ientry, method_(interpreter_entry)); // Preload interpreter entry (also if patching). + __ z_brne(patch_callsite); // Patch required if code != NULL (compiled target exists). + + __ bind(skip_fixup); // Return point from patch_callsite. + + // Since all args are passed on the stack, total_args_passed*wordSize is the + // space we need. We need ABI scratch area but we use the caller's since + // it has already been allocated. + + const int abi_scratch = frame::z_top_ijava_frame_abi_size; + int extraspace = round_to(total_args_passed, 2)*wordSize + abi_scratch; + Register sender_SP = Z_R10; + Register value = Z_R12; + + // Remember the senderSP so we can pop the interpreter arguments off of the stack. + // In addition, frame manager expects initial_caller_sp in Z_R10. + __ z_lgr(sender_SP, Z_SP); + + // This should always fit in 14 bit immediate. + __ resize_frame(-extraspace, Z_R0_scratch); + + // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial + // args. This essentially moves the callers ABI scratch area from the top to the + // bottom of the arg area. + + int st_off = extraspace - wordSize; + + // Now write the args into the outgoing interpreter space. + for (int i = 0; i < total_args_passed; i++) { + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // The calling convention produces OptoRegs that ignore the preserve area (abi scratch). + // We must account for it here. + int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + + if (!r_2->is_valid()) { + __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*)); + } else { + // longs are given 2 64-bit slots in the interpreter, + // but the data is passed in only 1 slot. 
+ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { +#ifdef ASSERT + __ clear_mem(Address(Z_SP, st_off), sizeof(void *)); +#endif + st_off -= wordSize; + } + __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*)); + } + } else { + if (r_1->is_Register()) { + if (!r_2->is_valid()) { + __ z_st(r_1->as_Register(), st_off, Z_SP); + } else { + // longs are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { +#ifdef ASSERT + __ clear_mem(Address(Z_SP, st_off), sizeof(void *)); +#endif + st_off -= wordSize; + } + __ z_stg(r_1->as_Register(), st_off, Z_SP); + } + } else { + assert(r_1->is_FloatRegister(), ""); + if (!r_2->is_valid()) { + __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP); + } else { + // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. + // One of these should get known junk... +#ifdef ASSERT + __ z_lzdr(Z_F1); + __ z_std(Z_F1, st_off, Z_SP); +#endif + st_off-=wordSize; + __ z_std(r_1->as_FloatRegister(), st_off, Z_SP); + } + } + } + st_off -= wordSize; + } + + + // Jump to the interpreter just as if interpreter was doing it. + __ add2reg(Z_esp, st_off, Z_SP); + + // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10. + __ z_br(ientry); + + + // Prevent illegal entry to out-of-line code. + __ z_illtrap(0x22); + + // Generate out-of-line runtime call to patch caller, + // then continue as interpreted. + + // IF you lose the race you go interpreted. + // We don't see any possible endless c2i -> i2c -> c2i ... + // transitions no matter how rare. 
+ __ bind(patch_callsite); + + RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14); + RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers); + __ z_bru(skip_fixup); + + // end of out-of-line code + + return c2i_entrypoint; +} + +// On entry, the following registers are set +// +// Z_thread r8 - JavaThread* +// Z_method r9 - callee's method (method to be invoked) +// Z_esp r7 - operand (or expression) stack pointer of caller. one slot above last arg. +// Z_SP r15 - SP prepared by call stub such that caller's outgoing args are near top +// +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + const Register value = Z_R12; + const Register ld_ptr= Z_esp; + + int ld_offset = total_args_passed * wordSize; + + // Cut-out for having no stack args. + if (comp_args_on_stack) { + // Sig words on the stack are greater than VMRegImpl::stack0. Those in + // registers are below. By subtracting stack0, we either get a negative + // number (all values in registers) or the maximum stack slot accessed. + // Convert VMRegImpl (4 byte) stack slots to words. + int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; + // Round up to minimum stack alignment, in wordSize + comp_words_on_stack = round_to(comp_words_on_stack, 2); + + __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch); + } + + // Now generate the shuffle code. Pick up all register args and move the + // rest through register value=Z_R12. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from ld_ptr.
+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), + "scrambled load targets?"); + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_FloatRegister()) { + if (!r_2->is_valid()) { + __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr); + ld_offset-=wordSize; + } else { + // Skip the unused interpreter slot. + __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr); + ld_offset -= 2 * wordSize; + } + } else { + if (r_1->is_stack()) { + // Must do a memory to memory move. + int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + + if (!r_2->is_valid()) { + __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*)); + } else { + // In 64bit, longs are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + ld_offset -= wordSize; + } + __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*)); + } + } else { + if (!r_2->is_valid()) { + // Not sure we need to do this but it shouldn't hurt. + if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) { + __ z_lg(r_1->as_Register(), ld_offset, ld_ptr); + } else { + __ z_l(r_1->as_Register(), ld_offset, ld_ptr); + } + } else { + // In 64bit, longs are given 2 64-bit slots in the interpreter, but the + // data is passed in only 1 slot. + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { + ld_offset -= wordSize; + } + __ z_lg(r_1->as_Register(), ld_offset, ld_ptr); + } + } + ld_offset -= wordSize; + } + } + + // Jump to the compiled code just as if compiled code was doing it. + // load target address from method oop: + __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset())); + + // Store method oop into thread->callee_target. 
+ // 6243940: We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately, if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find it there should this case occur. + __ z_stg(Z_method, thread_(callee_target)); + + __ z_br(Z_R1_scratch); +} + +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + __ align(CodeEntryAlignment); + address i2c_entry = __ pc(); + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + address c2i_unverified_entry; + + Label skip_fixup; + { + Label ic_miss; + const int klass_offset = oopDesc::klass_offset_in_bytes(); + const int holder_klass_offset = CompiledICHolder::holder_klass_offset(); + const int holder_method_offset = CompiledICHolder::holder_method_offset(); + + // Out-of-line call to ic_miss handler. + __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch); + + // Unverified Entry Point UEP + __ align(CodeEntryAlignment); + c2i_unverified_entry = __ pc(); + + // Check the pointers. + if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) { + __ z_ltgr(Z_ARG1, Z_ARG1); + __ z_bre(ic_miss); + } + __ verify_oop(Z_ARG1); + + // Check ic: object class <-> cached class + // Compress cached class for comparison. That's more efficient. + if (UseCompressedClassPointers) { + __ z_lg(Z_R11, holder_klass_offset, Z_method); // Z_R11 is overwritten a few instructions down anyway. 
+ __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero. + } else { + __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method); + } + __ z_brne(ic_miss); // Cache miss: call runtime to handle this. + + // This def MUST MATCH code in gen_c2i_adapter! + const Register code = Z_R11; + + __ z_lg(Z_method, holder_method_offset, Z_method); + __ load_and_test_long(Z_R0, method_(code)); + __ z_brne(ic_miss); // Cache miss: call runtime to handle this. + + // Fallthru to VEP. Duplicate LTG, but saved taken branch. + } + + address c2i_entry; + c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); +} + +// This function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization. +// +// Actually only compiled frames need to be adjusted, but it +// doesn't harm to adjust entry and interpreter frames, too. +// +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + assert(callee_locals >= callee_parameters, + "test and remove; got more parms than locals"); + // Handle the abi adjustment here instead of doing it in push_skeleton_frames. + return (callee_locals - callee_parameters) * Interpreter::stackElementWords + + frame::z_parent_ijava_frame_abi_size / BytesPerWord; +} + +uint SharedRuntime::out_preserve_stack_slots() { + return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size; +} + +// +// Frame generation for deopt and uncommon trap blobs. 
+// +static void push_skeleton_frame(MacroAssembler* masm, + /* Unchanged */ + Register frame_sizes_reg, + Register pcs_reg, + /* Invalidate */ + Register frame_size_reg, + Register pc_reg) { + BLOCK_COMMENT(" push_skeleton_frame {"); + __ z_lg(pc_reg, 0, pcs_reg); + __ z_lg(frame_size_reg, 0, frame_sizes_reg); + __ z_stg(pc_reg, _z_abi(return_pc), Z_SP); + Register fp = pc_reg; + __ push_frame(frame_size_reg, fp); +#ifdef ASSERT + // The magic is required for successful walking skeletal frames. + __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number); + __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp); + // Fill other slots that are supposedly not necessary with eye catchers. + __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1); + __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp); + // The sender_sp of the bottom frame is set before pushing it. + // The sender_sp of non bottom frames is their caller's top_frame_sp, which + // is unknown here. Luckily it is not needed before filling the frame in + // layout_activation(), we assert this by setting an eye catcher (see + // comments on sender_sp in frame_s390.hpp). + __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP); +#endif // ASSERT + BLOCK_COMMENT(" } push_skeleton_frame"); +} + +// Loop through the UnrollBlock info and create new frames. +static void push_skeleton_frames(MacroAssembler* masm, bool deopt, + /* read */ + Register unroll_block_reg, + /* invalidate */ + Register frame_sizes_reg, + Register number_of_frames_reg, + Register pcs_reg, + Register tmp1, + Register tmp2) { + BLOCK_COMMENT("push_skeleton_frames {"); + // _number_of_frames is of type int (deoptimization.hpp). 
+ __ z_lgf(number_of_frames_reg, + Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); + __ z_lg(pcs_reg, + Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + __ z_lg(frame_sizes_reg, + Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); + + // stack: (caller_of_deoptee, ...). + + // If caller_of_deoptee is a compiled frame, then we extend it to make + // room for the callee's locals and the frame::z_parent_ijava_frame_abi. + // See also Deoptimization::last_frame_adjust() above. + // Note: entry and interpreted frames are adjusted, too. But this doesn't harm. + + __ z_lgf(Z_R1_scratch, + Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes())); + __ z_lgr(tmp1, Z_SP); // Save the sender sp before extending the frame. + __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/); + // The oldest skeletal frame requires a valid sender_sp to make it walkable + // (it is required to find the original pc of caller_of_deoptee if it is marked + // for deoptimization - see nmethod::orig_pc_addr()). + __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP); + + // Now push the new interpreter frames. + Label loop, loop_entry; + + // Make sure that there is at least one entry in the array. + DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg)); + __ asm_assert_ne("array_size must be > 0", 0x205); + + __ z_bru(loop_entry); + + __ bind(loop); + + __ add2reg(frame_sizes_reg, wordSize); + __ add2reg(pcs_reg, wordSize); + + __ bind(loop_entry); + + // Allocate a new frame, fill in the pc. + push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2); + + __ z_aghi(number_of_frames_reg, -1); // Emit AGHI, because it sets the condition code + __ z_brne(loop); + + // Set the top frame's return pc. 
+ __ add2reg(pcs_reg, wordSize); + __ z_lg(Z_R0_scratch, 0, pcs_reg); + __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP); + BLOCK_COMMENT("} push_skeleton_frames"); +} + +//------------------------------generate_deopt_blob---------------------------- +void SharedRuntime::generate_deopt_blob() { + // Allocate space for the code. + ResourceMark rm; + // Setup code generation tools. + CodeBuffer buffer("deopt_blob", 2048, 1024); + InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); + Label exec_mode_initialized; + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + + unsigned int start_off = __ offset(); + Label cont; + + // -------------------------------------------------------------------------- + // Normal entry (non-exception case) + // + // We have been called from the deopt handler of the deoptee. + // Z_R14 points behind the call in the deopt handler. We adjust + // it such that it points to the start of the deopt handler. + // The return_pc has been stored in the frame of the deoptee and + // will replace the address of the deopt_handler in the call + // to Deoptimization::fetch_unroll_info below. + // The (int) cast is necessary, because -((unsigned int)14) + // is an unsigned int. + __ add2reg(Z_R14, -(int)HandlerImpl::size_deopt_handler()); + + const Register exec_mode_reg = Z_tmp_1; + + // stack: (deoptee, caller of deoptee, ...) + + // pushes an "unpack" frame + // R14 contains the return address pointing into the deoptimized + // nmethod that was valid just before the nmethod was deoptimized. + // save R14 into the deoptee frame. the `fetch_unroll_info' + // procedure called below will read it from there. + map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers); + + // note the entry point. 
+ __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt); + __ z_bru(exec_mode_initialized); + +#ifndef COMPILER1 + int reexecute_offset = 1; // odd offset will produce odd pc, which triggers a hardware trap +#else + // -------------------------------------------------------------------------- + // Reexecute entry + // - Z_R14 = Deopt Handler in nmethod + + int reexecute_offset = __ offset() - start_off; + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers); + + __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute); + __ z_bru(exec_mode_initialized); +#endif + + + // -------------------------------------------------------------------------- + // Exception entry. We reached here via a branch. Registers on entry: + // - Z_EXC_OOP (Z_ARG1) = exception oop + // - Z_EXC_PC (Z_ARG2) = the exception pc. + + int exception_offset = __ offset() - start_off; + + // all registers are dead at this entry point, except for Z_EXC_OOP, and + // Z_EXC_PC which contain the exception oop and exception pc + // respectively. Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. + + // Store exception oop and pc in thread (location known to GC). + // Need this since the call to "fetch_unroll_info()" may safepoint. + __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset())); + __ z_stg(Z_EXC_PC, Address(Z_thread, JavaThread::exception_pc_offset())); + + // fall through + + int exception_in_tls_offset = __ offset() - start_off; + + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exception oop and throwing PC are passed in JavaThread + + // load throwing pc from JavaThread and use it as the return address of the current frame.
+ __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset())); + + // Save everything in sight. + (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch); + + // Now it is safe to overwrite any register + + // Clear the exception pc field in JavaThread + __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8); + + // Deopt during an exception. Save exec mode for unpack_frames. + __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception); + + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset())); + __ verify_oop(Z_ARG1); + + // verify that there is no pending exception + __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread, + "must not have pending exception here", __LINE__); +#endif + + // -------------------------------------------------------------------------- + // At this point, the live registers are saved and + // the exec_mode_reg has been set up correctly. + __ bind(exec_mode_initialized); + + // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...). + + { + const Register unroll_block_reg = Z_tmp_2; + + // we need to set `last_Java_frame' because `fetch_unroll_info' will + // call `last_Java_frame()'. however we can't block and no gc will + // occur so we don't need an oopmap. the value of the pc in the + // frame is not particularly important. it just needs to identify the blob. + + // Don't set last_Java_pc anymore here (is implicitly NULL then). + // the correct PC is retrieved in pd_last_frame() in that case. + __ set_last_Java_frame(/*sp*/Z_SP, noreg); + // With EscapeAnalysis turned on, this call may safepoint + // despite it's marked as "leaf call"! 
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg); + // Set an oopmap for the call site this describes all our saved volatile registers + int offs = __ offset(); + oop_maps->add_gc_map(offs, map); + + __ reset_last_Java_frame(); + // save the return value. + __ z_lgr(unroll_block_reg, Z_RET); + // restore the return registers that have been saved + // (among other registers) by save_live_registers(...). + RegisterSaver::restore_result_registers(masm); + + // reload the exec mode from the UnrollBlock (it might have changed) + __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); + + // In excp_deopt_mode, restore and clear exception oop which we + // stored in the thread during exception entry above. The exception + // oop will be the return value of this stub. + NearLabel skip_restore_excp; + __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp); + __ z_lg(Z_RET, thread_(exception_oop)); + __ clear_mem(thread_(exception_oop), 8); + __ bind(skip_restore_excp); + + // remove the "unpack" frame + __ pop_frame(); + + // stack: (deoptee, caller of deoptee, ...). + + // pop the deoptee's frame + __ pop_frame(); + + // stack: (caller_of_deoptee, ...). + + // loop through the `UnrollBlock' info and create interpreter frames. + push_skeleton_frames(masm, true/*deopt*/, + unroll_block_reg, + Z_tmp_3, + Z_tmp_4, + Z_ARG5, + Z_ARG4, + Z_ARG3); + + // stack: (skeletal interpreter frame, ..., optional skeletal + // interpreter frame, caller of deoptee, ...). + } + + // push an "unpack" frame taking care of float / int return values. + __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)); + + // stack: (unpack frame, skeletal interpreter frame, ..., optional + // skeletal interpreter frame, caller of deoptee, ...). + + // spill live volatile registers since we'll do a call. 
+ __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP); + __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP); + + // let the unpacker layout information in the skeletal frames just allocated. + __ get_PC(Z_RET); + __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), + Z_thread/*thread*/, exec_mode_reg/*exec_mode*/); + + __ reset_last_Java_frame(); + + // restore the volatiles saved above. + __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP); + __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP); + + // pop the "unpack" frame. + __ pop_frame(); + __ restore_return_pc(); + + // stack: (top interpreter frame, ..., optional interpreter frame, + // caller of deoptee, ...). + + __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer + __ restore_bcp(); + __ restore_locals(); + __ restore_esp(); + + // return to the interpreter entry point. + __ z_br(Z_R14); + + // Make sure all code is generated + masm->flush(); + + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +} + + +#ifdef COMPILER2 +//------------------------------generate_uncommon_trap_blob-------------------- +void SharedRuntime::generate_uncommon_trap_blob() { + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); + InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); + + Register unroll_block_reg = Z_tmp_1; + Register klass_index_reg = Z_ARG2; + Register unc_trap_reg = Z_ARG2; + + // stack: (deoptee, caller_of_deoptee, ...). 
+ + // push a dummy "unpack" frame and call + // `Deoptimization::uncommon_trap' to pack the compiled frame into a + // vframe array and return the `UnrollBlock' information. + + // save R14 to compiled frame. + __ save_return_pc(); + // push the "unpack_frame". + __ push_frame_abi160(0); + + // stack: (unpack frame, deoptee, caller_of_deoptee, ...). + + // set the "unpack" frame as last_Java_frame. + // `Deoptimization::uncommon_trap' expects it and considers its + // sender frame as the deoptee frame. + __ get_PC(Z_R1_scratch); + __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch); + + __ z_lgr(klass_index_reg, Z_ARG1); // passed implicitly as ARG2 + __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap); // passed implicitly as ARG3 + BLOCK_COMMENT("call Deoptimization::uncommon_trap()"); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread); + + __ reset_last_Java_frame(); + + // pop the "unpack" frame + __ pop_frame(); + + // stack: (deoptee, caller_of_deoptee, ...). + + // save the return value. + __ z_lgr(unroll_block_reg, Z_RET); + + // pop the deoptee frame. + __ pop_frame(); + + // stack: (caller_of_deoptee, ...). 
+ +#ifdef ASSERT + assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates"); + assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates"); + const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes() +#ifndef VM_LITTLE_ENDIAN + + 3 +#endif + ; + if (Displacement::is_shortDisp(unpack_kind_byte_offset)) { + __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap); + } else { + __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap); + } + __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0); +#endif + + __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1); + + // allocate new interpreter frame(s) and possibly resize the caller's frame + // (no more adapters !) + push_skeleton_frames(masm, false/*deopt*/, + unroll_block_reg, + Z_tmp_2, + Z_tmp_3, + Z_tmp_4, + Z_ARG5, + Z_ARG4); + + // stack: (skeletal interpreter frame, ..., optional skeletal + // interpreter frame, (resized) caller of deoptee, ...). + + // push a dummy "unpack" frame taking care of float return values. + // call `Deoptimization::unpack_frames' to layout information in the + // interpreter frames just created + + // push the "unpack" frame + const unsigned int framesize_in_bytes = __ push_frame_abi160(0); + + // stack: (unpack frame, skeletal interpreter frame, ..., optional + // skeletal interpreter frame, (resized) caller of deoptee, ...). + + // set the "unpack" frame as last_Java_frame + __ get_PC(Z_R1_scratch); + __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch); + + // indicate it is the uncommon trap case + BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()"); + __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap); + // let the unpacker layout information in the skeletal frames just allocated. 
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread); + + __ reset_last_Java_frame(); + // pop the "unpack" frame + __ pop_frame(); + // restore LR from top interpreter frame + __ restore_return_pc(); + + // stack: (top interpreter frame, ..., optional interpreter frame, + // (resized) caller of deoptee, ...). + + __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer + __ restore_bcp(); + __ restore_locals(); + __ restore_esp(); + + // return to the interpreter entry point + __ z_br(Z_R14); + + masm->flush(); + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize); +} +#endif // COMPILER2 + + +//------------------------------generate_handler_blob------ +// +// Generate a special Compile2Runtime blob that saves all registers, +// and setup oopmap. +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { + assert(StubRoutines::forward_exception_entry() != NULL, + "must be generated before"); + + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // Allocate space for the code. Setup code generation tools. + CodeBuffer buffer("handler_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + unsigned int start_off = __ offset(); + address call_pc = NULL; + int frame_size_in_bytes; + + bool cause_return = (poll_type == POLL_AT_RETURN); + // Make room for return address (or push it again) + if (!cause_return) + __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset())); + + // Save registers, fpu state, and flags + map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers); + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work ourselves.
+ __ set_last_Java_frame(Z_SP, noreg); + + // call into the runtime to handle the safepoint poll + __ call_VM_leaf(call_ptr, Z_thread); + + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + + oop_maps->add_gc_map((int)(__ offset()-start_off), map); + + Label noException; + + __ reset_last_Java_frame(); + + __ load_and_test_long(Z_R1, thread_(pending_exception)); + __ z_bre(noException); + + // Pending exception case, used (sporadically) by + // api/java_lang/Thread.State/index#ThreadState et al. + RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers); + + // Jump to forward_exception_entry, with the issuing PC in Z_R14 + // so it looks like the original nmethod called forward_exception_entry. + __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry()); + __ z_br(Z_R1_scratch); + + // No exception case + __ bind(noException); + + // Normal exit, restore registers and exit. + RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers); + + __ z_br(Z_R14); + + // Make sure all code is generated + masm->flush(); + + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize); +} + + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss) +// +// Generate a stub that calls into vm to find out the proper destination +// of a Java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args.
+// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + CodeBuffer buffer(name, 1000, 512); + MacroAssembler* masm = new MacroAssembler(&buffer); + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + unsigned int start_off = __ offset(); + + map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers); + + // We must save a PC from within the stub as return PC + // C code doesn't store the LR where we expect the PC, + // so we would run into trouble upon stack walking. + __ get_PC(Z_R1_scratch); + + unsigned int frame_complete = __ offset(); + + __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch); + + __ call_VM_leaf(destination, Z_thread, Z_method); + + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. + + oop_maps->add_gc_map((int)(frame_complete-start_off), map); + + // clear last_Java_sp + __ reset_last_Java_frame(); + + // check for pending exceptions + Label pending; + __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset())); + __ z_brne(pending); + + __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation. + RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers); + + // get the returned method + __ get_vm_result_2(Z_method); + + // We are back to the original state on entry and ready to go.
+ __ z_br(Z_R1_scratch); + + // Pending exception after the safepoint + + __ bind(pending); + + RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers); + + // exception pending => remove activation and forward to exception handler + + __ z_lgr(Z_R2, Z_R0); // pending_exception + __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong)); + __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry()); + __ z_br(Z_R1_scratch); + + // ------------- + // make sure all code is generated + masm->flush(); + + // return the blob + // frame_size_words or bytes?? + return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize, + oop_maps, true); + +} + +//------------------------------Montgomery multiplication------------------------ +// + +// Subtract 0:b from carry:a. Return carry. +static unsigned long +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { + unsigned long i, c = 8 * (unsigned long)(len - 1); + __asm__ __volatile__ ( + "SLGR %[i], %[i] \n" // initialize to 0 and pre-set carry + "LGHI 0, 8 \n" // index increment (for BRXLG) + "LGR 1, %[c] \n" // index limit (for BRXLG) + "0: \n" + "LG %[c], 0(%[i],%[a]) \n" + "SLBG %[c], 0(%[i],%[b]) \n" // subtract with borrow + "STG %[c], 0(%[i],%[a]) \n" + "BRXLG %[i], 0, 0b \n" // while ((i+=8) 0) { + d--; + unsigned long s_val = *s; + // Swap words in a longword on little endian machines. +#ifdef VM_LITTLE_ENDIAN + Unimplemented(); +#endif + *d = s_val; + s++; + } +} + +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls. + assert(len % 2 == 0, "array length in montgomery_multiply must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 
512 jints corresponds to an 16384-bit integer and + // will use here a total of 8k bytes of stack space. + int total_allocation = longwords * sizeof (unsigned long) * 4; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *b = scratch + 1 * longwords, + *n = scratch + 2 * longwords, + *m = scratch + 3 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)b_ints, b, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); + + reverse_words(m, (unsigned long *)m_ints, longwords); +} + +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls. + assert(len % 2 == 0, "array length in montgomery_square must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 6k bytes of stack space. 
+ int total_allocation = longwords * sizeof (unsigned long) * 3; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *n = scratch + 1 * longwords, + *m = scratch + 2 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + if (len >= MONTGOMERY_SQUARING_THRESHOLD) { + ::montgomery_square(a, n, m, (unsigned long)inv, longwords); + } else { + ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); + } + + reverse_words(m, (unsigned long *)m_ints, longwords); +} + +extern "C" +int SpinPause() { + return 0; +} diff --git a/hotspot/src/cpu/s390/vm/stubGenerator_s390.cpp b/hotspot/src/cpu/s390/vm/stubGenerator_s390.cpp new file mode 100644 index 00000000000..c2584c5cf98 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/stubGenerator_s390.cpp @@ -0,0 +1,2563 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "registerSaver_s390.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interp_masm.hpp" +#include "nativeInst_s390.hpp" +#include "oops/instanceOop.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp. + +#ifdef PRODUCT +#define __ _masm-> +#else +#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)-> +#endif + +#define BLOCK_COMMENT(str) if (PrintAssembly) __ block_comment(str) +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// ----------------------------------------------------------------------- +// Stub Code definitions + +class StubGenerator: public StubCodeGenerator { + private: + + //---------------------------------------------------------------------- + // Call stubs are used to call Java from C. + + // + // Arguments: + // + // R2 - call wrapper address : address + // R3 - result : intptr_t* + // R4 - result type : BasicType + // R5 - method : method + // R6 - frame mgr entry point : address + // [SP+160] - parameter block : intptr_t* + // [SP+172] - parameter count in words : int + // [SP+176] - thread : Thread* + // + address generate_call_stub(address& return_address) { + // Set up a new C frame, copy Java arguments, call frame manager + // or native_entry, and process result. 
+ + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + Register r_arg_call_wrapper_addr = Z_ARG1; + Register r_arg_result_addr = Z_ARG2; + Register r_arg_result_type = Z_ARG3; + Register r_arg_method = Z_ARG4; + Register r_arg_entry = Z_ARG5; + + // offsets to fp + #define d_arg_thread 176 + #define d_arg_argument_addr 160 + #define d_arg_argument_count 168+4 + + Register r_entryframe_fp = Z_tmp_1; + Register r_top_of_arguments_addr = Z_ARG4; + Register r_new_arg_entry = Z_R14; + + // macros for frame offsets + #define call_wrapper_address_offset \ + _z_entry_frame_locals_neg(call_wrapper_address) + #define result_address_offset \ + _z_entry_frame_locals_neg(result_address) + #define result_type_offset \ + _z_entry_frame_locals_neg(result_type) + #define arguments_tos_address_offset \ + _z_entry_frame_locals_neg(arguments_tos_address) + + { + // + // STACK on entry to call_stub: + // + // F1 [C_FRAME] + // ... + // + + Register r_argument_addr = Z_tmp_3; + Register r_argumentcopy_addr = Z_tmp_4; + Register r_argument_size_in_bytes = Z_ARG5; + Register r_frame_size = Z_R1; + + Label arguments_copied; + + // Save non-volatile registers to ABI of caller frame. + BLOCK_COMMENT("save registers, push frame {"); + __ z_stmg(Z_R6, Z_R14, 16, Z_SP); + __ z_std(Z_F8, 96, Z_SP); + __ z_std(Z_F9, 104, Z_SP); + __ z_std(Z_F10, 112, Z_SP); + __ z_std(Z_F11, 120, Z_SP); + __ z_std(Z_F12, 128, Z_SP); + __ z_std(Z_F13, 136, Z_SP); + __ z_std(Z_F14, 144, Z_SP); + __ z_std(Z_F15, 152, Z_SP); + + // + // Push ENTRY_FRAME including arguments: + // + // F0 [TOP_IJAVA_FRAME_ABI] + // [outgoing Java arguments] + // [ENTRY_FRAME_LOCALS] + // F1 [C_FRAME] + // ... + // + + // Calculate new frame size and push frame. + #define abi_plus_locals_size \ + (frame::z_top_ijava_frame_abi_size + frame::z_entry_frame_locals_size) + if (abi_plus_locals_size % BytesPerWord == 0) { + // Preload constant part of frame size. 
+ __ load_const_optimized(r_frame_size, -abi_plus_locals_size/BytesPerWord); + // Keep copy of our frame pointer (caller's SP). + __ z_lgr(r_entryframe_fp, Z_SP); + // Add space required by arguments to frame size. + __ z_slgf(r_frame_size, d_arg_argument_count, Z_R0, Z_SP); + // Move Z_ARG5 early, it will be used as a local. + __ z_lgr(r_new_arg_entry, r_arg_entry); + // Convert frame size from words to bytes. + __ z_sllg(r_frame_size, r_frame_size, LogBytesPerWord); + __ push_frame(r_frame_size, r_entryframe_fp, + false/*don't copy SP*/, true /*frame size sign inverted*/); + } else { + guarantee(false, "frame sizes should be multiples of word size (BytesPerWord)"); + } + BLOCK_COMMENT("} save, push"); + + // Load argument registers for call. + BLOCK_COMMENT("prepare/copy arguments {"); + __ z_lgr(Z_method, r_arg_method); + __ z_lg(Z_thread, d_arg_thread, r_entryframe_fp); + + // Calculate top_of_arguments_addr which will be tos (not prepushed) later. + // Simply use SP + frame::top_ijava_frame_size. + __ add2reg(r_top_of_arguments_addr, + frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP); + + // Initialize call_stub locals (step 1). + if ((call_wrapper_address_offset + BytesPerWord == result_address_offset) && + (result_address_offset + BytesPerWord == result_type_offset) && + (result_type_offset + BytesPerWord == arguments_tos_address_offset)) { + + __ z_stmg(r_arg_call_wrapper_addr, r_top_of_arguments_addr, + call_wrapper_address_offset, r_entryframe_fp); + } else { + __ z_stg(r_arg_call_wrapper_addr, + call_wrapper_address_offset, r_entryframe_fp); + __ z_stg(r_arg_result_addr, + result_address_offset, r_entryframe_fp); + __ z_stg(r_arg_result_type, + result_type_offset, r_entryframe_fp); + __ z_stg(r_top_of_arguments_addr, + arguments_tos_address_offset, r_entryframe_fp); + } + + // Copy Java arguments. + + // Any arguments to copy?
+ __ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count)); + __ z_bre(arguments_copied); + + // Prepare loop and copy arguments in reverse order. + { + // Calculate argument size in bytes. + __ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord); + + // Get addr of first incoming Java argument. + __ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp); + + // Let r_argumentcopy_addr point to last outgoing Java argument. + __ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively. + + // Let r_argument_addr point to last incoming Java argument. + __ add2reg_with_index(r_argument_addr, -BytesPerWord, + r_argument_size_in_bytes, r_argument_addr); + + // Now loop while Z_R1 > 0 and copy arguments. + { + Label next_argument; + __ bind(next_argument); + // Mem-mem move. + __ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr); + __ add2reg(r_argument_addr, -BytesPerWord); + __ add2reg(r_argumentcopy_addr, BytesPerWord); + __ z_brct(Z_R1, next_argument); + } + } // End of argument copy loop. + + __ bind(arguments_copied); + } + BLOCK_COMMENT("} arguments"); + + BLOCK_COMMENT("call {"); + { + // Call frame manager or native entry. + + // + // Register state on entry to frame manager / native entry: + // + // Z_ARG1 = r_top_of_arguments_addr - intptr_t *sender tos (prepushed) + // Lesp = (SP) + copied_arguments_offset - 8 + // Z_method - method + // Z_thread - JavaThread* + // + + // Here, the usual SP is the initial_caller_sp. + __ z_lgr(Z_R10, Z_SP); + + // Z_esp points to the slot below the last argument. + __ z_lgr(Z_esp, r_top_of_arguments_addr); + + // + // Stack on entry to frame manager / native entry: + // + // F0 [TOP_IJAVA_FRAME_ABI] + // [outgoing Java arguments] + // [ENTRY_FRAME_LOCALS] + // F1 [C_FRAME] + // ... 
+ // + + // Do a light-weight C-call here, r_new_arg_entry holds the address + // of the interpreter entry point (frame manager or native entry) + // and save runtime-value of return_pc in return_address + // (call by reference argument). + return_address = __ call_stub(r_new_arg_entry); + } + BLOCK_COMMENT("} call"); + + { + BLOCK_COMMENT("restore registers {"); + // Returned from frame manager or native entry. + // Now pop frame, process result, and return to caller. + + // + // Stack on exit from frame manager / native entry: + // + // F0 [ABI] + // ... + // [ENTRY_FRAME_LOCALS] + // F1 [C_FRAME] + // ... + // + // Just pop the topmost frame ... + // + + Label ret_is_object; + Label ret_is_long; + Label ret_is_float; + Label ret_is_double; + + // Restore frame pointer. + __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP); + // Pop frame. Done here to minimize stalls. + __ z_lg(Z_SP, _z_abi(callers_sp), Z_SP); + + // Reload some volatile registers which we've spilled before the call + // to frame manager / native entry. + // Access all locals via frame pointer, because we know nothing about + // the topmost frame's size. + __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp); + __ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp); + + // Restore non-volatiles. + __ z_lmg(Z_R6, Z_R14, 16, Z_SP); + __ z_ld(Z_F8, 96, Z_SP); + __ z_ld(Z_F9, 104, Z_SP); + __ z_ld(Z_F10, 112, Z_SP); + __ z_ld(Z_F11, 120, Z_SP); + __ z_ld(Z_F12, 128, Z_SP); + __ z_ld(Z_F13, 136, Z_SP); + __ z_ld(Z_F14, 144, Z_SP); + __ z_ld(Z_F15, 152, Z_SP); + BLOCK_COMMENT("} restore"); + + // + // Stack on exit from call_stub: + // + // 0 [C_FRAME] + // ... + // + // No call_stub frames left. + // + + // All non-volatiles have been restored at this point!! + + //------------------------------------------------------------------------ + // The following code makes some assumptions on the T_ enum values. + // The enum is defined in globalDefinitions.hpp. 
+ // The validity of the assumptions is tested as far as possible. + // The assigned values should not be shuffled + // T_BOOLEAN==4 - lowest used enum value + // T_NARROWOOP==16 - largest used enum value + //------------------------------------------------------------------------ + BLOCK_COMMENT("process result {"); + Label firstHandler; + int handlerLen= 8; +#ifdef ASSERT + char assertMsg[] = "check BasicType definition in globalDefinitions.hpp"; + __ z_chi(r_arg_result_type, T_BOOLEAN); + __ asm_assert_low(assertMsg, 0x0234); + __ z_chi(r_arg_result_type, T_NARROWOOP); + __ asm_assert_high(assertMsg, 0x0235); +#endif + __ add2reg(r_arg_result_type, -T_BOOLEAN); // Remove offset. + __ z_larl(Z_R1, firstHandler); // location of first handler + __ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long. + __ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1); + + __ align(handlerLen); + __ bind(firstHandler); + // T_BOOLEAN: + guarantee(T_BOOLEAN == 4, "check BasicType definition in globalDefinitions.hpp"); + __ z_st(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_CHAR: + guarantee(T_CHAR == T_BOOLEAN+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_st(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_FLOAT: + guarantee(T_FLOAT == T_CHAR+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_ste(Z_FRET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_DOUBLE: + guarantee(T_DOUBLE == T_FLOAT+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_std(Z_FRET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_BYTE: + guarantee(T_BYTE == T_DOUBLE+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_st(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. 
+ __ align(handlerLen); + // T_SHORT: + guarantee(T_SHORT == T_BYTE+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_st(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_INT: + guarantee(T_INT == T_SHORT+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_st(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_LONG: + guarantee(T_LONG == T_INT+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_stg(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_OBJECT: + guarantee(T_OBJECT == T_LONG+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_stg(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_ARRAY: + guarantee(T_ARRAY == T_OBJECT+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_stg(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_VOID: + guarantee(T_VOID == T_ARRAY+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_stg(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_ADDRESS: + guarantee(T_ADDRESS == T_VOID+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_stg(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + // T_NARROWOOP: + guarantee(T_NARROWOOP == T_ADDRESS+1, "check BasicType definition in globalDefinitions.hpp"); + __ z_st(Z_RET, 0, r_arg_result_addr); + __ z_br(Z_R14); // Return to caller. + __ align(handlerLen); + BLOCK_COMMENT("} process result"); + } + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. 
+  //
+  // Generator notes:
+  //   Emits the "catch_exception" stub and returns the address of its
+  //   first instruction. The emitted code stores the exception passed in
+  //   Z_ARG1 into the thread's pending-exception slot, records the
+  //   __FILE__/__LINE__ of this generator source alongside it, and then
+  //   branches to the address passed in Z_ARG2.
+  address generate_catch_exception() {
+    StubCodeMark mark(this, "StubRoutines", "catch_exception");
+
+    address start = __ pc();
+
+    //
+    // Registers alive
+    //
+    //   Z_thread
+    //   Z_ARG1 - address of pending exception
+    //   Z_ARG2 - return address in call stub
+    //
+
+    // Scratch registers that carry the source location into the thread.
+    const Register exception_file = Z_R0;
+    const Register exception_line = Z_R1;
+
+    // The location recorded is that of this generator source, not of the
+    // Java code that threw.
+    __ load_const_optimized(exception_file, (void*)__FILE__);
+    __ load_const_optimized(exception_line, (void*)__LINE__);
+
+    // Publish the exception as the thread's pending exception.
+    // NOTE(review): thread_(x) presumably addresses field x relative to
+    // Z_thread - confirm against its definition.
+    __ z_stg(Z_ARG1, thread_(pending_exception));
+    // Store into `char *'.
+    __ z_stg(exception_file, thread_(exception_file));
+    // Store into `int'.
+    __ z_st(exception_line, thread_(exception_line));
+
+    // Complete return to VM.
+    // Generation-time check (no code is emitted for it): the call stub
+    // must already exist.
+    assert(StubRoutines::_call_stub_return_address != NULL, "must have been generated before");
+
+    // Continue in call stub.
+    __ z_br(Z_ARG2);
+
+    return start;
+  }
+
+  // Continuation point for runtime calls returning with a pending
+  // exception. The pending exception check happened in the runtime
+  // or native call stub. The pending exception in Thread is
+  // converted into a Java-level exception.
+  //
+  // Read:
+  //   Z_R14: pc the runtime library callee wants to return to.
+  //   Since the exception occurred in the callee, the return pc
+  //   from the point of view of Java is the exception pc.
+  //
+  // Invalidate:
+  //   Volatile registers (except below).
+  //
+  // Update:
+  //   Z_ARG1: exception
+  //   (Z_R14 is unchanged and is live out).
+  //
+  // Returns the address of the first instruction of the emitted stub.
+  // Emitted sequence: look up the handler for the return pc through
+  // SharedRuntime::exception_handler_for_return_address, load the pending
+  // exception into Z_ARG1 and the exception pc into Z_ARG2, clear the
+  // thread's pending-exception slot, and branch to the handler.
+  //
+  address generate_forward_exception() {
+    StubCodeMark mark(this, "StubRoutines", "forward_exception");
+    address start = __ pc();
+
+    // Local shorthand for the pending-exception field offset; undefined
+    // again at the end of this method.
+    #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
+#ifdef ASSERT
+    // Get pending exception oop.
+    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);
+
+    // Make sure that this code is only executed if there is a pending exception.
+    {
+      Label L;
+      __ z_ltgr(Z_ARG1, Z_ARG1);
+      __ z_brne(L);
+      __ stop("StubRoutines::forward exception: no pending exception (1)");
+      __ bind(L);
+    }
+
+    __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
+#endif
+
+    // Z_ARG2 is the second argument of the handler lookup call below.
+    __ z_lgr(Z_ARG2, Z_R14); // Copy exception pc into Z_ARG2.
+    // NOTE(review): save_return_pc()/push_frame_abi160(0) presumably park
+    // Z_R14 and push a minimal ABI frame for the C call; the matching
+    // pop_frame()/restore_return_pc() follow the call - confirm.
+    __ save_return_pc();
+    __ push_frame_abi160(0);
+    // Find exception handler.
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
+                    Z_thread,
+                    Z_ARG2);
+    // Copy handler's address.
+    // It is moved out of Z_RET into Z_R1, which is the branch target at
+    // the end of the stub.
+    __ z_lgr(Z_R1, Z_RET);
+    __ pop_frame();
+    __ restore_return_pc();
+
+    // Set up the arguments for the exception handler:
+    // - Z_ARG1: exception oop
+    // - Z_ARG2: exception pc
+
+    // Load pending exception oop.
+    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);
+
+    // The exception pc is the return address in the caller,
+    // must load it into Z_ARG2
+    // (Z_ARG2 was an argument register of the call above, so it is loaded
+    // again from Z_R14 here.)
+    __ z_lgr(Z_ARG2, Z_R14);
+
+#ifdef ASSERT
+    // Make sure exception is set.
+    { Label L;
+      __ z_ltgr(Z_ARG1, Z_ARG1);
+      __ z_brne(L);
+      __ stop("StubRoutines::forward exception: no pending exception (2)");
+      __ bind(L);
+    }
+#endif
+    // Clear the pending exception.
+    __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
+    // Jump to exception handler
+    __ z_br(Z_R1 /*handler address*/);
+
+    return start;
+
+    // Preprocessor directive only: its position after the return has no
+    // effect on the generated stub.
+    #undef pending_exception_offset
+  }
+
+  // Continuation point for throwing of implicit exceptions that are
+  // not handled in the current activation. Fabricates an exception
+  // oop and initiates normal exception dispatching in this
+  // frame. Only callee-saved registers are preserved (through the
+  // normal RegisterMap handling). If the compiler
+  // needs all registers to be preserved between the fault point and
+  // the exception handler then it must assume responsibility for that
+  // in AbstractCompiler::continuation_for_implicit_null_exception or
+  // continuation_for_implicit_division_by_zero_exception.
All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + + // Note that we generate only this stub into a RuntimeStub, because + // it needs to be properly traversed and ignored during GC, so we + // change the meaning of the "__" macro within this method. + + // Note: the routine set_pc_not_at_call_for_caller in + // SharedRuntime.cpp requires that this code be generated into a + // RuntimeStub. +#undef __ +#define __ masm-> + + address generate_throw_exception(const char* name, address runtime_entry, + bool restore_saved_exception_pc, + Register arg1 = noreg, Register arg2 = noreg) { + int insts_size = 256; + int locs_size = 0; + CodeBuffer code(name, insts_size, locs_size); + MacroAssembler* masm = new MacroAssembler(&code); + int framesize_in_bytes; + address start = __ pc(); + + __ save_return_pc(); + framesize_in_bytes = __ push_frame_abi160(0); + + address frame_complete_pc = __ pc(); + if (restore_saved_exception_pc) { + __ unimplemented("StubGenerator::throw_exception", 74); + } + + // Note that we always have a runtime stub frame on the top of stack at this point. + __ get_PC(Z_R1); + __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1); + + // Do the call. + BLOCK_COMMENT("call runtime_entry"); + __ call_VM_leaf(runtime_entry, Z_thread, arg1, arg2); + + __ reset_last_Java_frame(); + +#ifdef ASSERT + // Make sure that this code is only executed if there is a pending exception. 
+ { Label L; + __ z_lg(Z_R0, + in_bytes(Thread::pending_exception_offset()), + Z_thread); + __ z_ltgr(Z_R0, Z_R0); + __ z_brne(L); + __ stop("StubRoutines::throw_exception: no pending exception"); + __ bind(L); + } +#endif + + __ pop_frame(); + __ restore_return_pc(); + + __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry()); + __ z_br(Z_R1); + + RuntimeStub* stub = + RuntimeStub::new_runtime_stub(name, &code, + frame_complete_pc - start, + framesize_in_bytes/wordSize, + NULL /*oop_maps*/, false); + + return stub->entry_point(); + } + +#undef __ +#ifdef PRODUCT +#define __ _masm-> +#else +#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)-> +#endif + + //---------------------------------------------------------------------- + // The following routine generates a subroutine to throw an asynchronous + // UnknownError when an unsafe access gets a fault that could not be + // reasonably prevented by the programmer. (Example: SIGBUS/OBJERR.) + // + // Arguments: + // trapping PC: ?? + // + // Results: + // Posts an asynchronous exception, skips the trapping instruction. + // + address generate_handler_for_unsafe_access() { + StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); + { + address start = __ pc(); + __ unimplemented("StubRoutines::handler_for_unsafe_access", 86); + return start; + } + } + + // Support for uint StubRoutine::zarch::partial_subtype_check(Klass + // sub, Klass super); + // + // Arguments: + // ret : Z_RET, returned + // sub : Z_ARG2, argument, not changed + // super: Z_ARG3, argument, not changed + // + // raddr: Z_R14, blown by call + // + address generate_partial_subtype_check() { + StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); + Label miss; + + address start = __ pc(); + + const Register Rsubklass = Z_ARG2; // subklass + const Register Rsuperklass = Z_ARG3; // superklass + + // No args, but tmp registers that are killed. 
+ const Register Rlength = Z_ARG4; // cache array length + const Register Rarray_ptr = Z_ARG5; // Current value from cache array. + + if (UseCompressedOops) { + assert(Universe::heap() != NULL, "java heap must be initialized to generate partial_subtype_check stub"); + } + + // Always take the slow path (see SPARC). + __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass, + Rarray_ptr, Rlength, NULL, &miss); + + // Match falls through here. + __ clear_reg(Z_RET); // Zero indicates a match. Set EQ flag in CC. + __ z_br(Z_R14); + + __ BIND(miss); + __ load_const_optimized(Z_RET, 1); // One indicates a miss. + __ z_ltgr(Z_RET, Z_RET); // Set NE flag in CR. + __ z_br(Z_R14); + + return start; + } + + // Return address of code to be called from code generated by + // MacroAssembler::verify_oop. + // + // Don't generate, rather use C++ code. + address generate_verify_oop_subroutine() { + // Don't generate a StubCodeMark, because no code is generated! + // Generating the mark triggers notifying the oprofile jvmti agent + // about the dynamic code generation, but the stub without + // code (code_size == 0) confuses opjitconv + // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub"); + + address start = 0; + return start; + } + + // Generate pre-write barrier for array. + // + // Input: + // addr - register containing starting address + // count - register containing element count + // + // The input registers are overwritten. + void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { + + BarrierSet* const bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCTLogging: + // With G1, don't generate the call if we statically know that the target in uninitialized. + if (!dest_uninitialized) { + // Is marking active? 
+ Label filtered; + Register Rtmp1 = Z_R0; + const int active_offset = in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_active()); + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ load_and_test_int(Rtmp1, Address(Z_thread, active_offset)); + } else { + guarantee(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ load_and_test_byte(Rtmp1, Address(Z_thread, active_offset)); + } + __ z_bre(filtered); // Activity indicator is zero, so there is no marking going on currently. + + // __ push_frame_abi160(0); + (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), addr, count); + (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers); + // __ pop_frame(); + + __ bind(filtered); + } + break; + case BarrierSet::CardTableForRS: + case BarrierSet::CardTableExtension: + case BarrierSet::ModRef: + break; + default: + ShouldNotReachHere(); + } + } + + // Generate post-write barrier for array. + // + // Input: + // addr - register containing starting address + // count - register containing element count + // + // The input registers are overwritten. + void gen_write_ref_array_post_barrier(Register addr, Register count, bool branchToEnd) { + BarrierSet* const bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCTLogging: + { + if (branchToEnd) { + // __ push_frame_abi160(0); + (void) RegisterSaver::save_live_registers(_masm, RegisterSaver::arg_registers); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count); + (void) RegisterSaver::restore_live_registers(_masm, RegisterSaver::arg_registers); + // __ pop_frame(); + } else { + // Tail call: call c and return to stub caller. 
+ address entry_point = CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post); + if (Z_ARG1 != addr) __ z_lgr(Z_ARG1, addr); + if (Z_ARG2 != count) __ z_lgr(Z_ARG2, count); + __ load_const(Z_R1, entry_point); + __ z_br(Z_R1); // Branch without linking, callee will return to stub caller. + } + } + break; + case BarrierSet::CardTableForRS: + case BarrierSet::CardTableExtension: + // These cases formerly known as + // void array_store_check(Register addr, Register count, bool branchToEnd). + { + NearLabel doXC, done; + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + assert_different_registers(Z_R0, Z_R1, addr, count); + + // Nothing to do if count <= 0. + if (branchToEnd) { + __ compare64_and_branch(count, (intptr_t) 0, Assembler::bcondNotHigh, done); + } else { + __ z_ltgr(count, count); + __ z_bcr(Assembler::bcondNotPositive, Z_R14); + } + + // Note: We can't combine the shifts. We could lose a carry + // from calculating the array end address. + // count = (count-1)*BytesPerHeapOop + addr + // Count holds addr of last oop in array then. + __ z_sllg(count, count, LogBytesPerHeapOop); + __ add2reg_with_index(count, -BytesPerHeapOop, count, addr); + + // Get base address of card table. + __ load_const_optimized(Z_R1, (address)ct->byte_map_base); + + // count = (count>>shift) - (addr>>shift) + __ z_srlg(addr, addr, CardTableModRefBS::card_shift); + __ z_srlg(count, count, CardTableModRefBS::card_shift); + + // Prefetch first elements of card table for update. + if (VM_Version::has_Prefetch()) { + __ z_pfd(0x02, 0, addr, Z_R1); + } + + // Special case: clear just one byte. + __ clear_reg(Z_R0, true, false); // Used for doOneByte. + __ z_sgr(count, addr); // Count = n-1 now, CC used for brc below. + __ z_stc(Z_R0, 0, addr, Z_R1); // Must preserve CC from z_sgr. 
+ if (branchToEnd) { + __ z_brz(done); + } else { + __ z_bcr(Assembler::bcondZero, Z_R14); + } + + __ z_cghi(count, 255); + __ z_brnh(doXC); + + // MVCLE: clear a long area. + // Start addr of card table range = base + addr. + // # bytes in card table range = (count + 1) + __ add2reg_with_index(Z_R0, 0, Z_R1, addr); + __ add2reg(Z_R1, 1, count); + + // dirty hack: + // There are just two callers. Both pass + // count in Z_ARG3 = Z_R4 + // addr in Z_ARG2 = Z_R3 + // ==> use Z_ARG2 as src len reg = 0 + // Z_ARG1 as src addr (ignored) + assert(count == Z_ARG3, "count: unexpected register number"); + assert(addr == Z_ARG2, "addr: unexpected register number"); + __ clear_reg(Z_ARG2, true, false); + + __ MacroAssembler::move_long_ext(Z_R0, Z_ARG1, 0); + + if (branchToEnd) { + __ z_bru(done); + } else { + __ z_bcr(Assembler::bcondAlways, Z_R14); + } + + // XC: clear a short area. + Label XC_template; // Instr template, never exec directly! + __ bind(XC_template); + __ z_xc(0, 0, addr, 0, addr); + + __ bind(doXC); + // start addr of card table range = base + addr + // end addr of card table range = base + addr + count + __ add2reg_with_index(addr, 0, Z_R1, addr); + + if (VM_Version::has_ExecuteExtensions()) { + __ z_exrl(count, XC_template); // Execute XC with var. len. + } else { + __ z_larl(Z_R1, XC_template); + __ z_ex(count, 0, Z_R0, Z_R1); // Execute XC with var. len. + } + if (!branchToEnd) { + __ z_br(Z_R14); + } + + __ bind(done); + } + break; + case BarrierSet::ModRef: + if (!branchToEnd) { __ z_br(Z_R14); } + break; + default: + ShouldNotReachHere(); + } + } + + + // This is to test that the count register contains a positive int value. + // Required because C2 does not respect int to long conversion for stub calls. + void assert_positive_int(Register count) { +#ifdef ASSERT + __ z_srag(Z_R0, count, 31); // Just leave the sign (must be zero) in Z_R0. + __ asm_assert_eq("missing zero extend", 0xAFFE); +#endif + } + + // Generate overlap test for array copy stubs. 
+ // If no actual overlap is detected, control is transferred to the + // "normal" copy stub (entry address passed in disjoint_copy_target). + // Otherwise, execution continues with the code generated by the + // caller of array_overlap_test. + // + // Input: + // Z_ARG1 - from + // Z_ARG2 - to + // Z_ARG3 - element count + void array_overlap_test(address disjoint_copy_target, int log2_elem_size) { + __ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh, + disjoint_copy_target, /*len64=*/true, /*has_sign=*/false); + + Register index = Z_ARG3; + if (log2_elem_size > 0) { + __ z_sllg(Z_R1, Z_ARG3, log2_elem_size); // byte count + index = Z_R1; + } + __ add2reg_with_index(Z_R1, 0, index, Z_ARG1); // First byte after "from" range. + + __ MacroAssembler::compare_and_branch_optimized(Z_R1, Z_ARG2, Assembler::bcondNotHigh, + disjoint_copy_target, /*len64=*/true, /*has_sign=*/false); + + // Destructive overlap: let caller generate code for that. + } + + // Generate stub for disjoint array copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: Z_ARG1 + // to: Z_ARG2 + // count: Z_ARG3 treated as signed + void generate_disjoint_copy(bool aligned, int element_size, + bool branchToEnd, + bool restoreArgs) { + // This is the zarch specific stub generator for general array copy tasks. + // It has the following prereqs and features: + // + // - No destructive overlap allowed (else unpredictable results). + // - Destructive overlap does not exist if the leftmost byte of the target + // does not coincide with any of the source bytes (except the leftmost). 
+ // + // Register usage upon entry: + // Z_ARG1 == Z_R2 : address of source array + // Z_ARG2 == Z_R3 : address of target array + // Z_ARG3 == Z_R4 : length of operands (# of elements on entry) + // + // Register usage within the generator: + // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len). + // Used as pair register operand in complex moves, scratch registers anyway. + // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg). + // Same as R0/R1, but no scratch register. + // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd but preserved by the stub routine, + // but they might get temporarily overwritten. + + Register save_reg = Z_ARG4; // (= Z_R5), holds original target operand address for restore. + + { + Register llen_reg = Z_R1; // Holds left operand len (odd reg). + Register laddr_reg = Z_R0; // Holds left operand addr (even reg), overlaps with data_reg. + Register rlen_reg = Z_R5; // Holds right operand len (odd reg), overlaps with save_reg. + Register raddr_reg = Z_R4; // Holds right operand addr (even reg), overlaps with len_reg. + + Register data_reg = Z_R0; // Holds copied data chunk in alignment process and copy loop. + Register len_reg = Z_ARG3; // Holds operand len (#elements at entry, #bytes shortly after). + Register dst_reg = Z_ARG2; // Holds left (target) operand addr. + Register src_reg = Z_ARG1; // Holds right (source) operand addr. 
+ + Label doMVCLOOP, doMVCLOOPcount, doMVCLOOPiterate; + Label doMVCUnrolled; + NearLabel doMVC, doMVCgeneral, done; + Label MVC_template; + address pcMVCblock_b, pcMVCblock_e; + + bool usedMVCLE = true; + bool usedMVCLOOP = true; + bool usedMVCUnrolled = false; + bool usedMVC = false; + bool usedMVCgeneral = false; + + int stride; + Register stride_reg; + Register ix_reg; + + assert((element_size<=256) && (256%element_size == 0), "element size must be <= 256, power of 2"); + unsigned int log2_size = exact_log2(element_size); + + switch (element_size) { + case 1: BLOCK_COMMENT("ARRAYCOPY DISJOINT byte {"); break; + case 2: BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break; + case 4: BLOCK_COMMENT("ARRAYCOPY DISJOINT int {"); break; + case 8: BLOCK_COMMENT("ARRAYCOPY DISJOINT long {"); break; + default: BLOCK_COMMENT("ARRAYCOPY DISJOINT {"); break; + } + + assert_positive_int(len_reg); + + BLOCK_COMMENT("preparation {"); + + // No copying if len <= 0. + if (branchToEnd) { + __ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done); + } else { + if (VM_Version::has_CompareBranch()) { + __ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14); + } else { + __ z_ltgr(len_reg, len_reg); + __ z_bcr(Assembler::bcondNotPositive, Z_R14); + } + } + + // Prefetch just one cache line. Speculative opt for short arrays. + // Do not use Z_R1 in prefetch. Is undefined here. + if (VM_Version::has_Prefetch()) { + __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access. + __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access. + } + + BLOCK_COMMENT("} preparation"); + + // Save args only if really needed. + // Keep len test local to branch. Is generated only once. + + BLOCK_COMMENT("mode selection {"); + + // Special handling for arrays with only a few elements. + // Nothing fancy: just an executed MVC. + if (log2_size > 0) { + __ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1. 
+ } + if (element_size != 8) { + __ z_cghi(len_reg, 256/element_size); + __ z_brnh(doMVC); + usedMVC = true; + } + if (element_size == 8) { // Long and oop arrays are always aligned. + __ z_cghi(len_reg, 256/element_size); + __ z_brnh(doMVCUnrolled); + usedMVCUnrolled = true; + } + + // Prefetch another cache line. We, for sure, have more than one line to copy. + if (VM_Version::has_Prefetch()) { + __ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access. + __ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access. + } + + if (restoreArgs) { + // Remember entry value of ARG2 to restore all arguments later from that knowledge. + __ z_lgr(save_reg, dst_reg); + } + + __ z_cghi(len_reg, 4096/element_size); + if (log2_size == 0) { + __ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes + } + __ z_brnh(doMVCLOOP); + + // Fall through to MVCLE case. + + BLOCK_COMMENT("} mode selection"); + + // MVCLE: for long arrays + // DW aligned: Best performance for sizes > 4kBytes. + // unaligned: Least complex for sizes > 256 bytes. + if (usedMVCLE) { + BLOCK_COMMENT("mode MVCLE {"); + + // Setup registers for mvcle. + //__ z_lgr(llen_reg, len_reg);// r1 <- r4 #bytes already in Z_R1, aka llen_reg. + __ z_lgr(laddr_reg, dst_reg); // r0 <- r3 + __ z_lgr(raddr_reg, src_reg); // r4 <- r2 + __ z_lgr(rlen_reg, llen_reg); // r5 <- r1 + + __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0); // special: bypass cache + // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache. + // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0); + + if (restoreArgs) { + // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs. + // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required. + // Len_reg (Z_ARG3) is destroyed and must be restored. + __ z_slgr(laddr_reg, dst_reg); // copied #bytes + if (log2_size > 0) { + __ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements. 
+ } else { + __ z_lgr(Z_ARG3, laddr_reg); + } + } + if (branchToEnd) { + __ z_bru(done); + } else { + __ z_br(Z_R14); + } + BLOCK_COMMENT("} mode MVCLE"); + } + // No fallthru possible here. + + // MVCUnrolled: for short, aligned arrays. + + if (usedMVCUnrolled) { + BLOCK_COMMENT("mode MVC unrolled {"); + stride = 8; + + // Generate unrolled MVC instructions. + for (int ii = 32; ii > 1; ii--) { + __ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy + if (branchToEnd) { + __ z_bru(done); + } else { + __ z_br(Z_R14); + } + } + + pcMVCblock_b = __ pc(); + __ z_mvc(0, 1 * stride-1, dst_reg, 0, src_reg); // 8 byte copy + if (branchToEnd) { + __ z_bru(done); + } else { + __ z_br(Z_R14); + } + + pcMVCblock_e = __ pc(); + Label MVC_ListEnd; + __ bind(MVC_ListEnd); + + // This is an absolute fast path: + // - Array len in bytes must be not greater than 256. + // - Array len in bytes must be an integer mult of DW + // to save expensive handling of trailing bytes. + // - Argument restore is not done, + // i.e. previous code must not alter arguments (this code doesn't either). + + __ bind(doMVCUnrolled); + + // Avoid mul, prefer shift where possible. + // Combine shift right (for #DW) with shift left (for block size). + // Set CC for zero test below (asm_assert). + // Note: #bytes comes in Z_R1, #DW in len_reg. + unsigned int MVCblocksize = pcMVCblock_e - pcMVCblock_b; + unsigned int logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning). + + if (log2_size > 0) { // Len was scaled into Z_R1. 
+ switch (MVCblocksize) { + + case 8: logMVCblocksize = 3; + __ z_ltgr(Z_R0, Z_R1); // #bytes is index + break; // reasonable size, use shift + + case 16: logMVCblocksize = 4; + __ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size); + break; // reasonable size, use shift + + default: logMVCblocksize = 0; + __ z_ltgr(Z_R0, len_reg); // #DW for mul + break; // all other sizes: use mul + } + } else { + guarantee(log2_size, "doMVCUnrolled: only for DW entities"); + } + + // This test (and branch) is redundant. Previous code makes sure that + // - element count > 0 + // - element size == 8. + // Thus, len reg should never be zero here. We insert an asm_assert() here, + // just to double-check and to be on the safe side. + __ asm_assert(false, "zero len cannot occur", 99); + + __ z_larl(Z_R1, MVC_ListEnd); // Get addr of last instr block. + // Avoid mul, prefer shift where possible. + if (logMVCblocksize == 0) { + __ z_mghi(Z_R0, MVCblocksize); + } + __ z_slgr(Z_R1, Z_R0); + __ z_br(Z_R1); + BLOCK_COMMENT("} mode MVC unrolled"); + } + // No fallthru possible here. + + // MVC execute template + // Must always generate. Usage may be switched on below. + // There is no suitable place after here to put the template. + __ bind(MVC_template); + __ z_mvc(0,0,dst_reg,0,src_reg); // Instr template, never exec directly! + + + // MVC Loop: for medium-sized arrays + + // Only for DW aligned arrays (src and dst). + // #bytes to copy must be at least 256!!! + // Non-aligned cases handled separately. + stride = 256; + stride_reg = Z_R1; // Holds #bytes when control arrives here. + ix_reg = Z_ARG3; // Alias for len_reg. + + + if (usedMVCLOOP) { + BLOCK_COMMENT("mode MVC loop {"); + __ bind(doMVCLOOP); + + __ z_lcgr(ix_reg, Z_R1); // Ix runs from -(n-2)*stride to 1*stride (inclusive). + __ z_llill(stride_reg, stride); + __ add2reg(ix_reg, 2*stride); // Thus: increment ix by 2*stride. 
+ + __ bind(doMVCLOOPiterate); + __ z_mvc(0, stride-1, dst_reg, 0, src_reg); + __ add2reg(dst_reg, stride); + __ add2reg(src_reg, stride); + __ bind(doMVCLOOPcount); + __ z_brxlg(ix_reg, stride_reg, doMVCLOOPiterate); + + // Don 't use add2reg() here, since we must set the condition code! + __ z_aghi(ix_reg, -2*stride); // Compensate incr from above: zero diff means "all copied". + + if (restoreArgs) { + __ z_lcgr(Z_R1, ix_reg); // Prepare ix_reg for copy loop, #bytes expected in Z_R1. + __ z_brnz(doMVCgeneral); // We're not done yet, ix_reg is not zero. + + // ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg. + __ z_slgr(dst_reg, save_reg); // copied #bytes + __ z_slgr(src_reg, dst_reg); // = ARG1 (now restored) + if (log2_size) { + __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3. + } else { + __ z_lgr(Z_ARG3, dst_reg); + } + __ z_lgr(Z_ARG2, save_reg); // ARG2 now restored. + + if (branchToEnd) { + __ z_bru(done); + } else { + __ z_br(Z_R14); + } + + } else { + if (branchToEnd) { + __ z_brz(done); // CC set by aghi instr. + } else { + __ z_bcr(Assembler::bcondZero, Z_R14); // We're all done if zero. + } + + __ z_lcgr(Z_R1, ix_reg); // Prepare ix_reg for copy loop, #bytes expected in Z_R1. + // __ z_bru(doMVCgeneral); // fallthru + } + usedMVCgeneral = true; + BLOCK_COMMENT("} mode MVC loop"); + } + // Fallthru to doMVCgeneral + + // MVCgeneral: for short, unaligned arrays, after other copy operations + + // Somewhat expensive due to use of EX instruction, but simple. + if (usedMVCgeneral) { + BLOCK_COMMENT("mode MVC general {"); + __ bind(doMVCgeneral); + + __ add2reg(len_reg, -1, Z_R1); // Get #bytes-1 for EXECUTE. + if (VM_Version::has_ExecuteExtensions()) { + __ z_exrl(len_reg, MVC_template); // Execute MVC with variable length. + } else { + __ z_larl(Z_R1, MVC_template); // Get addr of instr template. + __ z_ex(len_reg, 0, Z_R0, Z_R1); // Execute MVC with variable length. 
+ } // penalty: 9 ticks + + if (restoreArgs) { + // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg + __ z_slgr(dst_reg, save_reg); // Copied #bytes without the "doMVCgeneral" chunk + __ z_slgr(src_reg, dst_reg); // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk + __ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet. + if (log2_size) { + __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3 + } else { + __ z_lgr(Z_ARG3, dst_reg); + } + __ z_lgr(Z_ARG2, save_reg); // ARG2 now restored. + } + + if (usedMVC) { + if (branchToEnd) { + __ z_bru(done); + } else { + __ z_br(Z_R14); + } + } else { + if (!branchToEnd) __ z_br(Z_R14); + } + BLOCK_COMMENT("} mode MVC general"); + } + // Fallthru possible if following block not generated. + + // MVC: for short, unaligned arrays + + // Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks. + // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4. + if (usedMVC) { + BLOCK_COMMENT("mode MVC {"); + __ bind(doMVC); + + // get #bytes-1 for EXECUTE + if (log2_size) { + __ add2reg(Z_R1, -1); // Length was scaled into Z_R1. + } else { + __ add2reg(Z_R1, -1, len_reg); // Length was not scaled. + } + + if (VM_Version::has_ExecuteExtensions()) { + __ z_exrl(Z_R1, MVC_template); // Execute MVC with variable length. + } else { + __ z_lgr(Z_R0, Z_R5); // Save ARG4, may be unnecessary. + __ z_larl(Z_R5, MVC_template); // Get addr of instr template. + __ z_ex(Z_R1, 0, Z_R0, Z_R5); // Execute MVC with variable length. + __ z_lgr(Z_R5, Z_R0); // Restore ARG4, may be unnecessary. 
+ } + + if (!branchToEnd) { + __ z_br(Z_R14); + } + BLOCK_COMMENT("} mode MVC"); + } + + __ bind(done); + + switch (element_size) { + case 1: BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break; + case 2: BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break; + case 4: BLOCK_COMMENT("} ARRAYCOPY DISJOINT int "); break; + case 8: BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break; + default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT "); break; + } + } + } + + // Generate stub for conjoint array copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: Z_ARG1 + // to: Z_ARG2 + // count: Z_ARG3 treated as signed + void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) { + + // This is the zarch specific stub generator for general array copy tasks. + // It has the following prereqs and features: + // + // - Destructive overlap exists and is handled by reverse copy. + // - Destructive overlap exists if the leftmost byte of the target + // does coincide with any of the source bytes (except the leftmost). + // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride) + // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine. + // - Z_ARG3 is USED but preserved by the stub routine. + // - Z_ARG4 is used as index register and is thus KILLed. + // + { + Register stride_reg = Z_R1; // Stride & compare value in loop (negative element_size). + Register data_reg = Z_R0; // Holds value of currently processed element. + Register ix_reg = Z_ARG4; // Holds byte index of currently processed element. + Register len_reg = Z_ARG3; // Holds length (in #elements) of arrays. + Register dst_reg = Z_ARG2; // Holds left operand addr. + Register src_reg = Z_ARG1; // Holds right operand addr. 
+ + assert(256%element_size == 0, "Element size must be power of 2."); + assert(element_size <= 8, "Can't handle more than DW units."); + + switch (element_size) { + case 1: BLOCK_COMMENT("ARRAYCOPY CONJOINT byte {"); break; + case 2: BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break; + case 4: BLOCK_COMMENT("ARRAYCOPY CONJOINT int {"); break; + case 8: BLOCK_COMMENT("ARRAYCOPY CONJOINT long {"); break; + default: BLOCK_COMMENT("ARRAYCOPY CONJOINT {"); break; + } + + assert_positive_int(len_reg); + + if (VM_Version::has_Prefetch()) { + __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access. + __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access. + } + + unsigned int log2_size = exact_log2(element_size); + if (log2_size) { + __ z_sllg(ix_reg, len_reg, log2_size); + } else { + __ z_lgr(ix_reg, len_reg); + } + + // Optimize reverse copy loop. + // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks. + // Unaligned DW access (neither fetch nor store) is DW-atomic, but should be alignment-atomic. + // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length. + + Label countLoop1; + Label copyLoop1; + Label skipBY; + Label skipHW; + int stride = -8; + + __ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop. + + if (element_size == 8) // Nothing to do here. + __ z_bru(countLoop1); + else { // Do not generate dead code. + __ z_tmll(ix_reg, 7); // Check the "odd" bits. + __ z_bre(countLoop1); // There are none, very good! + } + + if (log2_size == 0) { // Handle leftover Byte. + __ z_tmll(ix_reg, 1); + __ z_bre(skipBY); + __ z_lb(data_reg, -1, ix_reg, src_reg); + __ z_stcy(data_reg, -1, ix_reg, dst_reg); + __ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI. + __ bind(skipBY); + // fallthru + } + if (log2_size <= 1) { // Handle leftover HW. 
+ __ z_tmll(ix_reg, 2); + __ z_bre(skipHW); + __ z_lhy(data_reg, -2, ix_reg, src_reg); + __ z_sthy(data_reg, -2, ix_reg, dst_reg); + __ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI. + __ bind(skipHW); + __ z_tmll(ix_reg, 4); + __ z_bre(countLoop1); + // fallthru + } + if (log2_size <= 2) { // There are just 4 bytes (left) that need to be copied. + __ z_ly(data_reg, -4, ix_reg, src_reg); + __ z_sty(data_reg, -4, ix_reg, dst_reg); + __ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI. + __ z_bru(countLoop1); + } + + // Control can never get to here. Never! Never ever! + __ z_illtrap(0x99); + __ bind(copyLoop1); + __ z_lg(data_reg, 0, ix_reg, src_reg); + __ z_stg(data_reg, 0, ix_reg, dst_reg); + __ bind(countLoop1); + __ z_brxhg(ix_reg, stride_reg, copyLoop1); + + if (!branchToEnd) + __ z_br(Z_R14); + + switch (element_size) { + case 1: BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break; + case 2: BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break; + case 4: BLOCK_COMMENT("} ARRAYCOPY CONJOINT int "); break; + case 8: BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break; + default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT "); break; + } + } + } + + // Generate stub for disjoint byte copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + address generate_disjoint_byte_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + + // This is the zarch specific stub generator for byte array copy. + // Refer to generate_disjoint_copy for a list of prereqs and features: + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + generate_disjoint_copy(aligned, 1, false, false); + return __ addr_at(start_off); + } + + + address generate_disjoint_short_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + // This is the zarch specific stub generator for short array copy. 
+ // Refer to generate_disjoint_copy for a list of prereqs and features: + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + generate_disjoint_copy(aligned, 2, false, false); + return __ addr_at(start_off); + } + + + address generate_disjoint_int_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + // This is the zarch specific stub generator for int array copy. + // Refer to generate_disjoint_copy for a list of prereqs and features: + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + generate_disjoint_copy(aligned, 4, false, false); + return __ addr_at(start_off); + } + + + address generate_disjoint_long_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + // This is the zarch specific stub generator for long array copy. + // Refer to generate_disjoint_copy for a list of prereqs and features: + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + generate_disjoint_copy(aligned, 8, false, false); + return __ addr_at(start_off); + } + + + address generate_disjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) { + StubCodeMark mark(this, "StubRoutines", name); + // This is the zarch specific stub generator for oop array copy. + // Refer to generate_disjoint_copy for a list of prereqs and features. + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + unsigned int size = UseCompressedOops ? 4 : 8; + + gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized); + + generate_disjoint_copy(aligned, size, true, true); + + gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false); + + return __ addr_at(start_off); + } + + + address generate_conjoint_byte_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + // This is the zarch specific stub generator for overlapping byte array copy. 
+    // Refer to generate_conjoint_copy for a list of prereqs and features:
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+    address nooverlap_target = aligned ? StubRoutines::arrayof_jbyte_disjoint_arraycopy()
+                                       : StubRoutines::jbyte_disjoint_arraycopy();
+
+    array_overlap_test(nooverlap_target, 0); // Branch away to nooverlap_target if disjoint.
+    generate_conjoint_copy(aligned, 1, false);
+
+    return __ addr_at(start_off);
+  }
+
+
+  // Emits the stub for a possibly overlapping jshort array copy and returns its entry address.
+  // The matching disjoint stub (arrayof_ variant if aligned) must already have been generated,
+  // because its address is used as the branch target for the non-overlapping case.
+  address generate_conjoint_short_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // This is the zarch specific stub generator for overlapping short array copy.
+    // Refer to generate_conjoint_copy for a list of prereqs and features:
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+    address nooverlap_target = aligned ? StubRoutines::arrayof_jshort_disjoint_arraycopy()
+                                       : StubRoutines::jshort_disjoint_arraycopy();
+
+    // Second argument: presumably log2 of the element size (0/1/2/3 pair up with sizes 1/2/4/8
+    // in the sibling stubs, and the oop stub names it "shift").
+    array_overlap_test(nooverlap_target, 1); // Branch away to nooverlap_target if disjoint.
+    generate_conjoint_copy(aligned, 2, false);
+
+    return __ addr_at(start_off);
+  }
+
+  // Emits the stub for a possibly overlapping jint array copy and returns its entry address.
+  address generate_conjoint_int_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // This is the zarch specific stub generator for overlapping int array copy.
+    // Refer to generate_conjoint_copy for a list of prereqs and features:
+
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+    address nooverlap_target = aligned ? StubRoutines::arrayof_jint_disjoint_arraycopy()
+                                       : StubRoutines::jint_disjoint_arraycopy();
+
+    array_overlap_test(nooverlap_target, 2); // Branch away to nooverlap_target if disjoint.
+    generate_conjoint_copy(aligned, 4, false);
+
+    return __ addr_at(start_off);
+  }
+
+  // Emits the stub for a possibly overlapping jlong array copy and returns its entry address.
+  address generate_conjoint_long_copy(bool aligned, const char * name) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // This is the zarch specific stub generator for overlapping long array copy.
+    // Refer to generate_conjoint_copy for a list of prereqs and features:
+
+    unsigned int start_off   = __ offset();  // Remember stub start address (is rtn value).
+    address nooverlap_target = aligned ? StubRoutines::arrayof_jlong_disjoint_arraycopy()
+                                       : StubRoutines::jlong_disjoint_arraycopy();
+
+    array_overlap_test(nooverlap_target, 3); // Branch away to nooverlap_target if disjoint.
+    generate_conjoint_copy(aligned, 8, false);
+
+    return __ addr_at(start_off);
+  }
+
+  // Emits the stub for a possibly overlapping oop array copy and returns its entry address.
+  // Element size and shift follow UseCompressedOops (4 bytes / shift 2, or 8 bytes / shift 3).
+  // dest_uninitialized selects the disjoint target and is forwarded to the pre barrier.
+  address generate_conjoint_oop_copy(bool aligned, const char * name, bool dest_uninitialized) {
+    StubCodeMark mark(this, "StubRoutines", name);
+    // This is the zarch specific stub generator for overlapping oop array copy.
+    // Refer to generate_conjoint_copy for a list of prereqs and features.
+    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
+    unsigned int size      = UseCompressedOops ? 4 : 8;
+    unsigned int shift     = UseCompressedOops ? 2 : 3;
+
+    address nooverlap_target = aligned ? StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized)
+                                       : StubRoutines::oop_disjoint_arraycopy(dest_uninitialized);
+
+    // Branch to disjoint_copy (if applicable) before pre_barrier to avoid double pre_barrier.
+    array_overlap_test(nooverlap_target, shift);  // Branch away to nooverlap_target if disjoint.
+
+    gen_write_ref_array_pre_barrier(Z_ARG2, Z_ARG3, dest_uninitialized);
+
+    generate_conjoint_copy(aligned, size, true);  // Must preserve ARG2, ARG3.
+
+    gen_write_ref_array_post_barrier(Z_ARG2, Z_ARG3, false);
+
+    return __ addr_at(start_off);
+  }
+
+
+  // Generates every arraycopy stub (disjoint and conjoint, plain and arrayof_ variants,
+  // oop copies with and without "uninit") and stores the entry addresses in StubRoutines.
+  void generate_arraycopy_stubs() {
+
+    // Note: the disjoint stubs must be generated first, some of
+    // the conjoint stubs use them.
+    StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy (false, "jbyte_disjoint_arraycopy");
+    StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
+    StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy (false, "jint_disjoint_arraycopy");
+    StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy (false, "jlong_disjoint_arraycopy");
+    StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_oop_copy (false, "oop_disjoint_arraycopy", false);
+    StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy (false, "oop_disjoint_arraycopy_uninit", true);
+
+    StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy (true, "arrayof_jbyte_disjoint_arraycopy");
+    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, "arrayof_jshort_disjoint_arraycopy");
+    StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy (true, "arrayof_jint_disjoint_arraycopy");
+    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy (true, "arrayof_jlong_disjoint_arraycopy");
+    StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_oop_copy (true, "arrayof_oop_disjoint_arraycopy", false);
+    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy (true, "arrayof_oop_disjoint_arraycopy_uninit", true);
+
+    StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy (false, "jbyte_arraycopy");
+    StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy");
+    StubRoutines::_jint_arraycopy = generate_conjoint_int_copy (false, "jint_arraycopy");
+    StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy (false, "jlong_arraycopy");
+    StubRoutines::_oop_arraycopy = generate_conjoint_oop_copy (false, "oop_arraycopy", false);
+    StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy (false, "oop_arraycopy_uninit", true);
+
+    StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy (true, "arrayof_jbyte_arraycopy");
+    StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, "arrayof_jshort_arraycopy");
+    StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy (true, "arrayof_jint_arraycopy");
+    StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy (true, "arrayof_jlong_arraycopy");
+    StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy (true, "arrayof_oop_arraycopy", false);
+    StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy (true, "arrayof_oop_arraycopy_uninit", true);
+  }
+
+  // Emits a SafeFetch stub for the given access size (4 or 8 bytes).
+  // The stub entry, the pc of the possibly faulting load, and the pc at which execution
+  // continues are reported through entry, fault_pc and continuation_pc.
+  void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) {
+
+    // safefetch signatures:
+    //   int      SafeFetch32(int*      adr, int      errValue);
+    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
+    //
+    // arguments:
+    //   Z_ARG1 = adr
+    //   Z_ARG2 = errValue
+    //
+    // result:
+    //   Z_RET  = *adr or errValue
+
+    StubCodeMark mark(this, "StubRoutines", name);
+
+    // entry point
+    // Load *adr into Z_ARG2, may fault.
+    // The load is the first instruction, so entry and fault pc coincide.
+    *entry = *fault_pc = __ pc();
+    switch (size) {
+      case 4:
+        // Sign extended int32_t.
+        __ z_lgf(Z_ARG2, 0, Z_ARG1);
+        break;
+      case 8:
+        // int64_t
+        __ z_lg(Z_ARG2, 0, Z_ARG1);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+
+    // Return errValue or *adr.
+    // Z_ARG2 still holds errValue if the load above did not complete.
+    *continuation_pc = __ pc();
+    __ z_lgr(Z_RET, Z_ARG2);
+    __ z_br(Z_R14);
+
+  }
+
+  // Call interface for AES_encryptBlock, AES_decryptBlock stubs.
+  //
+  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
+  //   Z_ARG2 - destination data block. Ptr to leftmost byte to be stored.
+  //            For in-place encryption/decryption, ARG1 and ARG2 can point
+  //            to the same piece of storage.
+  //   Z_ARG3 - Crypto key address (expanded key). The first n bits of
+  //            the expanded key constitute the original AES-<n> key (see below).
+  //
+  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
+  //
+  // Some remarks:
+  //   The crypto key, as passed from the caller to these encryption stubs,
+  //   is a so-called expanded key. It is derived from the original key
+  //   by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule
+  //   With the expanded key, the cipher/decipher task is decomposed in
+  //   multiple, less complex steps, called rounds. Sun SPARC and Intel
+  //   processors obviously implement support for those less complex steps.
+  //   z/Architecture provides instructions for full cipher/decipher complexity.
+  //   Therefore, we need the original, not the expanded key here.
+  //   Luckily, the first n bits of an AES-<n> expanded key are formed
+  //   by the original key itself. That takes us out of trouble. :-)
+  //   The key length (in bytes) relation is as follows:
+  //     original    expanded   rounds  key bit     keylen
+  //    key bytes   key bytes            length   in words
+  //           16         176       11      128         44
+  //           24         208       13      192         52
+  //           32         240       15      256         60
+  //
+  // The crypto instructions used in the AES* stubs have some specific register requirements.
+  //   Z_R0   holds the crypto function code. Please refer to the KM/KMC instruction
+  //          description in the "z/Architecture Principles of Operation" manual for details.
+  //   Z_R1   holds the parameter block address. The parameter block contains the cryptographic key
+  //          (KM instruction) and the chaining value (KMC instruction).
+  //   dst    must designate an even-numbered register, holding the address of the output message.
+  //   src    must designate an even/odd register pair, holding the address/length of the original message
+
+  // Helper function which generates code to
+  //  - load the function code in register fCode (== Z_R0)
+  //  - load the data block length (depends on cipher function) in register srclen if requested.
+  //  - is_decipher switches between cipher/decipher function codes
+  //  - srclen always receives the data block length; there is no parameter to suppress that.
+  //
+  // The key length is compared exactly once, before fCode is written for the first time.
+  // The visible callers pass Z_R0 for both keylen and fCode, so that order must be kept.
+  // The loads emitted between the compare and the branches are expected to leave the
+  // condition code untouched.
+  void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) {
+
+    BLOCK_COMMENT("Set fCode {"); {
+      Label done;
+      int   fc_mode;
+      if (is_decipher) {
+        fc_mode = VM_Version::CipherMode::decipher;
+      } else {
+        fc_mode = VM_Version::CipherMode::cipher;
+      }
+      // True if at least one AES variant uses a data block length different from AES-128.
+      bool  len_per_variant = (VM_Version::Cipher::_AES128_dataBlk != VM_Version::Cipher::_AES192_dataBlk)
+                           || (VM_Version::Cipher::_AES128_dataBlk != VM_Version::Cipher::_AES256_dataBlk);
+
+      // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256.
+      __ z_cghi(keylen, 52);
+
+      // Candidate 1: AES-256, taken if keyLen > 52.
+      __ z_lghi(fCode, VM_Version::Cipher::_AES256 + fc_mode);
+      if (len_per_variant) {
+        __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk);
+      }
+      __ z_brh(done);
+
+      // Candidate 2: AES-192, taken if keyLen == 52.
+      __ z_lghi(fCode, VM_Version::Cipher::_AES192 + fc_mode);
+      if (len_per_variant) {
+        __ z_lghi(srclen, VM_Version::Cipher::_AES192_dataBlk);
+      }
+      __ z_bre(done);
+
+      // Candidate 3: AES-128 (keyLen < 52). No branch needed, we fall through to the label.
+      __ z_lghi(fCode, VM_Version::Cipher::_AES128 + fc_mode);
+      if (len_per_variant) {
+        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
+      }
+
+      __ bind(done);
+      if (!len_per_variant) {
+        // All variants share one block length: load it once, after the paths have joined.
+        __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk);
+      }
+    }
+    BLOCK_COMMENT("} Set fCode");
+  }
+
+  // Push a parameter block for the cipher/decipher instruction on the stack.
+  // NOTE:
+  //   Before returning, the stub has to copy the chaining value from
+  //   the parmBlk, where it was updated by the crypto instruction, back
+  //   to the chaining value array the address of which was passed in the cv argument.
+  //   As all the available registers are used and modified by KMC, we need to save
+  //   the key length across the KMC instruction. We do so by spilling it to the stack,
+  //   just preceding the parmBlk (at (parmBlk - 8)).
+  // Emits code which
+  //  - selects the AES variant from keylen (expanded key length in 4-byte words: 44/52/60),
+  //  - carves a 32-byte aligned parameter block out of the stack below the current SP,
+  //  - spills keylen to (parmBlk - 8) and the old SP to (parmBlk - 16),
+  //  - resizes the frame so that the parameter block lies inside it,
+  //  - copies the chaining value (cv) followed by the key into the parameter block,
+  //  - loads the function code for the selected variant into fCode.
+  // Only variants reported as available by VM_Version get code emitted. A key length that
+  // matches no available variant runs into the stop_static below.
+  void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) {
+    const int AES_parmBlk_align    = 32;
+    const int AES_parmBlk_addspace = AES_parmBlk_align; // Must be multiple of AES_parmblk_align.
+    int       cv_len, key_len;
+    int       mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
+    Label     parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
+
+    BLOCK_COMMENT("push parmBlk {");
+    if (VM_Version::has_Crypto_AES() ) { __ z_cghi(keylen, 52); }
+    if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); }  // keyLen >  52: AES256
+    if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); }  // keyLen == 52: AES192
+    if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); }  // keyLen <  52: AES128
+
+    // Security net: requested AES function not available on this CPU.
+    // NOTE:
+    //   As of now (March 2015), this safety net is not required. JCE policy files limit the
+    //   cryptographic strength of the keys used to 128 bit. If we have AES hardware support
+    //   at all, we have at least AES-128.
+    __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0);
+
+    if (VM_Version::has_Crypto_AES128()) {
+      __ bind(parmBlk_128);
+      cv_len  = VM_Version::Cipher::_AES128_dataBlk;
+      key_len = VM_Version::Cipher::_AES128_parmBlk_C - cv_len;
+      __ z_lay(parmBlk, -(VM_Version::Cipher::_AES128_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
+      __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // align parameter block
+
+      // Resize the frame to accommodate for the aligned parameter block and other stuff.
+      // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
+      __ z_stg(keylen, -8, parmBlk);                   // Spill keylen for later use.
+      __ z_stg(Z_SP, -16, parmBlk);                    // Spill SP for easy revert.
+      __ z_aghi(parmBlk, -AES_parmBlk_addspace);       // Additional space for keylen, etc..
+      __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
+      __ z_aghi(parmBlk, AES_parmBlk_addspace);        // Restore parameter block address.
+
+      __ z_mvc(0, cv_len-1, parmBlk, 0, cv);           // Copy cv.
+      __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
+      __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode);
+      if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
+        __ z_bru(parmBlk_set);  // Fallthru otherwise.
+      }
+    }
+
+    if (VM_Version::has_Crypto_AES192()) {
+      __ bind(parmBlk_192);
+      cv_len  = VM_Version::Cipher::_AES192_dataBlk;
+      key_len = VM_Version::Cipher::_AES192_parmBlk_C - cv_len;
+      __ z_lay(parmBlk, -(VM_Version::Cipher::_AES192_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
+      __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.
+
+      // Resize the frame to accommodate for the aligned parameter block and other stuff.
+      // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
+      __ z_stg(keylen, -8, parmBlk);                   // Spill keylen for later use.
+      __ z_stg(Z_SP, -16, parmBlk);                    // Spill SP for easy revert.
+      __ z_aghi(parmBlk, -AES_parmBlk_addspace);       // Additional space for keylen, etc..
+      __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
+      __ z_aghi(parmBlk, AES_parmBlk_addspace);        // Restore parameter block address.
+
+      __ z_mvc(0, cv_len-1, parmBlk, 0, cv);           // Copy cv.
+      __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
+      __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode);
+      if (VM_Version::has_Crypto_AES256()) {
+        __ z_bru(parmBlk_set);  // Fallthru otherwise.
+      }
+    }
+
+    if (VM_Version::has_Crypto_AES256()) {
+      __ bind(parmBlk_256);
+      cv_len  = VM_Version::Cipher::_AES256_dataBlk;
+      key_len = VM_Version::Cipher::_AES256_parmBlk_C - cv_len;
+      __ z_lay(parmBlk, -(VM_Version::Cipher::_AES256_parmBlk_C+AES_parmBlk_align)+(AES_parmBlk_align-1), Z_SP);
+      __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff);  // Align parameter block.
+
+      // Resize the frame to accommodate for the aligned parameter block and other stuff.
+      // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace, parmBlk).
+      __ z_stg(keylen, -8, parmBlk);                   // Spill keylen for later use.
+      __ z_stg(Z_SP, -16, parmBlk);                    // Spill SP for easy revert.
+      __ z_aghi(parmBlk, -AES_parmBlk_addspace);       // Additional space for keylen, etc..
+      __ resize_frame_absolute(parmBlk, keylen, true); // Resize frame with parmBlk being the new SP.
+      __ z_aghi(parmBlk, AES_parmBlk_addspace);        // Restore parameter block address.
+
+      __ z_mvc(0, cv_len-1, parmBlk, 0, cv);           // Copy cv.
+      __ z_mvc(cv_len, key_len-1, parmBlk, 0, key);    // Copy key.
+      __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode);
+      // __ z_bru(parmBlk_set);  // fallthru
+    }
+
+    __ bind(parmBlk_set);
+    BLOCK_COMMENT("} push parmBlk");
+  }
+
+  // Pop a parameter block from the stack. The chaining value portion of the parameter block
+  // is copied back to the cv array as it is needed for subsequent cipher steps.
+  // The keylen value as well as the original SP (before resizing) was pushed to the stack
+  // when pushing the parameter block.
+  // keylen is only reloaded (from parmBlk - 8) if the variants differ in their data block
+  // length. The key argument is not used by the emitted code.
+  void generate_pop_parmBlk(Register keylen, Register parmBlk, Register key, Register cv) {
+
+    BLOCK_COMMENT("pop parmBlk {");
+    bool identical_dataBlk_len =  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) &&
+                                  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
+    if (identical_dataBlk_len) {
+      int cv_len = VM_Version::Cipher::_AES128_dataBlk;
+      __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
+    } else {
+      int cv_len;
+      Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
+      __ z_lg(keylen, -8, parmBlk);  // restore keylen
+      __ z_cghi(keylen, 52);
+      if (VM_Version::has_Crypto_AES256()) __ z_brh(parmBlk_256);  // keyLen >  52: AES256
+      if (VM_Version::has_Crypto_AES192()) __ z_bre(parmBlk_192);  // keyLen == 52: AES192
+      // if (VM_Version::has_Crypto_AES128()) __ z_brl(parmBlk_128);  // keyLen <  52: AES128  // fallthru
+
+      // Security net: there is no one here. If we would need it, we should have
+      // fallen into it already when pushing the parameter block.
+      if (VM_Version::has_Crypto_AES128()) {
+        __ bind(parmBlk_128);
+        cv_len = VM_Version::Cipher::_AES128_dataBlk;
+        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
+        if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) {
+          __ z_bru(parmBlk_set);
+        }
+      }
+
+      if (VM_Version::has_Crypto_AES192()) {
+        __ bind(parmBlk_192);
+        cv_len = VM_Version::Cipher::_AES192_dataBlk;
+        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
+        if (VM_Version::has_Crypto_AES256()) {
+          __ z_bru(parmBlk_set);
+        }
+      }
+
+      if (VM_Version::has_Crypto_AES256()) {
+        __ bind(parmBlk_256);
+        cv_len = VM_Version::Cipher::_AES256_dataBlk;
+        __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
+        // __ z_bru(parmBlk_set);  // fallthru
+      }
+      __ bind(parmBlk_set);
+    }
+    __ z_lg(Z_SP, -16, parmBlk); // Revert resize_frame_absolute.
+    BLOCK_COMMENT("} pop parmBlk");
+  }
+
+  // Compute AES encrypt function.
+  // Emits a stub which enciphers one data block with KM, using the key array itself as
+  // parameter block. Returns the stub entry address.
+  address generate_AES_encryptBlock(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+
+    Register       from    = Z_ARG1; // source byte array
+    Register       to      = Z_ARG2; // destination byte array
+    Register       key     = Z_ARG3; // expanded key array
+
+    const Register keylen  = Z_R0;   // Temporarily (until fCode is set) holds the expanded key array length.
+    const Register fCode   = Z_R0;   // crypto function code
+    const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
+    const Register src     = Z_ARG1; // is Z_R2
+    const Register srclen  = Z_ARG2; // Overwrites destination address.
+    const Register dst     = Z_ARG3; // Overwrites expanded key address.
+
+    // Read key len of expanded key (in 4-byte words).
+    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Copy arguments to registers as required by crypto instruction.
+    // Order matters: key (Z_ARG3) is saved before dst overwrites it, and to (Z_ARG2)
+    // is copied before srclen overwrites it in generate_load_AES_fCode.
+    __ z_lgr(parmBlk, key);          // crypto key (in T_INT array).
+    // __ z_lgr(src, from);          // Copy not needed, src/from are identical.
+    __ z_lgr(dst, to);               // Copy destination address to even register.
+
+    // Construct function code in Z_R0, data block length in Z_ARG2.
+    generate_load_AES_fCode(keylen, fCode, srclen, false);
+
+    __ km(dst, src);          // Cipher the message.
+
+    __ z_br(Z_R14);
+
+    return __ addr_at(start_off);
+  }
+
+  // Compute AES decrypt function.
+  // Same as generate_AES_encryptBlock, but selects the decipher function codes.
+  address generate_AES_decryptBlock(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+
+    Register       from    = Z_ARG1; // source byte array
+    Register       to      = Z_ARG2; // destination byte array
+    Register       key     = Z_ARG3; // expanded key array, not preset at entry!!!
+
+    const Register keylen  = Z_R0;   // Temporarily (until fCode is set) holds the expanded key array length.
+    const Register fCode   = Z_R0;   // crypto function code
+    const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
+    const Register src     = Z_ARG1; // is Z_R2
+    const Register srclen  = Z_ARG2; // Overwrites destination address.
+    const Register dst     = Z_ARG3; // Overwrites key address.
+
+    // Read key len of expanded key (in 4-byte words).
+    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Copy arguments to registers as required by crypto instruction.
+    __ z_lgr(parmBlk, key);     // Copy crypto key address.
+    // __ z_lgr(src, from);     // Copy not needed, src/from are identical.
+    __ z_lgr(dst, to);          // Copy destination address to even register.
+
+    // Construct function code in Z_R0, data block length in Z_ARG2.
+    generate_load_AES_fCode(keylen, fCode, srclen, true);
+
+    __ km(dst, src);          // Cipher the message.
+
+    __ z_br(Z_R14);
+
+    return __ addr_at(start_off);
+  }
+
+  // These stubs receive the addresses of the cryptographic key and of the chaining value as two separate
+  // arguments (registers "key" and "cv", respectively). The KMC instruction, on the other hand, requires
+  // chaining value and key to be, in this sequence, adjacent in storage. Thus, we need to allocate some
+  // thread-local working storage. Using heap memory incurs all the hassles of allocating/freeing.
+  // Stack space, on the contrary, is deallocated automatically when we return from the stub to the caller.
+  // NOTE:
+  //   generate_push_parmBlk places the parameter block below the current SP and resizes the
+  //   frame to cover it (resize_frame_absolute). The original SP is spilled next to the
+  //   parameter block and is restored by generate_pop_parmBlk before the stub returns.
+  // We align the parameter block to the next available octoword.
+  //
+  // Compute chained AES encrypt function.
+  address generate_cipherBlockChaining_AES_encrypt(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+
+    Register       from    = Z_ARG1; // source byte array (clear text)
+    Register       to      = Z_ARG2; // destination byte array (ciphered)
+    Register       key     = Z_ARG3; // expanded key array.
+    Register       cv      = Z_ARG4; // chaining value
+    const Register msglen  = Z_ARG5; // Total length of the msg to be encrypted. Value must be returned
+                                     // in Z_RET upon completion of this stub. Is 32-bit integer.
+
+    const Register keylen  = Z_R0;   // Expanded key length, as read from key array. Temp only.
+    const Register fCode   = Z_R0;   // crypto function code
+    const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
+    const Register src     = Z_ARG1; // is Z_R2
+    const Register srclen  = Z_ARG2; // Overwrites destination address.
+    const Register dst     = Z_ARG3; // Overwrites key address.
+
+    // Read key len of expanded key (in 4-byte words).
+    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
+    // Construct function code in Z_R0.
+    generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, false);
+
+    // Prepare other registers for instruction.
+    // __ z_lgr(src, from);     // Not needed, registers are the same.
+    __ z_lgr(dst, to);
+    __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required.
+
+    __ kmc(dst, src);           // Cipher the message.
+
+    generate_pop_parmBlk(keylen, parmBlk, key, cv);
+
+    __ z_llgfr(Z_RET, msglen);  // We pass the offsets as ints, not as longs as required.
+    __ z_br(Z_R14);
+
+    return __ addr_at(start_off);
+  }
+
+  // Compute chained AES decrypt function.
+  // Mirrors generate_cipherBlockChaining_AES_encrypt, selecting the decipher function codes.
+  address generate_cipherBlockChaining_AES_decrypt(const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    unsigned int   start_off = __ offset();  // Remember stub start address (is rtn value).
+
+    Register       from    = Z_ARG1; // source byte array (ciphered)
+    Register       to      = Z_ARG2; // destination byte array (clear text)
+    Register       key     = Z_ARG3; // expanded key array, not preset at entry!!!
+    Register       cv      = Z_ARG4; // chaining value
+    const Register msglen  = Z_ARG5; // Total length of the msg to be decrypted. Value must be returned
+                                     // in Z_RET upon completion of this stub. Is 32-bit integer.
+
+    const Register keylen  = Z_R0;   // Expanded key length, as read from key array. Temp only.
+    const Register fCode   = Z_R0;   // crypto function code
+    const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
+    const Register src     = Z_ARG1; // is Z_R2
+    const Register srclen  = Z_ARG2; // Overwrites destination address.
+    const Register dst     = Z_ARG3; // Overwrites key address.
+
+    // Read key len of expanded key (in 4-byte words).
+    __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+
+    // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block.
+    // Construct function code in Z_R0.
+    generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, true);
+
+    // Prepare other registers for instruction.
+    // __ z_lgr(src, from);     // Not needed, registers are the same.
+    __ z_lgr(dst, to);
+    // msglen is a 32-bit int. Zero-extend it exactly like the encrypt stub does, instead of
+    // copying the full register whose upper half is not known to be clean.
+    __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required.
+
+    __ kmc(dst, src);           // Decipher the message.
+
+    generate_pop_parmBlk(keylen, parmBlk, key, cv);
+
+    __ z_llgfr(Z_RET, msglen);  // We pass the offsets as ints, not as longs as required.
+    __ z_br(Z_R14);
+
+    return __ addr_at(start_off);
+  }
+
+
+  // Call interface for all SHA* stubs.
+  //
+  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
+  //   Z_ARG2 - current SHA state. Ptr to state area. This area serves as
+  //            parameter block as required by the crypto instruction.
+  //   Z_ARG3 - current byte offset in source data block.
+  //   Z_ARG4 - last byte offset in source data block.
+  //            (Z_ARG4 - Z_ARG3) gives the #bytes remaining to be processed.
+  //
+  //   Z_RET  - return value. First unprocessed byte offset in src buffer.
+  //
+  //   A few notes on the call interface:
+  //    - All stubs, whether they are single-block or multi-block, are assumed to
+  //      digest an integer multiple of the data block length of data.
All data + // blocks are digested using the intermediate message digest (KIMD) instruction. + // Special end processing, as done by the KLMD instruction, seems to be + // emulated by the calling code. + // + // - Z_ARG1 addresses the first byte of source data. The offset (Z_ARG3) is + // already accounted for. + // + // - The current SHA state (the intermediate message digest value) is contained + // in an area addressed by Z_ARG2. The area size depends on the SHA variant + // and is accessible via the enum VM_Version::MsgDigest::_SHA_parmBlk_I + // + // - The single-block stub is expected to digest exactly one data block, starting + // at the address passed in Z_ARG1. + // + // - The multi-block stub is expected to digest all data blocks which start in + // the offset interval [srcOff(Z_ARG3), srcLimit(Z_ARG4)). The exact difference + // (srcLimit-srcOff), rounded up to the next multiple of the data block length, + // gives the number of blocks to digest. It must be assumed that the calling code + // provides for a large enough source data buffer. + // + // Compute SHA-1 function. + address generate_SHA1_stub(bool multiBlock, const char* name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + + const Register srcBuff = Z_ARG1; // Points to first block to process (offset already added). + const Register SHAState = Z_ARG2; // Only on entry. Reused soon thereafter for kimd register pairs. + const Register srcOff = Z_ARG3; // int + const Register srcLimit = Z_ARG4; // Only passed in multiBlock case. int + + const Register SHAState_local = Z_R1; + const Register SHAState_save = Z_ARG3; + const Register srcBufLen = Z_ARG2; // Destroys state address, must be copied before. 
+ Label useKLMD, rtn; + + __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA1); // function code + __ z_lgr(SHAState_local, SHAState); // SHAState == parameter block + + if (multiBlock) { // Process everything from offset to limit. + + // The following description is valid if we get a raw (unpimped) source data buffer, + // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailled above, + // the calling convention for these stubs is different. We leave the description in + // to inform the reader what must be happening hidden in the calling code. + // + // The data block to be processed can have arbitrary length, i.e. its length does not + // need to be an integer multiple of SHA_datablk. Therefore, we need to implement + // two different paths. If the length is an integer multiple, we use KIMD, saving us + // to copy the SHA state back and forth. If the length is odd, we copy the SHA state + // to the stack, execute a KLMD instruction on it and copy the result back to the + // caller's SHA state location. + + // Total #srcBuff blocks to process. + if (VM_Version::has_DistinctOpnds()) { + __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference + __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1); // round up + __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff); + __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value. + __ z_llgfr(srcBufLen, srcBufLen); // Cast to 64-bit. + } else { + __ z_lgfr(srcBufLen, srcLimit); // Exact difference. srcLimit passed as int. + __ z_sgfr(srcBufLen, srcOff); // SrcOff passed as int, now properly casted to long. + __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1); // round up + __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff); + __ z_lgr(srcLimit, srcOff); // SrcLimit temporarily holds return value. + __ z_agr(srcLimit, srcBufLen); + } + + // Integral #blocks to digest? 
+ // As a result of the calculations above, srcBufLen MUST be an integer + // multiple of _SHA1_dataBlk, or else we are in big trouble. + // We insert an asm_assert into the KLMD case to guard against that. + __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1); + __ z_brc(Assembler::bcondNotAllZero, useKLMD); + + // Process all full blocks. + __ kimd(srcBuff); + + __ z_lgr(Z_RET, srcLimit); // Offset of first unprocessed byte in buffer. + } else { // Process one data block only. + __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA1_dataBlk); // #srcBuff bytes to process + __ kimd(srcBuff); + __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA1_dataBlk, srcOff); // Offset of first unprocessed byte in buffer. No 32 to 64 bit extension needed. + } + + __ bind(rtn); + __ z_br(Z_R14); + + if (multiBlock) { + __ bind(useKLMD); + +#if 1 + // Security net: this stub is believed to be called for full-sized data blocks only + // NOTE: The following code is believed to be correct, but is is not tested. + __ stop_static("SHA128 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0); +#endif + } + + return __ addr_at(start_off); + } + + // Compute SHA-256 function. + address generate_SHA256_stub(bool multiBlock, const char* name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + + const Register srcBuff = Z_ARG1; + const Register SHAState = Z_ARG2; // Only on entry. Reused soon thereafter. + const Register SHAState_local = Z_R1; + const Register SHAState_save = Z_ARG3; + const Register srcOff = Z_ARG3; + const Register srcLimit = Z_ARG4; + const Register srcBufLen = Z_ARG2; // Destroys state address, must be copied before. 
+ Label useKLMD, rtn; + + __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA256); // function code + __ z_lgr(SHAState_local, SHAState); // SHAState == parameter block + + if (multiBlock) { // Process everything from offset to limit. + // The following description is valid if we get a raw (unpimped) source data buffer, + // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailled above, + // the calling convention for these stubs is different. We leave the description in + // to inform the reader what must be happening hidden in the calling code. + // + // The data block to be processed can have arbitrary length, i.e. its length does not + // need to be an integer multiple of SHA_datablk. Therefore, we need to implement + // two different paths. If the length is an integer multiple, we use KIMD, saving us + // to copy the SHA state back and forth. If the length is odd, we copy the SHA state + // to the stack, execute a KLMD instruction on it and copy the result back to the + // caller's SHA state location. + + // total #srcBuff blocks to process + if (VM_Version::has_DistinctOpnds()) { + __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference + __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up + __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff); + __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value. + __ z_llgfr(srcBufLen, srcBufLen); // Cast to 64-bit. + } else { + __ z_lgfr(srcBufLen, srcLimit); // exact difference + __ z_sgfr(srcBufLen, srcOff); + __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up + __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff); + __ z_lgr(srcLimit, srcOff); // Srclimit temporarily holds return value. + __ z_agr(srcLimit, srcBufLen); + } + + // Integral #blocks to digest? 
+ // As a result of the calculations above, srcBufLen MUST be an integer + // multiple of _SHA256_dataBlk, or else we are in big trouble. + // We insert a stop_static into the useKLMD path to guard against that. + __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); + __ z_brc(Assembler::bcondNotAllZero, useKLMD); + + // Process all full blocks. + __ kimd(srcBuff); + + __ z_lgr(Z_RET, srcLimit); // Offset of first unprocessed byte in buffer. + } else { // Process one data block only. + __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA256_dataBlk); // #srcBuff bytes to process + __ kimd(srcBuff); + __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA256_dataBlk, srcOff); // Offset of first unprocessed byte in buffer. + } + + __ bind(rtn); + __ z_br(Z_R14); + + if (multiBlock) { + __ bind(useKLMD); +#if 1 + // Security net: this stub is believed to be called for full-sized data blocks only. + // NOTE: + // The following code is believed to be correct, but it is not tested. + __ stop_static("SHA256 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0); +#endif + } + + return __ addr_at(start_off); + } + + // Compute SHA-512 function. + address generate_SHA512_stub(bool multiBlock, const char* name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + + const Register srcBuff = Z_ARG1; + const Register SHAState = Z_ARG2; // Only on entry. Reused soon thereafter. + const Register SHAState_local = Z_R1; + const Register SHAState_save = Z_ARG3; // NOTE(review): same register as srcOff; not referenced in the rest of this stub. + const Register srcOff = Z_ARG3; + const Register srcLimit = Z_ARG4; + const Register srcBufLen = Z_ARG2; // Destroys state address, must be copied before.
+ Label useKLMD, rtn; + + __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA512); // function code + __ z_lgr(SHAState_local, SHAState); // SHAState == parameter block + + if (multiBlock) { // Process everything from offset to limit. + // The following description is valid if we get a raw (unpimped) source data buffer, + // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above, + // the calling convention for these stubs is different. We leave the description in + // to inform the reader what must be happening hidden in the calling code. + // + // The data block to be processed can have arbitrary length, i.e. its length does not + // need to be an integer multiple of SHA_datablk. Therefore, we need to implement + // two different paths. If the length is an integer multiple, we use KIMD, saving us + // to copy the SHA state back and forth. If the length is odd, we copy the SHA state + // to the stack, execute a KLMD instruction on it and copy the result back to the + // caller's SHA state location. + + // Total #srcBuff bytes to process, rounded up to a full data block. + if (VM_Version::has_DistinctOpnds()) { + __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference + __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up + __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff); + __ z_ark(srcLimit, srcOff, srcBufLen); // srcLimit temporarily holds return value. + __ z_llgfr(srcBufLen, srcBufLen); // Cast to 64-bit. + } else { + __ z_lgfr(srcBufLen, srcLimit); // exact difference + __ z_sgfr(srcBufLen, srcOff); + __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up + __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff); + __ z_lgr(srcLimit, srcOff); // srcLimit temporarily holds return value. + __ z_agr(srcLimit, srcBufLen); + } + + // Integral #blocks to digest?
+ // As a result of the calculations above, srcBufLen MUST be an integer + // multiple of _SHA512_dataBlk, or else we are in big trouble. + // We insert a stop_static into the useKLMD path to guard against that. + __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); + __ z_brc(Assembler::bcondNotAllZero, useKLMD); + + // Process all full blocks. + __ kimd(srcBuff); + + __ z_lgr(Z_RET, srcLimit); // Offset of first unprocessed byte in buffer. + } else { // Process one data block only. + __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA512_dataBlk); // #srcBuff bytes to process + __ kimd(srcBuff); + __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA512_dataBlk, srcOff); // Offset of first unprocessed byte in buffer. + } + + __ bind(rtn); + __ z_br(Z_R14); + + if (multiBlock) { + __ bind(useKLMD); +#if 1 + // Security net: this stub is believed to be called for full-sized data blocks only. + // NOTE: + // The following code is believed to be correct, but it is not tested. + __ stop_static("SHA512 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0); +#endif + } + + return __ addr_at(start_off); + } + + + + // Arguments: + // Z_ARG1 - int crc + // Z_ARG2 - byte* buf + // Z_ARG3 - int length (of buffer) + // + // Result: + // Z_RET - int crc result + // + // Compute CRC32 function. + address generate_CRC32_updateBytes(const char* name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). + + // arguments to kernel_crc32: + Register crc = Z_ARG1; // Current checksum, preset by caller or result from previous call, int.
+ Register data = Z_ARG2; // source byte array + Register dataLen = Z_ARG3; // #bytes to process, int + Register table = Z_ARG4; // crc table address + const Register t0 = Z_R10; // work reg for kernel* emitters + const Register t1 = Z_R11; // work reg for kernel* emitters + const Register t2 = Z_R12; // work reg for kernel* emitters + const Register t3 = Z_R13; // work reg for kernel* emitters + + assert_different_registers(crc, data, dataLen, table); + + // We pass these values as ints, not as longs as required by C calling convention. + // Crc used as int. + __ z_llgfr(dataLen, dataLen); // Zero the upper 32 bits of the int length. + + StubRoutines::zarch::generate_load_crc_table_addr(_masm, table); + + __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 5 registers. + __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP); // Spill regs 10..13 to make them available as work registers. + __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3); + __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP); // Restore regs 10..13 from stack. + __ resize_frame(+(6*8), Z_R0, true); // Undo the frame resize done before the spill. + + __ z_llgfr(Z_RET, crc); // Updated crc is function result. No copying required, just zero upper 32 bits. + __ z_br(Z_R14); // Result already in Z_RET == Z_ARG1. + + return __ addr_at(start_off); + } + + + // Arguments: + // Z_ARG1 - x address + // Z_ARG2 - x length + // Z_ARG3 - y address + // Z_ARG4 - y length + // Z_ARG5 - z address + // 160[Z_SP] - z length + address generate_multiplyToLen() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "multiplyToLen"); + + address start = __ pc(); + + const Register x = Z_ARG1; + const Register xlen = Z_ARG2; + const Register y = Z_ARG3; + const Register ylen = Z_ARG4; + const Register z = Z_ARG5; + // zlen is passed on the stack: + // Address zlen(Z_SP, _z_abi(remaining_cargs)); + + // Next registers will be saved on stack in multiply_to_len().
+ const Register tmp1 = Z_tmp_1; + const Register tmp2 = Z_tmp_2; + const Register tmp3 = Z_tmp_3; + const Register tmp4 = Z_tmp_4; + const Register tmp5 = Z_R9; + + BLOCK_COMMENT("Entry:"); + + __ z_llgfr(xlen, xlen); + __ z_llgfr(ylen, ylen); + + __ multiply_to_len(x, xlen, y, ylen, z, tmp1, tmp2, tmp3, tmp4, tmp5); + + __ z_br(Z_R14); // Return to caller. + + return start; + } + + void generate_initial() { + // Generates the initial stubs (StubGenerator constructed with all == false) and initializes their entry points. + + // Entry points that exist in all platforms. + // Note: This is code that could be shared among different + // platforms - however the benefit seems to be smaller than the + // disadvantage of having a much more complicated generator + // structure. See also comment in stubRoutines.hpp. + StubRoutines::_forward_exception_entry = generate_forward_exception(); + + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + // Build this early so it's available for the interpreter. + StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); + + //---------------------------------------------------------------------- + // Entry points that are platform specific. + // NOTE(review): duplicate - the StackOverflowError stub was already generated above; this overwrites the same entry. Confirm and remove. + StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); + + if (UseCRC32Intrinsics) { + // Use the zarch-specific CRC32 table. + StubRoutines::_crc_table_adr = (address)StubRoutines::zarch::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes"); + } + + // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+ StubRoutines::zarch::_trot_table_addr = (address)StubRoutines::zarch::_trot_table; + } + + + void generate_all() { + // Generates all stubs and initializes the entry points. + + StubRoutines::zarch::_partial_subtype_check = generate_partial_subtype_check(); + + // These entry points require SharedInfo::stack0 to be set up in non-core builds. + StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + StubRoutines::_throw_IncompatibleClassChangeError_entry= generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false); + StubRoutines::_throw_NullPointerException_at_call_entry= generate_throw_exception("NullPointerException at call throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); + + StubRoutines::zarch::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); + + // Support for verify_oop (must happen after universe_init). + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine(); + + // Arraycopy stubs used by compilers. + generate_arraycopy_stubs(); + + // safefetch stubs + generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, &StubRoutines::_safefetch32_fault_pc, &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, &StubRoutines::_safefetchN_fault_pc, &StubRoutines::_safefetchN_continuation_pc); + + // Generate AES intrinsics code. 
+ if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_AES_encryptBlock("AES_encryptBlock"); + StubRoutines::_aescrypt_decryptBlock = generate_AES_decryptBlock("AES_decryptBlock"); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_AES_encrypt("AES_encryptBlock_chaining"); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_AES_decrypt("AES_decryptBlock_chaining"); + } + + // Generate SHA1/SHA256/SHA512 intrinsics code. + if (UseSHA1Intrinsics) { + StubRoutines::_sha1_implCompress = generate_SHA1_stub(false, "SHA1_singleBlock"); + StubRoutines::_sha1_implCompressMB = generate_SHA1_stub(true, "SHA1_multiBlock"); + } + if (UseSHA256Intrinsics) { + StubRoutines::_sha256_implCompress = generate_SHA256_stub(false, "SHA256_singleBlock"); + StubRoutines::_sha256_implCompressMB = generate_SHA256_stub(true, "SHA256_multiBlock"); + } + if (UseSHA512Intrinsics) { + StubRoutines::_sha512_implCompress = generate_SHA512_stub(false, "SHA512_singleBlock"); + StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(true, "SHA512_multiBlock"); + } + +#ifdef COMPILER2 + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + } + if (UseMontgomeryMultiplyIntrinsic) { + StubRoutines::_montgomeryMultiply + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); + } + if (UseMontgomerySquareIntrinsic) { + StubRoutines::_montgomerySquare + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); + } +#endif + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + // Replace the standard masm with a special one: + _masm = new MacroAssembler(code); + + _stub_count = !all ? 0x100 : 0x200; + if (all) { + generate_all(); + } else { + generate_initial(); + } + } + + private: + int _stub_count; + void stub_prolog(StubCodeDesc* cdesc) { +#ifdef ASSERT + // Put extra information in the stub code, to make it more readable. 
+ // Write the high part of the address. + // [RGV] Check if there is a dependency on the size of this prolog. + __ emit_32((intptr_t)cdesc >> 32); + __ emit_32((intptr_t)cdesc); + __ emit_32(++_stub_count); +#endif + align(true); + } + + void align(bool at_header = false) { + // z/Architecture cache line size is 256 bytes. + // There is no obvious benefit in aligning stub + // code to cache lines. Use CodeEntryAlignment instead. + const unsigned int icache_line_size = CodeEntryAlignment; + const unsigned int icache_half_line_size = MIN2(32, CodeEntryAlignment); + + if (at_header) { + while ((intptr_t)(__ pc()) % icache_line_size != 0) { + __ emit_16(0); + } + } else { + while ((intptr_t)(__ pc()) % icache_half_line_size != 0) { + __ z_nop(); + } + } + } + +}; + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} diff --git a/hotspot/src/cpu/s390/vm/stubRoutines_s390.cpp b/hotspot/src/cpu/s390/vm/stubRoutines_s390.cpp new file mode 100644 index 00000000000..8c60ae04350 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/stubRoutines_s390.cpp @@ -0,0 +1,569 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. + +address StubRoutines::zarch::_handler_for_unsafe_access_entry = NULL; + +address StubRoutines::zarch::_partial_subtype_check = NULL; + +// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction. +address StubRoutines::zarch::_trot_table_addr = NULL; + +int StubRoutines::zarch::_atomic_memory_operation_lock = StubRoutines::zarch::unlocked; + +#define __ masm-> + +void StubRoutines::zarch::generate_load_crc_table_addr(MacroAssembler* masm, Register table) { // Emits code loading StubRoutines::_crc_table_adr into table; ASSERT builds also emit sanity checks. + + __ load_absolute_address(table, StubRoutines::_crc_table_adr); +#ifdef ASSERT + assert(_crc_table_adr != NULL, "CRC lookup table address must be initialized by now"); + { + Label L; + __ load_const_optimized(Z_R0, StubRoutines::_crc_table_adr); + __ z_cgr(table, Z_R0); // safety net: loaded address must equal the table address + __ z_bre(L); + __ z_illtrap(); + __ asm_assert_eq("crc_table: external word relocation required for load_absolute_address", 0x33); + __ bind(L); + } + { + Label L; + __ load_const_optimized(Z_R0, 0x77073096UL); + __ z_cl(Z_R0, Address(table, 4)); // safety net: word at offset 4 must hold the constant loaded above + __ z_bre(L); + __ z_l(Z_R0, Address(table, 4)); // Load data from memory, we know the constant we compared against.
+ __ z_illtrap(); + __ asm_assert_eq("crc_table: address or contents seems to be messed up", 0x22); + __ bind(L); + } +#endif +} + +// Comapct string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction. +void StubRoutines::zarch::generate_load_trot_table_addr(MacroAssembler* masm, Register table) { + + RelocationHolder rspec = external_word_Relocation::spec((address)_trot_table); + __ relocate(rspec); + __ load_absolute_address(table, _trot_table_addr); +#ifdef ASSERT + assert(_trot_table_addr != NULL, "Translate table address must be initialized by now"); + assert((p2i(_trot_table_addr) & (TROT_ALIGNMENT-1)) == 0, "Translate table alignment error"); + for (int i = 0; i < 256; i++) { + assert(i == *((jshort*)(_trot_table_addr+2*i)), "trot_table[%d] = %d", i, *((jshort*)(_trot_table_addr+2*i))); + } + { + Label L; + __ load_const_optimized(Z_R0, StubRoutines::zarch::_trot_table_addr); + __ z_cgr(table, Z_R0); // safety net + __ z_bre(L); + __ z_illtrap(); + __ asm_assert_eq("crc_table: external word relocation does not work for load_absolute_address", 0x33); + __ bind(L); + } + { + Label L; + __ load_const_optimized(Z_R0, 0x0004000500060007UL); + __ z_clg(Z_R0, Address(table, 8)); // safety net + __ z_bre(L); + __ z_lg(Z_R0, Address(table, 8)); // Load data from memory, we know the constant we compared against. 
+ __ z_illtrap(); + __ asm_assert_eq("trot_table: address or contents seems to be messed up", 0x22); + __ bind(L); + } +#endif +} + + +/** + * trot_table[] + */ + +jlong StubRoutines::zarch::_trot_table[TROT_COLUMN_SIZE] = { + 0x0000000100020003UL, 0x0004000500060007UL, 0x00080009000a000bUL, 0x000c000d000e000fUL, + 0x0010001100120013UL, 0x0014001500160017UL, 0x00180019001a001bUL, 0x001c001d001e001fUL, + 0x0020002100220023UL, 0x0024002500260027UL, 0x00280029002a002bUL, 0x002c002d002e002fUL, + 0x0030003100320033UL, 0x0034003500360037UL, 0x00380039003a003bUL, 0x003c003d003e003fUL, + 0x0040004100420043UL, 0x0044004500460047UL, 0x00480049004a004bUL, 0x004c004d004e004fUL, + 0x0050005100520053UL, 0x0054005500560057UL, 0x00580059005a005bUL, 0x005c005d005e005fUL, + 0x0060006100620063UL, 0x0064006500660067UL, 0x00680069006a006bUL, 0x006c006d006e006fUL, + 0x0070007100720073UL, 0x0074007500760077UL, 0x00780079007a007bUL, 0x007c007d007e007fUL, + 0x0080008100820083UL, 0x0084008500860087UL, 0x00880089008a008bUL, 0x008c008d008e008fUL, + 0x0090009100920093UL, 0x0094009500960097UL, 0x00980099009a009bUL, 0x009c009d009e009fUL, + 0x00a000a100a200a3UL, 0x00a400a500a600a7UL, 0x00a800a900aa00abUL, 0x00ac00ad00ae00afUL, + 0x00b000b100b200b3UL, 0x00b400b500b600b7UL, 0x00b800b900ba00bbUL, 0x00bc00bd00be00bfUL, + 0x00c000c100c200c3UL, 0x00c400c500c600c7UL, 0x00c800c900ca00cbUL, 0x00cc00cd00ce00cfUL, + 0x00d000d100d200d3UL, 0x00d400d500d600d7UL, 0x00d800d900da00dbUL, 0x00dc00dd00de00dfUL, + 0x00e000e100e200e3UL, 0x00e400e500e600e7UL, 0x00e800e900ea00ebUL, 0x00ec00ed00ee00efUL, + 0x00f000f100f200f3UL, 0x00f400f500f600f7UL, 0x00f800f900fa00fbUL, 0x00fc00fd00fe00ffUL + }; + + +// crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.h +juint StubRoutines::zarch::_crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE] = { + { + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 
0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, 
+ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL +#ifdef CRC32_BYFOUR + }, + { + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 
0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 
0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL + }, + { + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 
0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, 
+ 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL + }, + { + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 
0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 
0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL + }, + { + 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, + 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, + 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, + 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, + 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, + 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, + 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, + 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, + 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, + 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, + 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, + 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, + 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, + 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, + 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, + 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, + 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, + 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, + 0xeab8be8bUL, 0x7c88b9fcUL, 
0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, + 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, + 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, + 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, + 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, + 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, + 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, + 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, + 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, + 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, + 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, + 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, + 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, + 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, + 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, + 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, + 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, + 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, + 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, + 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, + 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, + 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, + 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, + 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, + 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, + 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, + 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, + 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, + 
0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, + 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, + 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, + 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, + 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, + 0x8def022dUL + }, + { + 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, + 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, + 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, + 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, + 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, + 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, + 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, + 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, + 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, + 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, + 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, + 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, + 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, + 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, + 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, + 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, + 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, + 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, + 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, + 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, + 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, + 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, + 0x29467efbUL, 0x687765e2UL, 
0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, + 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, + 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, + 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, + 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, + 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, + 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, + 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, + 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, + 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, + 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, + 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, + 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, + 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, + 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, + 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, + 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, + 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, + 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, + 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, + 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, + 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, + 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, + 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, + 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, + 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, + 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, + 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, + 
0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, + 0x72fd2493UL + }, + { + 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, + 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, + 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, + 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, + 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, + 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, + 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, + 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, + 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, + 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, + 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, + 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, + 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, + 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, + 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, + 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, + 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, + 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, + 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, + 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, + 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, + 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, + 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, + 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, + 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, + 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, + 0xeecfb1e2UL, 0xd9a573e3UL, 
0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, + 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, + 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, + 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, + 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, + 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, + 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, + 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, + 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, + 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, + 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, + 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, + 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, + 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, + 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, + 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, + 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, + 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, + 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, + 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, + 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, + 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, + 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, + 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, + 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, + 0xed3498beUL + }, + { + 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, + 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, + 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 
0x3377dfe0UL, + 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL, + 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, + 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, + 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, + 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, + 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, + 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, + 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, + 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, + 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, + 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, + 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, + 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, + 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, + 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, + 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, + 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, + 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, + 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, + 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, + 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, + 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, + 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, + 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, + 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, + 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, + 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, + 0x78be2b98UL, 0x1dd99720UL, 
0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, + 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, + 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, + 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, + 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, + 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, + 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, + 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, + 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, + 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, + 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, + 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, + 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, + 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, + 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, + 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, + 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, + 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, + 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, + 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, + 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, + 0xf10605deUL +#endif + } +}; diff --git a/hotspot/src/cpu/s390/vm/stubRoutines_s390.hpp b/hotspot/src/cpu/s390/vm/stubRoutines_s390.hpp new file mode 100644 index 00000000000..5bb64303b6b --- /dev/null +++ b/hotspot/src/cpu/s390/vm/stubRoutines_s390.hpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_STUBROUTINES_ZARCH_64_64_HPP +#define CPU_S390_VM_STUBROUTINES_ZARCH_64_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to extend it. + +static bool returns_to_call_stub(address return_pc) { return return_pc == _call_stub_return_address; } + +enum { // Platform dependent constants. + // TODO: May be able to shrink this a lot + code_size1 = 20000, // Simply increase if too small (assembler will crash if too small). + code_size2 = 20000 // Simply increase if too small (assembler will crash if too small). +}; + +// MethodHandles adapters +enum method_handles_platform_dependent_constants { + method_handles_adapters_code_size = 5000 +}; + +#define CRC32_COLUMN_SIZE 256 +#define CRC32_BYFOUR +#ifdef CRC32_BYFOUR + #define CRC32_TABLES 8 +#else + #define CRC32_TABLES 1 +#endif + +// Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+#define TROT_ALIGNMENT 8 // Required by instruction, + // guaranteed by jlong table element type. +#define TROT_COLUMN_SIZE (256*sizeof(jchar)/sizeof(jlong)) + +class zarch { + friend class StubGenerator; + + public: + enum { nof_instance_allocators = 10 }; + + // allocator lock values + enum { + unlocked = 0, + locked = 1 + }; + + private: + static address _handler_for_unsafe_access_entry; + + static int _atomic_memory_operation_lock; + + static address _partial_subtype_check; + static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE]; + + // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction. + static address _trot_table_addr; + static jlong _trot_table[TROT_COLUMN_SIZE]; + + public: + // Global lock for everyone who needs to use atomic_compare_and_exchange + // or atomic_increment -- should probably use more locks for more + // scalability -- for instance one for each eden space or group of. + + // Address of the lock for atomic_compare_and_exchange. + static int* atomic_memory_operation_lock_addr() { return &_atomic_memory_operation_lock; } + + // Accessor and mutator for _atomic_memory_operation_lock. + static int atomic_memory_operation_lock() { return _atomic_memory_operation_lock; } + static void set_atomic_memory_operation_lock(int value) { _atomic_memory_operation_lock = value; } + + static address handler_for_unsafe_access_entry() { return _handler_for_unsafe_access_entry; } + + static address partial_subtype_check() { return _partial_subtype_check; } + + static void generate_load_crc_table_addr(MacroAssembler* masm, Register table); + + // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction.
+ static void generate_load_trot_table_addr(MacroAssembler* masm, Register table); +}; + +#endif // CPU_S390_VM_STUBROUTINES_ZARCH_64_64_HPP diff --git a/hotspot/src/cpu/s390/vm/templateInterpreterGenerator_s390.cpp b/hotspot/src/cpu/s390/vm/templateInterpreterGenerator_s390.cpp new file mode 100644 index 00000000000..74dd0915edc --- /dev/null +++ b/hotspot/src/cpu/s390/vm/templateInterpreterGenerator_s390.cpp @@ -0,0 +1,2398 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/abstractInterpreter.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + + +// Size of interpreter code. Increase if too small. Interpreter will +// fail with a guarantee ("not enough space for interpreter generation"); +// if too small. +// Run with +PrintInterpreter to get the VM to print out the size. +// Max size with JVMTI +int TemplateInterpreter::InterpreterCodeSize = 320*K; + +#undef __ +#ifdef PRODUCT + #define __ _masm-> +#else + #define __ _masm-> +// #define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)-> +#endif + +#define BLOCK_COMMENT(str) __ block_comment(str) +#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":") + +#define oop_tmp_offset _z_ijava_state_neg(oop_tmp) + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_slow_signature_handler() { + // + // New slow_signature handler that respects the z/Architecture + // C calling conventions. + // + // We get called by the native entry code with our output register + // area == 8. 
First we call InterpreterRuntime::get_result_handler + // to copy the pointer to the signature string temporarily to the + // first C-argument and to return the result_handler in + // Z_RET. Since native_entry will copy the jni-pointer to the + // first C-argument slot later on, it's OK to occupy this slot + // temporarily. Then we copy the argument list on the java + // expression stack into native varargs format on the native stack + // and load arguments into argument registers. Integer arguments in + // the varargs vector will be sign-extended to 8 bytes. + // + // On entry: + // Z_ARG1 - intptr_t* Address of java argument list in memory. + // Z_state - cppInterpreter* Address of interpreter state for + // this method + // Z_method + // + // On exit (just before return instruction): + // Z_RET contains the address of the result_handler. + // Z_ARG2 is not updated for static methods and contains "this" otherwise. + // Z_ARG3-Z_ARG5 contain the first 3 arguments of types other than float and double. + // Z_FARG1-Z_FARG4 contain the first 4 arguments of type float or double. + + const int LogSizeOfCase = 3; + + const int max_fp_register_arguments = Argument::n_float_register_parameters; + const int max_int_register_arguments = Argument::n_register_parameters - 2; // First 2 are reserved. + + const Register arg_java = Z_tmp_2; + const Register arg_c = Z_tmp_3; + const Register signature = Z_R1_scratch; // Is a string. + const Register fpcnt = Z_R0_scratch; + const Register argcnt = Z_tmp_4; + const Register intSlot = Z_tmp_1; + const Register sig_end = Z_tmp_1; // Assumed end of signature (only used in do_object). + const Register target_sp = Z_tmp_1; + const FloatRegister floatSlot = Z_F1; + + const int d_signature = _z_abi(gpr6); // Only spill space, register contents not affected. + const int d_fpcnt = _z_abi(gpr7); // Only spill space, register contents not affected. 
+ + unsigned int entry_offset = __ offset(); + + BLOCK_COMMENT("slow_signature_handler {"); + + // We use target_sp for storing arguments in the C frame. + __ save_return_pc(); + + __ z_stmg(Z_R10,Z_R13,-32,Z_SP); + __ push_frame_abi160(32); + + __ z_lgr(arg_java, Z_ARG1); + + Register method = Z_ARG2; // Directly load into correct argument register. + + __ get_method(method); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_signature), Z_thread, method); + + // Move signature to callee saved register. + // Don't directly write to stack. Frame is used by VM call. + __ z_lgr(Z_tmp_1, Z_RET); + + // Reload method. Register may have been altered by VM call. + __ get_method(method); + + // Get address of result handler. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_result_handler), Z_thread, method); + + // Save signature address to stack. + __ z_stg(Z_tmp_1, d_signature, Z_SP); + + // Don't overwrite return value (Z_RET, Z_ARG1) in rest of the method ! + + { + Label isStatic; + + // Test if static. + // We can test the bit directly. + // Path is Z_method->_access_flags._flags. + // We only support flag bits in the least significant byte (assert !). + // Therefore add 3 to address that byte within "_flags". + // Reload method. VM call above may have destroyed register contents + __ get_method(method); + __ testbit(method2_(method, access_flags), JVM_ACC_STATIC_BIT); + method = noreg; // end of life + __ z_btrue(isStatic); + + // For non-static functions, pass "this" in Z_ARG2 and copy it to 2nd C-arg slot. + // Need to box the Java object here, so we use arg_java + // (address of current Java stack slot) as argument and + // don't dereference it as in case of ints, floats, etc.. + __ z_lgr(Z_ARG2, arg_java); + __ add2reg(arg_java, -BytesPerWord); + __ bind(isStatic); + } + + // argcnt == 0 corresponds to 3rd C argument. 
+ // arg #1 (result handler) and + // arg #2 (this, for non-statics), unused else + // are reserved and pre-filled above. + // arg_java points to the corresponding Java argument here. It + // has been decremented by one argument (this) in case of non-static. + __ clear_reg(argcnt, true, false); // Don't set CC. + __ z_lg(target_sp, 0, Z_SP); + __ add2reg(arg_c, _z_abi(remaining_cargs), target_sp); + // No floating-point args parsed so far. + __ clear_mem(Address(Z_SP, d_fpcnt), 8); + + NearLabel move_intSlot_to_ARG, move_floatSlot_to_FARG; + NearLabel loop_start, loop_start_restore, loop_end; + NearLabel do_int, do_long, do_float, do_double; + NearLabel do_dontreachhere, do_object, do_array, do_boxed; + +#ifdef ASSERT + // Signature needs to point to '(' (== 0x28) at entry. + __ z_lg(signature, d_signature, Z_SP); + __ z_cli(0, signature, (int) '('); + __ z_brne(do_dontreachhere); +#endif + + __ bind(loop_start_restore); + __ z_lg(signature, d_signature, Z_SP); // Restore signature ptr, destroyed by move_XX_to_ARG. + + BIND(loop_start); + // Advance to next argument type token from the signature. + __ add2reg(signature, 1); + + // Use CLI, works well on all CPU versions. 
+ __ z_cli(0, signature, (int) ')'); + __ z_bre(loop_end); // end of signature + __ z_cli(0, signature, (int) 'L'); + __ z_bre(do_object); // object #9 + __ z_cli(0, signature, (int) 'F'); + __ z_bre(do_float); // float #7 + __ z_cli(0, signature, (int) 'J'); + __ z_bre(do_long); // long #6 + __ z_cli(0, signature, (int) 'B'); + __ z_bre(do_int); // byte #1 + __ z_cli(0, signature, (int) 'Z'); + __ z_bre(do_int); // boolean #2 + __ z_cli(0, signature, (int) 'C'); + __ z_bre(do_int); // char #3 + __ z_cli(0, signature, (int) 'S'); + __ z_bre(do_int); // short #4 + __ z_cli(0, signature, (int) 'I'); + __ z_bre(do_int); // int #5 + __ z_cli(0, signature, (int) 'D'); + __ z_bre(do_double); // double #8 + __ z_cli(0, signature, (int) '['); + __ z_bre(do_array); // array #10 + + __ bind(do_dontreachhere); + + __ unimplemented("ShouldNotReachHere in slow_signature_handler", 120); + + // Array argument + BIND(do_array); + + { + Label start_skip, end_skip; + + __ bind(start_skip); + + // Advance to next type tag from signature. + __ add2reg(signature, 1); + + // Use CLI, works well on all CPU versions. + __ z_cli(0, signature, (int) '['); + __ z_bre(start_skip); // Skip further brackets. + + __ z_cli(0, signature, (int) '9'); + __ z_brh(end_skip); // no optional size + + __ z_cli(0, signature, (int) '0'); + __ z_brnl(start_skip); // Skip optional size. + + __ bind(end_skip); + + __ z_cli(0, signature, (int) 'L'); + __ z_brne(do_boxed); // If not array of objects: go directly to do_boxed. + } + + // OOP argument + BIND(do_object); + // Pass by an object's type name. + { + Label L; + + __ add2reg(sig_end, 4095, signature); // Assume object type name is shorter than 4k. + __ load_const_optimized(Z_R0, (int) ';'); // Type name terminator (must be in Z_R0!). + __ MacroAssembler::search_string(sig_end, signature); + __ z_brl(L); + __ z_illtrap(); // No semicolon found: internal error or object name too long. 
+ __ bind(L); + __ z_lgr(signature, sig_end); + // fallthru to do_boxed + } + + // Need to box the Java object here, so we use arg_java + // (address of current Java stack slot) as argument and + // don't dereference it as in case of ints, floats, etc.. + + // UNBOX argument + // Load reference and check for NULL. + Label do_int_Entry4Boxed; + __ bind(do_boxed); + { + __ load_and_test_long(intSlot, Address(arg_java)); + __ z_bre(do_int_Entry4Boxed); + __ z_lgr(intSlot, arg_java); + __ z_bru(do_int_Entry4Boxed); + } + + // INT argument + + // (also for byte, boolean, char, short) + // Use lgf for load (sign-extend) and stg for store. + BIND(do_int); + __ z_lgf(intSlot, 0, arg_java); + + __ bind(do_int_Entry4Boxed); + __ add2reg(arg_java, -BytesPerWord); + // If argument fits into argument register, go and handle it, otherwise continue. + __ compare32_and_branch(argcnt, max_int_register_arguments, + Assembler::bcondLow, move_intSlot_to_ARG); + __ z_stg(intSlot, 0, arg_c); + __ add2reg(arg_c, BytesPerWord); + __ z_bru(loop_start); + + // LONG argument + + BIND(do_long); + __ add2reg(arg_java, -2*BytesPerWord); // Decrement first to have positive displacement for lg. + __ z_lg(intSlot, BytesPerWord, arg_java); + // If argument fits into argument register, go and handle it, otherwise continue. 
+ __ compare32_and_branch(argcnt, max_int_register_arguments, + Assembler::bcondLow, move_intSlot_to_ARG); + __ z_stg(intSlot, 0, arg_c); + __ add2reg(arg_c, BytesPerWord); + __ z_bru(loop_start); + + // FLOAT argumen + + BIND(do_float); + __ z_le(floatSlot, 0, arg_java); + __ add2reg(arg_java, -BytesPerWord); + assert(max_fp_register_arguments <= 255, "always true"); // safety net + __ z_cli(d_fpcnt+7, Z_SP, max_fp_register_arguments); + __ z_brl(move_floatSlot_to_FARG); + __ z_ste(floatSlot, 4, arg_c); + __ add2reg(arg_c, BytesPerWord); + __ z_bru(loop_start); + + // DOUBLE argument + + BIND(do_double); + __ add2reg(arg_java, -2*BytesPerWord); // Decrement first to have positive displacement for lg. + __ z_ld(floatSlot, BytesPerWord, arg_java); + assert(max_fp_register_arguments <= 255, "always true"); // safety net + __ z_cli(d_fpcnt+7, Z_SP, max_fp_register_arguments); + __ z_brl(move_floatSlot_to_FARG); + __ z_std(floatSlot, 0, arg_c); + __ add2reg(arg_c, BytesPerWord); + __ z_bru(loop_start); + + // Method exit, all arguments proocessed. + __ bind(loop_end); + __ pop_frame(); + __ restore_return_pc(); + __ z_lmg(Z_R10,Z_R13,-32,Z_SP); + __ z_br(Z_R14); + + // Copy int arguments. + + Label iarg_caselist; // Distance between each case has to be a power of 2 + // (= 1 << LogSizeOfCase). + __ align(16); + BIND(iarg_caselist); + __ z_lgr(Z_ARG3, intSlot); // 4 bytes + __ z_bru(loop_start_restore); // 4 bytes + + __ z_lgr(Z_ARG4, intSlot); + __ z_bru(loop_start_restore); + + __ z_lgr(Z_ARG5, intSlot); + __ z_bru(loop_start_restore); + + __ align(16); + __ bind(move_intSlot_to_ARG); + __ z_stg(signature, d_signature, Z_SP); // Spill since signature == Z_R1_scratch. + __ z_larl(Z_R1_scratch, iarg_caselist); + __ z_sllg(Z_R0_scratch, argcnt, LogSizeOfCase); + __ add2reg(argcnt, 1); + __ z_agr(Z_R1_scratch, Z_R0_scratch); + __ z_bcr(Assembler::bcondAlways, Z_R1_scratch); + + // Copy float arguments. 

  // Jump table for FP arguments, analogous to the int case list: every case
  // must have the same size so that the target can be computed by shifting
  // the argument index.
  Label farg_caselist;   // Distance between each case has to be a power of 2
                         // (= 1 << LogSizeOfCase), padded with nop.
  __ align(16);
  BIND(farg_caselist);
  __ z_ldr(Z_FARG1, floatSlot); // 2 bytes
  __ z_bru(loop_start_restore); // 4 bytes
  __ z_nop();                   // 2 bytes

  __ z_ldr(Z_FARG2, floatSlot);
  __ z_bru(loop_start_restore);
  __ z_nop();

  __ z_ldr(Z_FARG3, floatSlot);
  __ z_bru(loop_start_restore);
  __ z_nop();

  __ z_ldr(Z_FARG4, floatSlot);
  __ z_bru(loop_start_restore);
  __ z_nop();

  // Dispatch into farg_caselist: target = farg_caselist + (old fp count << LogSizeOfCase).
  __ align(16);
  __ bind(move_floatSlot_to_FARG);
  __ z_stg(signature, d_signature, Z_SP);                 // Spill since signature == Z_R1_scratch.
  __ z_lg(Z_R0_scratch, d_fpcnt, Z_SP);                   // Need old value for indexing.
  __ add2mem_64(Address(Z_SP, d_fpcnt), 1, Z_R1_scratch); // Increment index.
  __ z_larl(Z_R1_scratch, farg_caselist);
  __ z_sllg(Z_R0_scratch, Z_R0_scratch, LogSizeOfCase);
  __ z_agr(Z_R1_scratch, Z_R0_scratch);
  __ z_bcr(Assembler::bcondAlways, Z_R1_scratch);

  BLOCK_COMMENT("} slow_signature_handler");

  return __ addr_at(entry_offset);
}

// Generate the result handler stub for the given result type.
//
// The asserts below require Z_tos == Z_RET and Z_ftos == Z_FRET, so the
// result is converted in place and no register move is emitted.
//
// Args:
//   type - BasicType of the result. Types not listed in the switch hit
//          ShouldNotReachHere() at generation time.
//
// Returns the start address of the generated stub. The stub itself
// returns to its caller via Z_R14.
address TemplateInterpreterGenerator::generate_result_handler_for (BasicType type) {
  address entry = __ pc();

  assert(Z_tos == Z_RET, "Result handler: must move result!");
  assert(Z_ftos == Z_FRET, "Result handler: must move float result!");

  switch (type) {
    case T_BOOLEAN:
      __ c2bool(Z_tos);
      break;
    case T_CHAR:
      // Keep only the low 16 bits.
      __ and_imm(Z_tos, 0xffff);
      break;
    case T_BYTE:
      // LBR: sign-extend the low byte.
      __ z_lbr(Z_tos, Z_tos);
      break;
    case T_SHORT:
      // LHR: sign-extend the low halfword.
      __ z_lhr(Z_tos, Z_tos);
      break;
    case T_INT:
    case T_LONG:
    case T_VOID:
    case T_FLOAT:
    case T_DOUBLE:
      // No conversion emitted for these types.
      break;
    case T_OBJECT:
      // Retrieve result from frame...
      __ mem2reg_opt(Z_tos, Address(Z_fp, oop_tmp_offset));
      // and verify it.
      __ verify_oop(Z_tos);
      break;
    default:
      ShouldNotReachHere();
  }
  __ z_br(Z_R14);      // Return from result handler.
  return entry;
}

// Abstract method entry.
// Attempt to execute abstract method. Throw exception.
+address TemplateInterpreterGenerator::generate_abstract_entry(void) { + unsigned int entry_offset = __ offset(); + + // Caller could be the call_stub or a compiled method (x86 version is wrong!). + + BLOCK_COMMENT("abstract_entry {"); + + // Implement call of InterpreterRuntime::throw_AbstractMethodError. + __ set_top_ijava_frame_at_SP_as_last_Java_frame(Z_SP, Z_R1); + __ save_return_pc(); // Save Z_R14. + __ push_frame_abi160(0); // Without new frame the RT call could overwrite the saved Z_R14. + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError), Z_thread); + + __ pop_frame(); + __ restore_return_pc(); // Restore Z_R14. + __ reset_last_Java_frame(); + + // Restore caller sp for c2i case. + __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started. + + // branch to SharedRuntime::generate_forward_exception() which handles all possible callers, + // i.e. call stub, compiled method, interpreted method. + __ load_absolute_address(Z_tmp_1, StubRoutines::forward_exception_entry()); + __ z_br(Z_tmp_1); + + BLOCK_COMMENT("} abstract_entry"); + + return __ addr_at(entry_offset); +} + +address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { +#if INCLUDE_ALL_GCS + if (UseG1GC) { + // Inputs: + // Z_ARG1 - receiver + // + // What we do: + // - Load the referent field address. + // - Load the value in the referent field. + // - Pass that value to the pre-barrier. + // + // In the case of G1 this will record the value of the + // referent in an SATB buffer if marking is active. + // This will cause concurrent marking to mark the referent + // field as live. + + Register scratch1 = Z_tmp_2; + Register scratch2 = Z_tmp_3; + Register pre_val = Z_RET; // return value + // Z_esp is callers operand stack pointer, i.e. it points to the parameters. 
+ Register Rargp = Z_esp; + + Label slow_path; + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + BLOCK_COMMENT("Reference_get {"); + + // If the receiver is null then it is OK to jump to the slow path. + __ load_and_test_long(pre_val, Address(Rargp, Interpreter::stackElementSize)); // Get receiver. + __ z_bre(slow_path); + + // Load the value of the referent field. + __ load_heap_oop(pre_val, referent_offset, pre_val); + + // Restore caller sp for c2i case. + __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started. + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + // Note: + // With these parameters the write_barrier_pre does not + // generate instructions to load the previous value. + __ g1_write_barrier_pre(noreg, // obj + noreg, // offset + pre_val, // pre_val + noreg, // no new val to preserve + scratch1, // tmp + scratch2, // tmp + true); // pre_val_needed + + __ z_br(Z_R14); + + // Branch to previously generated regular method entry. + __ bind(slow_path); + + address meth_entry = Interpreter::entry_for_kind(Interpreter::zerolocals); + __ jump_to_entry(meth_entry, Z_R1); + + BLOCK_COMMENT("} Reference_get"); + + return entry; + } +#endif // INCLUDE_ALL_GCS + + return NULL; +} + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + + DEBUG_ONLY(__ verify_esp(Z_esp, Z_ARG5)); + + // Restore bcp under the assumption that the current frame is still + // interpreted. + __ restore_bcp(); + + // Expression stack must be empty before entering the VM if an + // exception happened. + __ empty_expression_stack(); + // Throw exception. 
+ __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +// +// Args: +// Z_ARG3: aberrant index +// +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char * name) { + address entry = __ pc(); + address excp = CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException); + + // Expression stack must be empty before entering the VM if an + // exception happened. + __ empty_expression_stack(); + + // Setup parameters. + // Leave out the name and use register for array to create more detailed exceptions. + __ load_absolute_address(Z_ARG2, (address) name); + __ call_VM(noreg, excp, Z_ARG2, Z_ARG3); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + + // Object is at TOS. + __ pop_ptr(Z_ARG2); + + // Expression stack must be empty before entering the VM if an + // exception happened. + __ empty_expression_stack(); + + __ call_VM(Z_ARG1, + CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), + Z_ARG2); + + DEBUG_ONLY(__ should_not_reach_here();) + + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + + BLOCK_COMMENT("exception_handler_common {"); + + // Expression stack must be empty before entering the VM if an + // exception happened. 
+ __ empty_expression_stack(); + if (name != NULL) { + __ load_absolute_address(Z_ARG2, (address)name); + } else { + __ clear_reg(Z_ARG2, true, false); + } + + if (pass_oop) { + __ call_VM(Z_tos, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), + Z_ARG2, Z_tos /*object (see TT::aastore())*/); + } else { + if (message != NULL) { + __ load_absolute_address(Z_ARG3, (address)message); + } else { + __ clear_reg(Z_ARG3, true, false); + } + __ call_VM(Z_tos, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), + Z_ARG2, Z_ARG3); + } + // Throw exception. + __ load_absolute_address(Z_R1_scratch, Interpreter::throw_exception_entry()); + __ z_br(Z_R1_scratch); + + BLOCK_COMMENT("} exception_handler_common"); + + return entry; +} + +// Unused, should never pass by. +address TemplateInterpreterGenerator::generate_continuation_for (TosState state) { + address entry = __ pc(); + __ should_not_reach_here(); + return entry; +} + +address TemplateInterpreterGenerator::generate_return_entry_for (TosState state, int step, size_t index_size) { + address entry = __ pc(); + + BLOCK_COMMENT("return_entry {"); + + // Pop i2c extension or revert top-2-parent-resize done by interpreted callees. + Register sp_before_i2c_extension = Z_bcp; + __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer. + __ z_lg(sp_before_i2c_extension, Address(Z_fp, _z_ijava_state_neg(top_frame_sp))); + __ resize_frame_absolute(sp_before_i2c_extension, Z_locals/*tmp*/, true/*load_fp*/); + + // TODO(ZASM): necessary?? 
+ // // and NULL it as marker that esp is now tos until next java call + // __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); + + __ restore_bcp(); + __ restore_locals(); + __ restore_esp(); + + if (state == atos) { + __ profile_return_type(Z_tmp_1, Z_tos, Z_tmp_2); + } + + Register cache = Z_tmp_1; + Register size = Z_tmp_1; + Register offset = Z_tmp_2; + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + __ get_cache_and_index_at_bcp(cache, offset, 1, index_size); + + // #args is in rightmost byte of the _flags field. + __ z_llgc(size, Address(cache, offset, flags_offset+(sizeof(size_t)-1))); + __ z_sllg(size, size, Interpreter::logStackElementSize); // Each argument size in bytes. + __ z_agr(Z_esp, size); // Pop arguments. + __ dispatch_next(state, step); + + BLOCK_COMMENT("} return_entry"); + + return entry; +} + +address TemplateInterpreterGenerator::generate_deopt_entry_for (TosState state, + int step) { + address entry = __ pc(); + + BLOCK_COMMENT("deopt_entry {"); + + // TODO(ZASM): necessary? NULL last_sp until next java call + // __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); + __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer. + __ restore_bcp(); + __ restore_locals(); + __ restore_esp(); + + // Handle exceptions. 
+ { + Label L; + __ load_and_test_long(Z_R0/*pending_exception*/, thread_(pending_exception)); + __ z_bre(L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + __ dispatch_next(state, step); + + BLOCK_COMMENT("} deopt_entry"); + + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for (TosState state, + address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for (vtos)); + return entry; +} + +// +// Helpers for commoning out cases in the various type of method entries. +// + +// Increment invocation count & check for overflow. +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test. +// +// Z_ARG2: method (see generate_fixed_frame()) +// +void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { + Label done; + Register method = Z_ARG2; // Generate_fixed_frame() copies Z_method into Z_ARG2. + Register m_counters = Z_ARG4; + + BLOCK_COMMENT("counter_incr {"); + + // Note: In tiered we increment either counters in method or in MDO depending + // if we are profiling or not. + if (TieredCompilation) { + int increment = InvocationCounter::count_increment; + if (ProfileInterpreter) { + NearLabel no_mdo; + Register mdo = m_counters; + // Are we profiling? + __ load_and_test_long(mdo, method2_(method, method_data)); + __ branch_optimized(Assembler::bcondZero, no_mdo); + // Increment counter in the MDO. 
+ const Address mdo_invocation_counter(mdo, MethodData::invocation_counter_offset() + + InvocationCounter::counter_offset()); + const Address mask(mdo, MethodData::invoke_mask_offset()); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, + Z_R1_scratch, false, Assembler::bcondZero, + overflow); + __ z_bru(done); + __ bind(no_mdo); + } + + // Increment counter in MethodCounters. + const Address invocation_counter(m_counters, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + // Get address of MethodCounters object. + __ get_method_counters(method, m_counters, done); + const Address mask(m_counters, MethodCounters::invoke_mask_offset()); + __ increment_mask_and_jump(invocation_counter, + increment, mask, + Z_R1_scratch, false, Assembler::bcondZero, + overflow); + } else { + Register counter_sum = Z_ARG3; // The result of this piece of code. + Register tmp = Z_R1_scratch; +#ifdef ASSERT + { + NearLabel ok; + __ get_method(tmp); + __ compare64_and_branch(method, tmp, Assembler::bcondEqual, ok); + __ z_illtrap(0x66); + __ bind(ok); + } +#endif + + // Get address of MethodCounters object. + __ get_method_counters(method, m_counters, done); + // Update standard invocation counters. + __ increment_invocation_counter(m_counters, counter_sum); + if (ProfileInterpreter) { + __ add2mem_32(Address(m_counters, MethodCounters::interpreter_invocation_counter_offset()), 1, tmp); + if (profile_method != NULL) { + const Address profile_limit(m_counters, MethodCounters::interpreter_profile_limit_offset()); + __ z_cl(counter_sum, profile_limit); + __ branch_optimized(Assembler::bcondLow, *profile_method_continue); + // If no method data exists, go to profile_method. 
+ __ test_method_data_pointer(tmp, *profile_method); + } + } + + const Address invocation_limit(m_counters, MethodCounters::interpreter_invocation_limit_offset()); + __ z_cl(counter_sum, invocation_limit); + __ branch_optimized(Assembler::bcondNotLow, *overflow); + } + + __ bind(done); + + BLOCK_COMMENT("} counter_incr"); +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { + // InterpreterRuntime::frequency_counter_overflow takes two + // arguments, the first (thread) is passed by call_VM, the second + // indicates if the counter overflow occurs at a backwards branch + // (NULL bcp). We pass zero for it. The call returns the address + // of the verified entry point for the method or NULL if the + // compilation did not complete (either went background or bailed + // out). + __ clear_reg(Z_ARG2); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), + Z_ARG2); + __ z_bru(do_continue); +} + +void TemplateInterpreterGenerator::generate_stack_overflow_check(Register frame_size, Register tmp1) { + Register tmp2 = Z_R1_scratch; + const int page_size = os::vm_page_size(); + NearLabel after_frame_check; + + BLOCK_COMMENT("counter_overflow {"); + + assert_different_registers(frame_size, tmp1); + + // Stack banging is sufficient overflow check if frame_size < page_size. + if (Immediate::is_uimm(page_size, 15)) { + __ z_chi(frame_size, page_size); + __ z_brl(after_frame_check); + } else { + __ load_const_optimized(tmp1, page_size); + __ compareU32_and_branch(frame_size, tmp1, Assembler::bcondLow, after_frame_check); + } + + // Get the stack base, and in debug, verify it is non-zero. 
+ __ z_lg(tmp1, thread_(stack_base)); +#ifdef ASSERT + address reentry = NULL; + NearLabel base_not_zero; + __ compareU64_and_branch(tmp1, (intptr_t)0L, Assembler::bcondNotEqual, base_not_zero); + reentry = __ stop_chain_static(reentry, "stack base is zero in generate_stack_overflow_check"); + __ bind(base_not_zero); +#endif + + // Get the stack size, and in debug, verify it is non-zero. + assert(sizeof(size_t) == sizeof(intptr_t), "wrong load size"); + __ z_lg(tmp2, thread_(stack_size)); +#ifdef ASSERT + NearLabel size_not_zero; + __ compareU64_and_branch(tmp2, (intptr_t)0L, Assembler::bcondNotEqual, size_not_zero); + reentry = __ stop_chain_static(reentry, "stack size is zero in generate_stack_overflow_check"); + __ bind(size_not_zero); +#endif + + // Compute the beginning of the protected zone minus the requested frame size. + __ z_sgr(tmp1, tmp2); + __ add2reg(tmp1, JavaThread::stack_guard_zone_size()); + + // Add in the size of the frame (which is the same as subtracting it from the + // SP, which would take another register. + __ z_agr(tmp1, frame_size); + + // The frame is greater than one page in size, so check against + // the bottom of the stack. + __ compareU64_and_branch(Z_SP, tmp1, Assembler::bcondHigh, after_frame_check); + + // The stack will overflow, throw an exception. + + // Restore SP to sender's sp. This is necessary if the sender's frame is an + // extended compiled frame (see gen_c2i_adapter()) and safer anyway in case of + // JSR292 adaptations. + __ resize_frame_absolute(Z_R10, tmp1, true/*load_fp*/); + + // Note also that the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + AddressLiteral stub(StubRoutines::throw_StackOverflowError_entry()); + __ load_absolute_address(tmp1, StubRoutines::throw_StackOverflowError_entry()); + __ z_br(tmp1); + + // If you get to here, then there is enough stack space. + __ bind(after_frame_check); + + BLOCK_COMMENT("} counter_overflow"); +} + +// Allocate monitor and lock method (asm interpreter). +// +// Args: +// Z_locals: locals + +void TemplateInterpreterGenerator::lock_method(void) { + + BLOCK_COMMENT("lock_method {"); + + // Synchronize method. + const Register method = Z_tmp_2; + __ get_method(method); + +#ifdef ASSERT + address reentry = NULL; + { + Label L; + __ testbit(method2_(method, access_flags), JVM_ACC_SYNCHRONIZED_BIT); + __ z_btrue(L); + reentry = __ stop_chain_static(reentry, "method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + + // Get synchronization object. + const Register object = Z_tmp_2; + + { + Label done; + Label static_method; + + __ testbit(method2_(method, access_flags), JVM_ACC_STATIC_BIT); + __ z_btrue(static_method); + + // non-static method: Load receiver obj from stack. + __ mem2reg_opt(object, Address(Z_locals, Interpreter::local_offset_in_bytes(0))); + __ z_bru(done); + + __ bind(static_method); + + // Lock the java mirror. + __ load_mirror(object, method); +#ifdef ASSERT + { + NearLabel L; + __ compare64_and_branch(object, (intptr_t) 0, Assembler::bcondNotEqual, L); + reentry = __ stop_chain_static(reentry, "synchronization object is NULL"); + __ bind(L); + } +#endif // ASSERT + + __ bind(done); + } + + __ add_monitor_to_stack(true, Z_ARG3, Z_ARG4, Z_ARG5); // Allocate monitor elem. + // Store object and lock it. + __ get_monitors(Z_tmp_1); + __ reg2mem_opt(object, Address(Z_tmp_1, BasicObjectLock::obj_offset_in_bytes())); + __ lock_object(Z_tmp_1, object); + + BLOCK_COMMENT("} lock_method"); +} + +// Generate a fixed interpreter frame. 
// This is identical setup for
// interpreted methods and for native methods hence the shared code.
//
// Registers alive
//   Z_thread   - JavaThread*
//   Z_SP       - old stack pointer
//   Z_method   - callee's method
//   Z_esp      - parameter list (slot 'above' last param)
//   Z_R14      - return pc, to be stored in caller's frame
//   Z_R10      - sender sp, note: Z_tmp_1 is Z_R10!
//
// Registers updated
//   Z_SP       - new stack pointer
//   Z_esp      - callee's operand stack pointer
//                points to the slot above the value on top
//   Z_locals   - used to access locals: locals[i] := *(Z_locals - i*BytesPerWord)
//   Z_bcp      - the bytecode pointer
//   Z_fp       - the frame pointer, thereby killing Z_method
//   Z_ARG2     - copy of Z_method
//
// Args (generation time):
//   native_call - true when the frame is set up for a native method; selects
//                 how max_stack is computed and skips zeroing of locals.
//
void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {

  //  stack layout
  //
  //   F1 [TOP_IJAVA_FRAME_ABI]              <-- Z_SP, Z_R10 (see note below)
  //      [F1's operand stack (unused)]
  //      [F1's outgoing Java arguments]     <-- Z_esp
  //      [F1's operand stack (non args)]
  //      [monitors]      (optional)
  //      [IJAVA_STATE]
  //
  //   F2 [PARENT_IJAVA_FRAME_ABI]
  //      ...
  //
  //  0x000
  //
  // Note: Z_R10, the sender sp, will be below Z_SP if F1 was extended by a c2i adapter.

  //=============================================================================
  // Allocate space for locals other than the parameters, the
  // interpreter state, monitors, and the expression stack.

  const Register local_count     = Z_ARG5;
  const Register fp              = Z_tmp_2;

  BLOCK_COMMENT("generate_fixed_frame {");

  {
  // local registers
  const Register top_frame_size  = Z_ARG2;
  const Register sp_after_resize = Z_ARG3;
  const Register max_stack       = Z_ARG4;

  // local_count = method->constMethod->max_locals();
  __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
  __ z_llgh(local_count, Address(Z_R1_scratch, ConstMethod::size_of_locals_offset()));

  if (native_call) {
    // If we're calling a native method, we replace max_stack (which is
    // zero) with space for the worst-case signature handler varargs
    // vector, which is:
    //   max_stack = max(Argument::n_register_parameters, parameter_count+2);
    //
    // We add two slots to the parameter_count, one for the jni
    // environment and one for a possible native mirror. We allocate
    // space for at least the number of ABI registers, even though
    // InterpreterRuntime::slow_signature_handler won't write more than
    // parameter_count+2 words when it creates the varargs vector at the
    // top of the stack. The generated slow signature handler will just
    // load trash into registers beyond the necessary number. We're
    // still going to cut the stack back by the ABI register parameter
    // count so as to get SP+16 pointing at the ABI outgoing parameter
    // area, so we need to allocate at least that much even though we're
    // going to throw it away.
    //

    __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
    __ z_llgh(max_stack,  Address(Z_R1_scratch, ConstMethod::size_of_parameters_offset()));
    __ add2reg(max_stack, 2);

    NearLabel passing_args_on_stack;

    // max_stack in bytes
    __ z_sllg(max_stack, max_stack, LogBytesPerWord);

    int argument_registers_in_bytes = Argument::n_register_parameters << LogBytesPerWord;
    __ compare64_and_branch(max_stack, argument_registers_in_bytes, Assembler::bcondNotLow, passing_args_on_stack);

    __ load_const_optimized(max_stack, argument_registers_in_bytes);

    __ bind(passing_args_on_stack);
  } else {
    // !native_call
    __ z_lg(max_stack, method_(const));

    // Calculate number of non-parameter locals (in slots):
    __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
    __ z_sh(local_count, Address(Z_R1_scratch, ConstMethod::size_of_parameters_offset()));

    // max_stack = method->max_stack();
    __ z_llgh(max_stack, Address(max_stack, ConstMethod::max_stack_offset()));
    // max_stack in bytes
    __ z_sllg(max_stack, max_stack, LogBytesPerWord);
  }

  // Resize (i.e. normally shrink) the top frame F1 ...
  //   F1      [TOP_IJAVA_FRAME_ABI]          <-- Z_SP, Z_R10
  //           F1's operand stack (free)
  //           ...
  //           F1's operand stack (free)      <-- Z_esp
  //           F1's outgoing Java arg m
  //           ...
  //           F1's outgoing Java arg 0
  //           ...
  //
  //  ... into a parent frame (Z_R10 holds F1's SP before any modification, see also above)
  //
  //           +......................+
  //           :                      :        <-- Z_R10, saved below as F0's z_ijava_state.sender_sp
  //           :                      :
  //   F1      [PARENT_IJAVA_FRAME_ABI]        <-- Z_SP       \
  //           F0's non arg local                             | = delta
  //           ...                                            |
  //           F0's non arg local              <-- Z_esp      /
  //           F1's outgoing Java arg m
  //           ...
  //           F1's outgoing Java arg 0
  //           ...
  //
  // then push the new top frame F0.
  //
  //   F0      [TOP_IJAVA_FRAME_ABI]    = frame::z_top_ijava_frame_abi_size \
  //           [operand stack]          = max_stack                          | = top_frame_size
  //           [IJAVA_STATE]            = frame::z_ijava_state_size         /

  // sp_after_resize = Z_esp - delta
  //
  // delta = PARENT_IJAVA_FRAME_ABI + (locals_count - params_count)

  __ add2reg(sp_after_resize, (Interpreter::stackElementSize) - (frame::z_parent_ijava_frame_abi_size), Z_esp);
  __ z_sllg(Z_R0_scratch, local_count, LogBytesPerWord); // Params have already been subtracted from local_count.
  __ z_slgr(sp_after_resize, Z_R0_scratch);

  // top_frame_size = TOP_IJAVA_FRAME_ABI + max_stack + size of interpreter state
  __ add2reg(top_frame_size,
             frame::z_top_ijava_frame_abi_size +
             frame::z_ijava_state_size +
             frame::interpreter_frame_monitor_size() * wordSize,
             max_stack);

  // Check if there's room for the new frame...
  Register frame_size = max_stack; // Reuse the register for max_stack.
  __ z_lgr(frame_size, Z_SP);
  __ z_sgr(frame_size, sp_after_resize);
  __ z_agr(frame_size, top_frame_size);
  generate_stack_overflow_check(frame_size, fp/*tmp1*/);

  // Debug builds: check that Z_R14 still matches the return pc saved in the caller's frame.
  DEBUG_ONLY(__ z_cg(Z_R14, _z_abi16(return_pc), Z_SP));
  __ asm_assert_eq("killed Z_R14", 0);
  __ resize_frame_absolute(sp_after_resize, fp, true);
  __ save_return_pc(Z_R14);

  // ... and push the new frame F0.
  __ push_frame(top_frame_size, fp, true /*copy_sp*/, false);
  }

  //=============================================================================
  // Initialize the new frame F0: initialize interpreter state.

  {
  // locals
  const Register local_addr = Z_ARG4;

  BLOCK_COMMENT("generate_fixed_frame: initialize interpreter state {");

#ifdef ASSERT
  // Set the magic number (using local_addr as tmp register).
  __ load_const_optimized(local_addr, frame::z_istate_magic_number);
  __ z_stg(local_addr, _z_ijava_state_neg(magic), fp);
#endif

  // Save sender SP from F1 (i.e. before it was potentially modified by an
  // adapter) into F0's interpreter state. We use it as well to revert
  // resizing the frame above.
  __ z_stg(Z_R10, _z_ijava_state_neg(sender_sp), fp);

  // Load cp cache and save it at the end of this block.
  __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
  __ z_lg(Z_R1_scratch, Address(Z_R1_scratch, ConstMethod::constants_offset()));
  __ z_lg(Z_R1_scratch, Address(Z_R1_scratch, ConstantPool::cache_offset_in_bytes()));

  // z_ijava_state->method = method;
  __ z_stg(Z_method, _z_ijava_state_neg(method), fp);

  // Point locals at the first argument. Method's locals are the
  // parameters on top of caller's expression stack.
  // Tos points past last Java argument.

  __ z_lg(Z_locals, Address(Z_method, Method::const_offset()));
  __ z_llgh(Z_locals /*parameter_count words*/,
            Address(Z_locals, ConstMethod::size_of_parameters_offset()));
  __ z_sllg(Z_locals /*parameter_count bytes*/, Z_locals /*parameter_count*/, LogBytesPerWord);
  __ z_agr(Z_locals, Z_esp);
  // z_ijava_state->locals - i*BytesPerWord points to i-th Java local (i starts at 0)
  // z_ijava_state->locals = Z_esp + parameter_count bytes
  __ z_stg(Z_locals, _z_ijava_state_neg(locals), fp);

  // z_ijava_state->oop_temp = NULL;
  __ store_const(Address(fp, oop_tmp_offset), 0);

  // Initialize z_ijava_state->mdx.
  Register Rmdp = Z_bcp;
  // native_call: assert that mdo == NULL
  const bool check_for_mdo = !native_call DEBUG_ONLY(|| native_call);
  if (ProfileInterpreter && check_for_mdo) {
#ifdef FAST_DISPATCH
    // FAST_DISPATCH and ProfileInterpreter are mutually exclusive since
    // they both use I2.
    assert(0, "FAST_DISPATCH and +ProfileInterpreter are mutually exclusive");
#endif // FAST_DISPATCH
    Label get_continue;

    __ load_and_test_long(Rmdp, method_(method_data));
    __ z_brz(get_continue);
    DEBUG_ONLY(if (native_call) __ stop("native methods don't have a mdo"));
    __ add2reg(Rmdp, in_bytes(MethodData::data_offset()));
    __ bind(get_continue);
  }
  // NOTE(review): Rmdp (Z_bcp) is only written inside the 'if' above. When that
  // block is not generated, whatever Z_bcp holds on entry is stored as mdx here
  // - confirm that this is intended.
  __ z_stg(Rmdp, _z_ijava_state_neg(mdx), fp);

  // Initialize z_ijava_state->bcp and Z_bcp.
  if (native_call) {
    __ clear_reg(Z_bcp); // Must initialize. Will get written into frame where GC reads it.
  } else {
    __ z_lg(Z_bcp, method_(const));
    __ add2reg(Z_bcp, in_bytes(ConstMethod::codes_offset()));
  }
  __ z_stg(Z_bcp, _z_ijava_state_neg(bcp), fp);

  // no monitors and empty operand stack
  // => z_ijava_state->monitors points to the top slot in IJAVA_STATE.
  // => Z_ijava_state->esp points one slot above into the operand stack.
  // z_ijava_state->monitors = fp - frame::z_ijava_state_size - Interpreter::stackElementSize;
  // z_ijava_state->esp = Z_esp = z_ijava_state->monitors;
  __ add2reg(Z_esp, -frame::z_ijava_state_size, fp);
  __ z_stg(Z_esp, _z_ijava_state_neg(monitors), fp);
  __ add2reg(Z_esp, -Interpreter::stackElementSize);
  __ z_stg(Z_esp, _z_ijava_state_neg(esp), fp);

  // z_ijava_state->cpoolCache = Z_R1_scratch (see load above);
  __ z_stg(Z_R1_scratch, _z_ijava_state_neg(cpoolCache), fp);

  // Get mirror and store it in the frame as GC root for this Method*.
  __ load_mirror(Z_R1_scratch, Z_method);
  __ z_stg(Z_R1_scratch, _z_ijava_state_neg(mirror), fp);

  BLOCK_COMMENT("} generate_fixed_frame: initialize interpreter state");

  //=============================================================================
  if (!native_call) {
    // Fill locals with 0x0s.
    NearLabel locals_zeroed;
    NearLabel doXC;

    // Local_count is already num_locals_slots - num_param_slots.
    __ compare64_and_branch(local_count, (intptr_t)0L, Assembler::bcondNotHigh, locals_zeroed);

    // Advance local_addr to point behind locals (creates positive incr. in loop).
    __ z_lg(Z_R1_scratch, Address(Z_method, Method::const_offset()));
    __ z_llgh(Z_R0_scratch,
              Address(Z_R1_scratch, ConstMethod::size_of_locals_offset()));
    if (Z_R0_scratch == Z_R0) {
      __ z_aghi(Z_R0_scratch, -1);
    } else {
      __ add2reg(Z_R0_scratch, -1);
    }
    __ z_lgr(local_addr/*locals*/, Z_locals);
    __ z_sllg(Z_R0_scratch, Z_R0_scratch, LogBytesPerWord);
    __ z_sllg(local_count, local_count, LogBytesPerWord); // Local_count are non param locals.
    __ z_sgr(local_addr, Z_R0_scratch);

    if (VM_Version::has_Prefetch()) {
      __ z_pfd(0x02, 0, Z_R0, local_addr);
      __ z_pfd(0x02, 256, Z_R0, local_addr);
    }

    // Can't optimise for Z10 using "compare and branch" (immediate value is too big).
    // Up to 256 bytes are cleared with a single executed XC, more with MVCLE.
    __ z_cghi(local_count, 256);
    __ z_brnh(doXC);

    // MVCLE: Initialize if quite a lot locals.
    //  __ bind(doMVCLE);
    __ z_lgr(Z_R0_scratch, local_addr);
    __ z_lgr(Z_R1_scratch, local_count);
    __ clear_reg(Z_ARG2);        // Src len of MVCLE is zero.

    __ MacroAssembler::move_long_ext(Z_R0_scratch, Z_ARG1, 0);
    __ z_bru(locals_zeroed);

    // Template instruction, only ever executed via EXRL/EX below
    // (the branch above jumps past it).
    Label  XC_template;
    __ bind(XC_template);
    __ z_xc(0, 0, local_addr, 0, local_addr);

    __ bind(doXC);
    __ z_bctgr(local_count, Z_R0);                  // Get #bytes-1 for EXECUTE.
    if (VM_Version::has_ExecuteExtensions()) {
      __ z_exrl(local_count, XC_template);          // Execute XC with variable length.
    } else {
      __ z_larl(Z_R1_scratch, XC_template);
      __ z_ex(local_count, 0, Z_R0, Z_R1_scratch);  // Execute XC with variable length.
    }

    __ bind(locals_zeroed);
  }

  }
  // Finally set the frame pointer, destroying Z_method.
  assert(Z_fp == Z_method, "maybe set Z_fp earlier if other register than Z_method");
  // Oprofile analysis suggests to keep a copy in a register to be used by
  // generate_counter_incr().
  __ z_lgr(Z_ARG2, Z_method);
  __ z_lgr(Z_fp, fp);

  BLOCK_COMMENT("} generate_fixed_frame");
}

// Various method entries

// Math function, frame manager must set up an interpreter state, etc.
//
// Generates an intrinsic entry for Math.abs / Math.sqrt that computes the
// result into Z_FRET directly from the argument on the caller's expression
// stack, cuts the stack back to the sender's SP (Z_R10) and returns via Z_R14.
//
// Returns NULL (i.e. no special entry) if InlineIntrinsics is off or 'kind'
// is neither java_lang_math_abs nor java_lang_math_sqrt; otherwise the start
// address of the generated stub.
address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {

  if (!InlineIntrinsics) { return NULL; } // Generate a vanilla entry.

  // Only support absolute value and square root.
  if (kind != Interpreter::java_lang_math_abs && kind != Interpreter::java_lang_math_sqrt) {
    return NULL;
  }

  BLOCK_COMMENT("math_entry {");

  address math_entry = __ pc();

  if (kind == Interpreter::java_lang_math_abs) {
    // Load operand from stack.
    __ mem2freg_opt(Z_FRET, Address(Z_esp, Interpreter::stackElementSize));
    __ z_lpdbr(Z_FRET);
  } else {
    // sqrt
    // Can use memory operand directly.
    __ z_sqdb(Z_FRET, Interpreter::stackElementSize, Z_esp);
  }

  // Restore caller sp for c2i case.
  __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started.

  // We are done, return.
  __ z_br(Z_R14);

  BLOCK_COMMENT("} math_entry");

  return math_entry;
}

// Interpreter stub for calling a native method. (asm interpreter).
// This sets up a somewhat different looking stack for calling the
// native method than the typical interpreter frame setup.
address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
  // Determine code generation flags.
  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;

  // Interpreter entry for ordinary Java methods.
  //
  // Registers alive
  //   Z_SP          - stack pointer
  //   Z_thread      - JavaThread*
  //   Z_method      - callee's method (method to be invoked)
  //   Z_esp         - operand (or expression) stack pointer of caller. one slot above last arg.
  //   Z_R10         - sender sp (before modifications, e.g.
by c2i adapter + // and as well by generate_fixed_frame below) + // Z_R14 - return address to caller (call_stub or c2i_adapter) + // + // Registers updated + // Z_SP - stack pointer + // Z_fp - callee's framepointer + // Z_esp - callee's operand stack pointer + // points to the slot above the value on top + // Z_locals - used to access locals: locals[i] := *(Z_locals - i*BytesPerWord) + // Z_tos - integer result, if any + // z_ftos - floating point result, if any + // + // Stack layout at this point: + // + // F1 [TOP_IJAVA_FRAME_ABI] <-- Z_SP, Z_R10 (Z_R10 will be below Z_SP if + // frame was extended by c2i adapter) + // [outgoing Java arguments] <-- Z_esp + // ... + // PARENT [PARENT_IJAVA_FRAME_ABI] + // ... + // + + address entry_point = __ pc(); + + // Make sure registers are different! + assert_different_registers(Z_thread, Z_method, Z_esp); + + BLOCK_COMMENT("native_entry {"); + + // Make sure method is native and not abstract. +#ifdef ASSERT + address reentry = NULL; + { Label L; + __ testbit(method_(access_flags), JVM_ACC_NATIVE_BIT); + __ z_btrue(L); + reentry = __ stop_chain_static(reentry, "tried to execute non-native method as native"); + __ bind(L); + } + { Label L; + __ testbit(method_(access_flags), JVM_ACC_ABSTRACT_BIT); + __ z_bfalse(L); + reentry = __ stop_chain_static(reentry, "tried to execute abstract method as non-abstract"); + __ bind(L); + } +#endif // ASSERT + +#ifdef ASSERT + // Save the return PC into the callers frame for assertion in generate_fixed_frame. + __ save_return_pc(Z_R14); +#endif + + // Generate the code to allocate the interpreter stack frame. + generate_fixed_frame(true); + + const Address do_not_unlock_if_synchronized(Z_thread, JavaThread::do_not_unlock_if_synchronized_offset()); + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. 
If any exception was thrown by + // runtime, exception handling i.e. unlock_if_synchronized_method will + // check this thread local flag. + __ z_mvi(do_not_unlock_if_synchronized, true); + + // Increment invocation count and check for overflow. + NearLabel invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(true); + + // Reset the _do_not_unlock_if_synchronized flag. + __ z_mvi(do_not_unlock_if_synchronized, false); + + // Check for synchronized methods. + // This must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + lock_method(); + } else { + // No synchronization necessary. +#ifdef ASSERT + { Label L; + __ get_method(Z_R1_scratch); + __ testbit(method2_(Z_R1_scratch, access_flags), JVM_ACC_SYNCHRONIZED_BIT); + __ z_bfalse(L); + reentry = __ stop_chain_static(reentry, "method needs synchronization"); + __ bind(L); + } +#endif // ASSERT + } + + // start execution + + // jvmti support + __ notify_method_entry(); + + //============================================================================= + // Get and call the signature handler. + const Register Rmethod = Z_tmp_2; + const Register signature_handler_entry = Z_tmp_1; + const Register Rresult_handler = Z_tmp_3; + Label call_signature_handler; + + assert_different_registers(Z_fp, Rmethod, signature_handler_entry, Rresult_handler); + assert(Rresult_handler->is_nonvolatile(), "Rresult_handler must be in a non-volatile register"); + + // Reload method. + __ get_method(Rmethod); + + // Check for signature handler. + __ load_and_test_long(signature_handler_entry, method2_(Rmethod, signature_handler)); + __ z_brne(call_signature_handler); + + // Method has never been called. Either generate a specialized + // handler or point to the slow one. 
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), + Rmethod); + + // Reload method. + __ get_method(Rmethod); + + // Reload signature handler, it must have been created/assigned in the meantime. + __ z_lg(signature_handler_entry, method2_(Rmethod, signature_handler)); + + __ bind(call_signature_handler); + + // We have a TOP_IJAVA_FRAME here, which belongs to us. + __ set_top_ijava_frame_at_SP_as_last_Java_frame(Z_SP, Z_R1/*tmp*/); + + // Call signature handler and pass locals address in Z_ARG1. + __ z_lgr(Z_ARG1, Z_locals); + __ call_stub(signature_handler_entry); + // Save result handler returned by signature handler. + __ z_lgr(Rresult_handler, Z_RET); + + // Reload method (the slow signature handler may block for GC). + __ get_method(Rmethod); + + // Pass mirror handle if static call. + { + Label method_is_not_static; + __ testbit(method2_(Rmethod, access_flags), JVM_ACC_STATIC_BIT); + __ z_bfalse(method_is_not_static); + // Get mirror. + __ load_mirror(Z_R1, Rmethod); + // z_ijava_state.oop_temp = pool_holder->klass_part()->java_mirror(); + __ z_stg(Z_R1, oop_tmp_offset, Z_fp); + // Pass handle to mirror as 2nd argument to JNI method. + __ add2reg(Z_ARG2, oop_tmp_offset, Z_fp); + __ bind(method_is_not_static); + } + + // Pass JNIEnv address as first parameter. + __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread); + + // Note: last java frame has been set above already. The pc from there + // is precise enough. + + // Get native function entry point before we change the thread state. + __ z_lg(Z_R1/*native_method_entry*/, method2_(Rmethod, native_function)); + + //============================================================================= + // Transition from _thread_in_Java to _thread_in_native. As soon as + // we make this change the safepoint code needs to be certain that + // the last Java frame we established is good. 
The pc in that frame + // just need to be near here not an actual return address. +#ifdef ASSERT + { + NearLabel L; + __ mem2reg_opt(Z_R14, Address(Z_thread, JavaThread::thread_state_offset()), false /*32 bits*/); + __ compareU32_and_branch(Z_R14, _thread_in_Java, Assembler::bcondEqual, L); + reentry = __ stop_chain_static(reentry, "Wrong thread state in native stub"); + __ bind(L); + } +#endif + + // Memory ordering: Z does not reorder store/load with subsequent load. That's strong enough. + __ set_thread_state(_thread_in_native); + + //============================================================================= + // Call the native method. Argument registers must not have been + // overwritten since "__ call_stub(signature_handler);" (except for + // ARG1 and ARG2 for static methods). + + __ call_c(Z_R1/*native_method_entry*/); + + // NOTE: frame::interpreter_frame_result() depends on these stores. + __ z_stg(Z_RET, _z_ijava_state_neg(lresult), Z_fp); + __ freg2mem_opt(Z_FRET, Address(Z_fp, _z_ijava_state_neg(fresult))); + const Register Rlresult = signature_handler_entry; + assert(Rlresult->is_nonvolatile(), "Rlresult must be in a non-volatile register"); + __ z_lgr(Rlresult, Z_RET); + + // Z_method may no longer be valid, because of GC. + + // Block, if necessary, before resuming in _thread_in_Java state. + // In order for GC to work, don't clear the last_Java_sp until after + // blocking. + + //============================================================================= + // Switch thread to "native transition" state before reading the + // synchronization state. This additional state is necessary + // because reading and testing the synchronization state is not + // atomic w.r.t. GC, as this scenario demonstrates: Java thread A, + // in _thread_in_native state, loads _not_synchronized and is + // preempted. VM thread changes sync state to synchronizing and + // suspends threads for GC. 
Thread A is resumed to finish this + // native method, but doesn't block here since it didn't see any + // synchronization in progress, and escapes. + + __ set_thread_state(_thread_in_native_trans); + if (UseMembar) { + __ z_fence(); + } else { + // Write serialization page so VM thread can do a pseudo remote + // membar. We use the current thread pointer to calculate a thread + // specific offset to write to within the page. This minimizes bus + // traffic due to cache line collision. + __ serialize_memory(Z_thread, Z_R1, Z_R0); + } + // Now before we return to java we must look for a current safepoint + // (a new safepoint can not start since we entered native_trans). + // We must check here because a current safepoint could be modifying + // the callers registers right this moment. + + // Check for safepoint operation in progress and/or pending suspend requests. + { + Label Continue, do_safepoint; + __ generate_safepoint_check(do_safepoint, Z_R1, true); + // Check for suspend. + __ load_and_test_int(Z_R0/*suspend_flags*/, thread_(suspend_flags)); + __ z_bre(Continue); // 0 -> no flag set -> not suspended + __ bind(do_safepoint); + __ z_lgr(Z_ARG1, Z_thread); + __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); + __ bind(Continue); + } + + //============================================================================= + // Back in Interpreter Frame. + + // We are in thread_in_native_trans here and back in the normal + // interpreter frame. We don't have to do anything special about + // safepoints and we can switch to Java mode anytime we are ready. + + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. For + // native methods it assumes that the non-FPU/non-void result is + // saved in z_ijava_state.lresult and a FPU result in z_ijava_state.fresult. 
If + // this changes then the interpreter_frame_result implementation + // will need to be updated too. + + //============================================================================= + // Back in Java. + + // Memory ordering: Z does not reorder store/load with subsequent + // load. That's strong enough. + __ set_thread_state(_thread_in_Java); + + __ reset_last_Java_frame(); + + // We reset the JNI handle block only after unboxing the result; see below. + + // The method register is junk from after the thread_in_native transition + // until here. Also can't call_VM until the bcp has been + // restored. Need bcp for throwing exception below so get it now. + __ get_method(Rmethod); + + // Restore Z_bcp to have legal interpreter frame, + // i.e., bci == 0 <=> Z_bcp == code_base(). + __ z_lg(Z_bcp, Address(Rmethod, Method::const_offset())); // get constMethod + __ add2reg(Z_bcp, in_bytes(ConstMethod::codes_offset())); // get codebase + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop)); + } + + // Check if the native method returns an oop, and if so, move it + // from the jni handle to z_ijava_state.oop_temp. This is + // necessary, because we reset the jni handle block below. + // NOTE: frame::interpreter_frame_result() depends on this, too. + { NearLabel no_oop_result, store_oop_result; + __ load_absolute_address(Z_R1, AbstractInterpreter::result_handler(T_OBJECT)); + __ compareU64_and_branch(Z_R1, Rresult_handler, Assembler::bcondNotEqual, no_oop_result); + __ compareU64_and_branch(Rlresult, (intptr_t)0L, Assembler::bcondEqual, store_oop_result); + __ z_lg(Rlresult, 0, Rlresult); // unbox + __ bind(store_oop_result); + __ z_stg(Rlresult, oop_tmp_offset, Z_fp); + __ verify_oop(Rlresult); + __ bind(no_oop_result); + } + + // Reset handle block. 
+ __ z_lg(Z_R1/*active_handles*/, thread_(active_handles)); + __ clear_mem(Address(Z_R1, JNIHandleBlock::top_offset_in_bytes()), 4); + + // Handle exceptions (exception handling will handle unlocking!). + { + Label L; + __ load_and_test_long(Z_R0/*pending_exception*/, thread_(pending_exception)); + __ z_bre(L); + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + if (synchronized) { + Register Rfirst_monitor = Z_ARG2; + __ add2reg(Rfirst_monitor, -(frame::z_ijava_state_size + (int)sizeof(BasicObjectLock)), Z_fp); +#ifdef ASSERT + NearLabel ok; + __ z_lg(Z_R1, _z_ijava_state_neg(monitors), Z_fp); + __ compareU64_and_branch(Rfirst_monitor, Z_R1, Assembler::bcondEqual, ok); + reentry = __ stop_chain_static(reentry, "native_entry:unlock: inconsistent z_ijava_state.monitors"); + __ bind(ok); +#endif + __ unlock_object(Rfirst_monitor); + } + + // JVMTI support. Result has already been saved above to the frame. + __ notify_method_exit(true/*native_method*/, ilgl, InterpreterMacroAssembler::NotifyJVMTI); + + // Move native method result back into proper registers and return. + // C++ interpreter does not use result handler. So do we need to here? TODO(ZASM): check if correct. + { NearLabel no_oop_or_null; + __ mem2freg_opt(Z_FRET, Address(Z_fp, _z_ijava_state_neg(fresult))); + __ load_and_test_long(Z_RET, Address(Z_fp, _z_ijava_state_neg(lresult))); + __ z_bre(no_oop_or_null); // No unboxing if the result is NULL. + __ load_absolute_address(Z_R1, AbstractInterpreter::result_handler(T_OBJECT)); + __ compareU64_and_branch(Z_R1, Rresult_handler, Assembler::bcondNotEqual, no_oop_or_null); + __ z_lg(Z_RET, oop_tmp_offset, Z_fp); + __ verify_oop(Z_RET); + __ bind(no_oop_or_null); + } + + // Pop the native method's interpreter frame. + __ pop_interpreter_frame(Z_R14 /*return_pc*/, Z_ARG2/*tmp1*/, Z_ARG3/*tmp2*/); + + // Return to caller. 
+ __ z_br(Z_R14); + + if (inc_counter) { + // Handle overflow of counter and compile method. + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + } + + BLOCK_COMMENT("} native_entry"); + + return entry_point; +} + +// +// Generic interpreted method entry to template interpreter. +// +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + address entry_point = __ pc(); + + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // Interpreter entry for ordinary Java methods. + // + // Registers alive + // Z_SP - stack pointer + // Z_thread - JavaThread* + // Z_method - callee's method (method to be invoked) + // Z_esp - operand (or expression) stack pointer of caller. one slot above last arg. + // Z_R10 - sender sp (before modifications, e.g. by c2i adapter + // and as well by generate_fixed_frame below) + // Z_R14 - return address to caller (call_stub or c2i_adapter) + // + // Registers updated + // Z_SP - stack pointer + // Z_fp - callee's framepointer + // Z_esp - callee's operand stack pointer + // points to the slot above the value on top + // Z_locals - used to access locals: locals[i] := *(Z_locals - i*BytesPerWord) + // Z_tos - integer result, if any + // z_ftos - floating point result, if any + // + // + // stack layout at this point: + // + // F1 [TOP_IJAVA_FRAME_ABI] <-- Z_SP, Z_R10 (Z_R10 will be below Z_SP if + // frame was extended by c2i adapter) + // [outgoing Java arguments] <-- Z_esp + // ... + // PARENT [PARENT_IJAVA_FRAME_ABI] + // ... + // + // stack layout before dispatching the first bytecode: + // + // F0 [TOP_IJAVA_FRAME_ABI] <-- Z_SP + // [operand stack] <-- Z_esp + // monitor (optional, can grow) + // [IJAVA_STATE] + // F1 [PARENT_IJAVA_FRAME_ABI] <-- Z_fp (== *Z_SP) + // [F0's locals] <-- Z_locals + // [F1's operand stack] + // [F1's monitors] (optional) + // [IJAVA_STATE] + + // Make sure registers are different! 
+ assert_different_registers(Z_thread, Z_method, Z_esp); + + BLOCK_COMMENT("normal_entry {"); + + // Make sure method is not native and not abstract. + // Rethink these assertions - they can be simplified and shared. +#ifdef ASSERT + address reentry = NULL; + { Label L; + __ testbit(method_(access_flags), JVM_ACC_NATIVE_BIT); + __ z_bfalse(L); + reentry = __ stop_chain_static(reentry, "tried to execute native method as non-native"); + __ bind(L); + } + { Label L; + __ testbit(method_(access_flags), JVM_ACC_ABSTRACT_BIT); + __ z_bfalse(L); + reentry = __ stop_chain_static(reentry, "tried to execute abstract method as non-abstract"); + __ bind(L); + } +#endif // ASSERT + +#ifdef ASSERT + // Save the return PC into the callers frame for assertion in generate_fixed_frame. + __ save_return_pc(Z_R14); +#endif + + // Generate the code to allocate the interpreter stack frame. + generate_fixed_frame(false); + + const Address do_not_unlock_if_synchronized(Z_thread, JavaThread::do_not_unlock_if_synchronized_offset()); + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. If any exception was thrown by + // runtime, exception handling i.e. unlock_if_synchronized_method will + // check this thread local flag. + __ z_mvi(do_not_unlock_if_synchronized, true); + + __ profile_parameters_type(Z_tmp_2, Z_ARG3, Z_ARG4); + + // Increment invocation counter and check for overflow. + // + // Note: checking for negative value instead of overflow so we have a 'sticky' + // overflow test (may be of importance as soon as we have true MT/MP). 
+ NearLabel invocation_counter_overflow; + NearLabel profile_method; + NearLabel profile_method_continue; + NearLabel Lcontinue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + __ bind(Lcontinue); + + bang_stack_shadow_pages(false); + + // Reset the _do_not_unlock_if_synchronized flag. + __ z_mvi(do_not_unlock_if_synchronized, false); + + // Check for synchronized methods. + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + // Allocate monitor and lock method. + lock_method(); + } else { +#ifdef ASSERT + { Label L; + __ get_method(Z_R1_scratch); + __ testbit(method2_(Z_R1_scratch, access_flags), JVM_ACC_SYNCHRONIZED_BIT); + __ z_bfalse(L); + reentry = __ stop_chain_static(reentry, "method needs synchronization"); + __ bind(L); + } +#endif // ASSERT + } + + // start execution + +#ifdef ASSERT + __ verify_esp(Z_esp, Z_R1_scratch); + + __ verify_thread(); +#endif + + // jvmti support + __ notify_method_entry(); + + // Start executing instructions. + __ dispatch_next(vtos); + // Dispatch_next does not return. + DEBUG_ONLY(__ should_not_reach_here()); + + // Invocation counter overflow. + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter. + __ bind(profile_method); + + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ z_bru(profile_method_continue); + } + + // Handle invocation counter overflow. 
+ __ bind(invocation_counter_overflow); + generate_counter_overflow(Lcontinue); + } + + BLOCK_COMMENT("} normal_entry"); + + return entry_point; +} + +// Method entry for static native methods: +// int java.util.zip.CRC32.update(int crc, int b) +address TemplateInterpreterGenerator::generate_CRC32_update_entry() { + + if (UseCRC32Intrinsics) { + uint64_t entry_off = __ offset(); + Label slow_path; + + // If we need a safepoint check, generate full interpreter entry. + __ generate_safepoint_check(slow_path, Z_R1, false); + + BLOCK_COMMENT("CRC32_update {"); + + // We don't generate local frame and don't align stack because + // we do not even call stub code (we generate the code inline) + // and there is no safepoint on this path. + + // Load java parameters. + // Z_esp is callers operand stack pointer, i.e. it points to the parameters. + const Register argP = Z_esp; + const Register crc = Z_ARG1; // crc value + const Register data = Z_ARG2; // address of java byte value (kernel_crc32 needs address) + const Register dataLen = Z_ARG3; // source data len (1 byte). Not used because calling the single-byte emitter. + const Register table = Z_ARG4; // address of crc32 table + + // Arguments are reversed on java expression stack. + __ z_la(data, 3+1*wordSize, argP); // byte value (stack address). + // Being passed as an int, the single byte is at offset +3. + __ z_llgf(crc, 2 * wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register. + + StubRoutines::zarch::generate_load_crc_table_addr(_masm, table); + __ kernel_crc32_singleByte(crc, data, dataLen, table, Z_R1); + + // Restore caller sp for c2i case. + __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started. + + __ z_br(Z_R14); + + BLOCK_COMMENT("} CRC32_update"); + + // Use a previously generated vanilla native entry as the slow path. 
+ BIND(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), Z_R1); + return __ addr_at(entry_off); + } + + return NULL; +} + + +// Method entry for static native methods: +// int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) +// int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) +address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + + if (UseCRC32Intrinsics) { + uint64_t entry_off = __ offset(); + Label slow_path; + + // If we need a safepoint check, generate full interpreter entry. + __ generate_safepoint_check(slow_path, Z_R1, false); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + // Load parameters. + // Z_esp is callers operand stack pointer, i.e. it points to the parameters. + const Register argP = Z_esp; + const Register crc = Z_ARG1; // crc value + const Register data = Z_ARG2; // address of java byte array + const Register dataLen = Z_ARG3; // source data len + const Register table = Z_ARG4; // address of crc32 table + const Register t0 = Z_R10; // work reg for kernel* emitters + const Register t1 = Z_R11; // work reg for kernel* emitters + const Register t2 = Z_R12; // work reg for kernel* emitters + const Register t3 = Z_R13; // work reg for kernel* emitters + + // Arguments are reversed on java expression stack. + // Calculate address of start element. + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { // Used for "updateByteBuffer direct". + // crc @ (SP + 5W) (32bit) + // buf @ (SP + 3W) (64bit ptr to long array) + // off @ (SP + 2W) (32bit) + // dataLen @ (SP + 1W) (32bit) + // data = buf + off + BLOCK_COMMENT("CRC32_updateByteBuffer {"); + __ z_llgf(crc, 5*wordSize, argP); // current crc state + __ z_lg(data, 3*wordSize, argP); // start of byte buffer + __ z_agf(data, 2*wordSize, argP); // Add byte buffer offset. 
+ __ z_lgf(dataLen, 1*wordSize, argP); // #bytes to process + } else { // Used for "updateBytes update". + // crc @ (SP + 4W) (32bit) + // buf @ (SP + 3W) (64bit ptr to byte array) + // off @ (SP + 2W) (32bit) + // dataLen @ (SP + 1W) (32bit) + // data = buf + off + base_offset + BLOCK_COMMENT("CRC32_updateBytes {"); + __ z_llgf(crc, 4*wordSize, argP); // current crc state + __ z_lg(data, 3*wordSize, argP); // start of byte buffer + __ z_agf(data, 2*wordSize, argP); // Add byte buffer offset. + __ z_lgf(dataLen, 1*wordSize, argP); // #bytes to process + __ z_aghi(data, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } + + StubRoutines::zarch::generate_load_crc_table_addr(_masm, table); + + __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 5 registers. + __ z_stmg(t0, t3, 1*8, Z_SP); // Spill regs 10..13 to make them available as work registers. + __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3); + __ z_lmg(t0, t3, 1*8, Z_SP); // Restore regs 10..13 from stack. + + // Restore caller sp for c2i case. + __ resize_frame_absolute(Z_R10, Z_R0, true); // Cut the stack back to where the caller started. + + __ z_br(Z_R14); + + BLOCK_COMMENT("} CRC32_update{Bytes|ByteBuffer}"); + + // Use a previously generated vanilla native entry as the slow path. + BIND(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), Z_R1); + return __ addr_at(entry_off); + } + + return NULL; +} + +// Not supported +address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + return NULL; +} + +void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { + // Quick & dirty stack overflow checking: bang the stack & handle trap. + // Note that we do the banging after the frame is setup, since the exception + // handling code expects to find a valid interpreter frame on the stack. 
+ // Doing the banging earlier fails if the caller frame is not an interpreter + // frame. + // (Also, the exception throwing code expects to unlock any synchronized + // method receiver, so do the banging after locking the receiver.) + + // Bang each page in the shadow zone. We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. For native, we only bang the last page. + if (UseStackBanging) { + const int page_size = os::vm_page_size(); + const int n_shadow_pages = (int)(JavaThread::stack_shadow_zone_size()/page_size); + const int start_page_num = native_call ? n_shadow_pages : 1; + for (int pages = start_page_num; pages <= n_shadow_pages; pages++) { + __ bang_stack_with_offset(pages*page_size); + } + } +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + + BLOCK_COMMENT("throw_exception {"); + + // Entry point in previous activation (i.e., if the caller was interpreted). + Interpreter::_rethrow_exception_entry = __ pc(); + __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Frame accessors use Z_fp. + // Z_ARG1 (==Z_tos): exception + // Z_ARG2 : Return address/pc that threw exception. + __ restore_bcp(); // R13 points to call/send. + __ restore_locals(); + + // Fallthrough, no need to restore Z_esp. + + // Entry point for exceptions thrown within interpreter code. + Interpreter::_throw_exception_entry = __ pc(); + // Expression stack is undefined here. + // Z_ARG1 (==Z_tos): exception + // Z_bcp: exception bcp + __ verify_oop(Z_ARG1); + __ z_lgr(Z_ARG2, Z_ARG1); + + // Expression stack must be empty before entering the VM in case of + // an exception. + __ empty_expression_stack(); + // Find exception handler address and preserve exception oop. 
+ const Register Rpreserved_exc_oop = Z_tmp_1; + __ call_VM(Rpreserved_exc_oop, + CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), + Z_ARG2); + // Z_RET: exception handler entry point + // Z_bcp: bcp for exception handler + __ push_ptr(Rpreserved_exc_oop); // Push exception which is now the only value on the stack. + __ z_br(Z_RET); // Jump to exception handler (may be _remove_activation_entry!). + + // If the exception is not handled in the current frame the frame is + // removed and the exception is rethrown (i.e. exception + // continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bci for the instruction + // which caused the exception and the expression stack is + // empty. Thus, for any VM calls at this point, GC will find a legal + // oop map (with empty expression stack). + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ z_lg(Z_fp, _z_parent_ijava_frame_abi(callers_sp), Z_SP); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition + // indicating that we are currently handling popframe, so that + // call_VMs that may happen later do not trigger new popframe + // handling cycles. + __ load_sized_value(Z_tmp_1, Address(Z_thread, JavaThread::popframe_condition_offset()), 4, false /*signed*/); + __ z_oill(Z_tmp_1, JavaThread::popframe_processing_bit); + __ z_sty(Z_tmp_1, thread_(popframe_condition)); + + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. 
+ // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + NearLabel caller_not_deoptimized; + __ z_lg(Z_ARG1, _z_parent_ijava_frame_abi(return_pc), Z_fp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), Z_ARG1); + __ compareU64_and_branch(Z_RET, (intptr_t)0, Assembler::bcondNotEqual, caller_not_deoptimized); + + // Compute size of arguments for saving when returning to + // deoptimized caller. + __ get_method(Z_ARG2); + __ z_lg(Z_ARG2, Address(Z_ARG2, Method::const_offset())); + __ z_llgh(Z_ARG2, Address(Z_ARG2, ConstMethod::size_of_parameters_offset())); + __ z_sllg(Z_ARG2, Z_ARG2, Interpreter::logStackElementSize); // slots 2 bytes + __ restore_locals(); + // Compute address of args to be saved. + __ z_lgr(Z_ARG3, Z_locals); + __ z_slgr(Z_ARG3, Z_ARG2); + __ add2reg(Z_ARG3, wordSize); + // Save these arguments. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), + Z_thread, Z_ARG2, Z_ARG3); + + __ remove_activation(vtos, Z_R14, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Inform deoptimization that it is responsible for restoring + // these arguments. + __ store_const(thread_(popframe_condition), + JavaThread::popframe_force_deopt_reexecution_bit, + Z_tmp_1, false); + + // Continue in deoptimization handler. + __ z_br(Z_R14); + + __ bind(caller_not_deoptimized); + } + + // Clear the popframe condition flag. + __ clear_mem(thread_(popframe_condition), sizeof(int)); + + __ remove_activation(vtos, + noreg, // Retaddr is not used. + false, // throw_monitor_exception + false, // install_monitor_exception + false); // notify_jvmdi + __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer. 
+ __ restore_bcp(); + __ restore_locals(); + __ restore_esp(); + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } +#if INCLUDE_JVMTI + { + Label L_done; + + __ z_cli(0, Z_bcp, Bytecodes::_invokestatic); + __ z_brc(Assembler::bcondNotEqual, L_done); + + // The member name argument must be restored if _invokestatic is + // re-executed after a PopFrame call. Detect such a case in the + // InterpreterRuntime function and return the member name + // argument, or NULL. + __ z_lg(Z_ARG2, Address(Z_locals)); + __ get_method(Z_ARG3); + __ call_VM(Z_tmp_1, + CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), + Z_ARG2, Z_ARG3, Z_bcp); + + __ z_ltgr(Z_tmp_1, Z_tmp_1); + __ z_brc(Assembler::bcondEqual, L_done); + + __ z_stg(Z_tmp_1, Address(Z_esp, wordSize)); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + __ dispatch_next(vtos); + // End of PopFrame support. + Interpreter::_remove_activation_entry = __ pc(); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects the + // following registers set up: + // + // Z_ARG1 (==Z_tos): exception + // Z_ARG2 : return address/pc that threw exception + + Register return_pc = Z_tmp_1; + Register handler = Z_tmp_2; + assert(return_pc->is_nonvolatile(), "use non-volatile reg. to preserve exception pc"); + assert(handler->is_nonvolatile(), "use non-volatile reg. to handler pc"); + __ asm_assert_ijava_state_magic(return_pc/*tmp*/); // The top frame should be an interpreter frame. + __ z_lg(return_pc, _z_parent_ijava_frame_abi(return_pc), Z_fp); + + // Moved removing the activation after VM call, because the new top + // frame does not necessarily have the z_abi_160 required for a VM + // call (e.g. if it is compiled). 
+ + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + Z_thread, return_pc); + __ z_lgr(handler, Z_RET); // Save exception handler. + + // Preserve exception over this code sequence. + __ pop_ptr(Z_ARG1); + __ set_vm_result(Z_ARG1); + // Remove the activation (without doing throws on illegalMonitorExceptions). + __ remove_activation(vtos, noreg/*ret.pc already loaded*/, false/*throw exc*/, true/*install exc*/, false/*notify jvmti*/); + __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // Restore frame pointer. + + __ get_vm_result(Z_ARG1); // Restore exception. + __ verify_oop(Z_ARG1); + __ z_lgr(Z_ARG2, return_pc); // Restore return address. + +#ifdef ASSERT + // The return_pc in the new top frame is dead... at least that's my + // current understanding. To assert this I overwrite it. + // Note: for compiled frames the handler is the deopt blob + // which writes Z_ARG2 into the return_pc slot. + __ load_const_optimized(return_pc, 0xb00b1); + __ z_stg(return_pc, _z_parent_ijava_frame_abi(return_pc), Z_SP); +#endif + + // Z_ARG1 (==Z_tos): exception + // Z_ARG2 : return address/pc that threw exception + + // Note that an "issuing PC" is actually the next PC after the call. + __ z_br(handler); // Jump to exception handler of caller. 
+ + BLOCK_COMMENT("} throw_exception"); +} + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for (TosState state) { + address entry = __ pc(); + + BLOCK_COMMENT("earlyret_entry {"); + + __ z_lg(Z_fp, _z_parent_ijava_frame_abi(callers_sp), Z_SP); + __ restore_bcp(); + __ restore_locals(); + __ restore_esp(); + __ empty_expression_stack(); + __ load_earlyret_value(state); + + Register RjvmtiState = Z_tmp_1; + __ z_lg(RjvmtiState, thread_(jvmti_thread_state)); + __ store_const(Address(RjvmtiState, JvmtiThreadState::earlyret_state_offset()), + JvmtiThreadState::earlyret_inactive, 4, 4, Z_R0_scratch); + + __ remove_activation(state, + Z_tmp_1, // retaddr + false, // throw_monitor_exception + false, // install_monitor_exception + true); // notify_jvmdi + __ z_br(Z_tmp_1); + + BLOCK_COMMENT("} earlyret_entry"); + + return entry; +} + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation. + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + aep = __ pc(); __ push_ptr(); __ z_bru(L); + fep = __ pc(); __ push_f(); __ z_bru(L); + dep = __ pc(); __ push_d(); __ z_bru(L); + lep = __ pc(); __ push_l(); __ z_bru(L); + bep = cep = sep = + iep = __ pc(); __ push_i(); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + +//----------------------------------------------------------------------------- + +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + NearLabel counter_below_trace_threshold; + + if (TraceBytecodesAt > 0) { + // Skip runtime call, if the trace threshold is not yet reached. 
+ __ load_absolute_address(Z_tmp_1, (address)&BytecodeCounter::_counter_value); + __ load_absolute_address(Z_tmp_2, (address)&TraceBytecodesAt); + __ load_sized_value(Z_tmp_1, Address(Z_tmp_1), 4, false /*signed*/); + __ load_sized_value(Z_tmp_2, Address(Z_tmp_2), 8, false /*signed*/); + __ compareU64_and_branch(Z_tmp_1, Z_tmp_2, Assembler::bcondLow, counter_below_trace_threshold); + } + + int offset2 = state == ltos || state == dtos ? 2 : 1; + + __ push(state); + // Preserved return pointer is in Z_R14. + // InterpreterRuntime::trace_bytecode() preserves and returns the value passed as second argument. + __ z_lgr(Z_ARG2, Z_R14); + __ z_lg(Z_ARG3, Address(Z_esp, Interpreter::expr_offset_in_bytes(0))); + if (WizardMode) { + __ z_lgr(Z_ARG4, Z_esp); // Trace Z_esp in WizardMode. + } else { + __ z_lg(Z_ARG4, Address(Z_esp, Interpreter::expr_offset_in_bytes(offset2))); + } + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), Z_ARG2, Z_ARG3, Z_ARG4); + __ z_lgr(Z_R14, Z_RET); // Restore return address (see above). + __ pop(state); + + __ bind(counter_below_trace_threshold); + __ z_br(Z_R14); // return + + return entry; +} + +// Make feasible for old CPUs. +void TemplateInterpreterGenerator::count_bytecode() { + __ load_absolute_address(Z_R1_scratch, (address) &BytecodeCounter::_counter_value); + __ add2mem_32(Address(Z_R1_scratch), 1, Z_R0_scratch); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template * t) { + __ load_absolute_address(Z_R1_scratch, (address)&BytecodeHistogram::_counters[ t->bytecode() ]); + __ add2mem_32(Address(Z_R1_scratch), 1, Z_tmp_1); +} + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template * t) { + Address index_addr(Z_tmp_1, (intptr_t) 0); + Register index = Z_tmp_2; + + // Load previous index. + __ load_absolute_address(Z_tmp_1, (address) &BytecodePairHistogram::_index); + __ mem2reg_opt(index, index_addr, false); + + // Mask with current bytecode and store as new previous index.
+ __ z_srl(index, BytecodePairHistogram::log2_number_of_codes); + __ load_const_optimized(Z_R0_scratch, + (int)t->bytecode() << BytecodePairHistogram::log2_number_of_codes); + __ z_or(index, Z_R0_scratch); + __ reg2mem_opt(index, index_addr, false); + + // Load counter array's address. + __ z_lgfr(index, index); // Sign extend for addressing. + __ z_sllg(index, index, LogBytesPerInt); // index2bytes + __ load_absolute_address(Z_R1_scratch, + (address) &BytecodePairHistogram::_counters); + // Add index and increment counter. + __ z_agr(Z_R1_scratch, index); + __ add2mem_32(Address(Z_R1_scratch), 1, Z_tmp_1); +} + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time stub saves the right registers, depending on + // the tosca in-state for the given template. + address entry = Interpreter::trace_code(t->tos_in()); + guarantee(entry != NULL, "entry must have been generated"); + __ call_stub(entry); +} + +void TemplateInterpreterGenerator::stop_interpreter_at() { + NearLabel L; + + __ load_absolute_address(Z_tmp_1, (address)&BytecodeCounter::_counter_value); + __ load_absolute_address(Z_tmp_2, (address)&StopInterpreterAt); + __ load_sized_value(Z_tmp_1, Address(Z_tmp_1), 4, false /*signed*/); + __ load_sized_value(Z_tmp_2, Address(Z_tmp_2), 8, false /*signed*/); + __ compareU64_and_branch(Z_tmp_1, Z_tmp_2, Assembler::bcondLow, L); + assert(Z_tmp_1->is_nonvolatile(), "must be nonvolatile to preserve Z_tos"); + assert(Z_F8->is_nonvolatile(), "must be nonvolatile to preserve Z_ftos"); + __ z_lgr(Z_tmp_1, Z_tos); // Save tos. + __ z_lgr(Z_tmp_2, Z_bytecode); // Save Z_bytecode. + __ z_ldr(Z_F8, Z_ftos); // Save ftos. + // Use -XX:StopInterpreterAt=<num> to set the limit + // and break at breakpoint(). + __ call_VM(noreg, CAST_FROM_FN_PTR(address, breakpoint), false); + __ z_lgr(Z_tos, Z_tmp_1); // Restore tos. + __ z_lgr(Z_bytecode, Z_tmp_2); // Restore Z_bytecode.
+ __ z_ldr(Z_ftos, Z_F8); // Restore ftos. + __ bind(L); +} + +#endif // !PRODUCT diff --git a/hotspot/src/cpu/s390/vm/templateTable_s390.cpp b/hotspot/src/cpu/s390/vm/templateTable_s390.cpp new file mode 100644 index 00000000000..aa02fd1b213 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/templateTable_s390.cpp @@ -0,0 +1,4250 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.inline.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" + +#ifdef PRODUCT +#define __ _masm-> +#define BLOCK_COMMENT(str) +#define BIND(label) __ bind(label); +#else +#define __ (PRODUCT_ONLY(false&&)Verbose ? (_masm->block_comment(FILE_AND_LINE),_masm):_masm)-> +#define BLOCK_COMMENT(str) __ block_comment(str) +#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":") +#endif + +// The assumed minimum size of a BranchTableBlock. +// The actual size of each block heavily depends on the CPU capabilities and, +// of course, on the logic implemented in each block. +#ifdef ASSERT + #define BTB_MINSIZE 256 +#else + #define BTB_MINSIZE 64 +#endif + +#ifdef ASSERT +// Macro to open a BranchTableBlock (a piece of code that is branched to by a calculated branch). +#define BTB_BEGIN(lbl, alignment, name) \ + __ align_address(alignment); \ + __ bind(lbl); \ + { unsigned int b_off = __ offset(); \ + uintptr_t b_addr = (uintptr_t)__ pc(); \ + __ z_larl(Z_R0, (int64_t)0); /* Check current address alignment. */ \ + __ z_slgr(Z_R0, br_tab); /* Current Address must be equal */ \ + __ z_slgr(Z_R0, flags); /* to calculated branch target. */ \ + __ z_brc(Assembler::bcondLogZero, 3); /* skip trap if ok. */ \ + __ z_illtrap(0x55); \ + guarantee(b_addr%alignment == 0, "bad alignment at begin of block" name); + +// Macro to close a BranchTableBlock (a piece of code that is branched to by a calculated branch). 
+#define BTB_END(lbl, alignment, name) \ + uintptr_t e_addr = (uintptr_t)__ pc(); \ + unsigned int e_off = __ offset(); \ + unsigned int len = e_off-b_off; \ + if (len > alignment) { \ + tty->print_cr("%4d of %4d @ " INTPTR_FORMAT ": Block len for %s", \ + len, alignment, e_addr-len, name); \ + guarantee(len <= alignment, "block too large"); \ + } \ + guarantee(len == e_addr-b_addr, "block len mismatch"); \ + } +#else +// Macro to open a BranchTableBlock (a piece of code that is branched to by a calculated branch). +#define BTB_BEGIN(lbl, alignment, name) \ + __ align_address(alignment); \ + __ bind(lbl); \ + { unsigned int b_off = __ offset(); \ + uintptr_t b_addr = (uintptr_t)__ pc(); \ + guarantee(b_addr%alignment == 0, "bad alignment at begin of block" name); + +// Macro to close a BranchTableBlock (a piece of code that is branched to by a calculated branch). +#define BTB_END(lbl, alignment, name) \ + uintptr_t e_addr = (uintptr_t)__ pc(); \ + unsigned int e_off = __ offset(); \ + unsigned int len = e_off-b_off; \ + if (len > alignment) { \ + tty->print_cr("%4d of %4d @ " INTPTR_FORMAT ": Block len for %s", \ + len, alignment, e_addr-len, name); \ + guarantee(len <= alignment, "block too large"); \ + } \ + guarantee(len == e_addr-b_addr, "block len mismatch"); \ + } +#endif // ASSERT + +// Platform-dependent initialization. + +void TemplateTable::pd_initialize() { + // No specific initialization. +} + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(Z_locals, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} + +// Pass NULL, if no shift instruction should be emitted. 
+static inline Address iaddress(InterpreterMacroAssembler *masm, Register r) { + if (masm) { + masm->z_sllg(r, r, LogBytesPerWord); // index2bytes + } + return Address(Z_locals, r, Interpreter::local_offset_in_bytes(0)); +} + +// Pass NULL, if no shift instruction should be emitted. +static inline Address laddress(InterpreterMacroAssembler *masm, Register r) { + if (masm) { + masm->z_sllg(r, r, LogBytesPerWord); // index2bytes + } + return Address(Z_locals, r, Interpreter::local_offset_in_bytes(1) ); +} + +static inline Address faddress(InterpreterMacroAssembler *masm, Register r) { + return iaddress(masm, r); +} + +static inline Address daddress(InterpreterMacroAssembler *masm, Register r) { + return laddress(masm, r); +} + +static inline Address aaddress(InterpreterMacroAssembler *masm, Register r) { + return iaddress(masm, r); +} + +// At top of Java expression stack which may be different than esp(). It +// isn't for category 1 objects. +static inline Address at_tos(int slot = 0) { + return Address(Z_esp, Interpreter::expr_offset_in_bytes(slot)); +} + +// Condition conversion +static Assembler::branch_condition j_not(TemplateTable::Condition cc) { + switch (cc) { + case TemplateTable::equal : + return Assembler::bcondNotEqual; + case TemplateTable::not_equal : + return Assembler::bcondEqual; + case TemplateTable::less : + return Assembler::bcondNotLow; + case TemplateTable::less_equal : + return Assembler::bcondHigh; + case TemplateTable::greater : + return Assembler::bcondNotHigh; + case TemplateTable::greater_equal: + return Assembler::bcondLow; + } + ShouldNotReachHere(); + return Assembler::bcondZero; +} + +// Do an oop store like *(base + offset) = val +// offset can be a register or a constant. +static void do_oop_store(InterpreterMacroAssembler* _masm, + Register base, + RegisterOrConstant offset, + Register val, + bool val_is_null, // == false does not guarantee that val really is not equal NULL. 
+ Register tmp1, // If tmp3 is volatile, either tmp1 or tmp2 must be + Register tmp2, // non-volatile to hold a copy of pre_val across runtime calls. + Register tmp3, // Ideally, this tmp register is non-volatile, as it is used to + // hold pre_val (must survive runtime calls). + BarrierSet::Name barrier, + bool precise) { + BLOCK_COMMENT("do_oop_store {"); + assert(val != noreg, "val must always be valid, even if it is zero"); + assert_different_registers(tmp1, tmp2, tmp3, val, base, offset.register_or_noreg()); + __ verify_oop(val); + switch (barrier) { +#if INCLUDE_ALL_GCS + case BarrierSet::G1SATBCTLogging: + { +#ifdef ASSERT + if (val_is_null) { // Check if the flag setting reflects reality. + Label OK; + __ z_ltgr(val, val); + __ z_bre(OK); + __ z_illtrap(0x11); + __ bind(OK); + } +#endif + Register pre_val = tmp3; + // Load and record the previous value. + __ g1_write_barrier_pre(base, offset, pre_val, val, + tmp1, tmp2, + false); // Needs to hold pre_val in non_volatile register? + + if (val_is_null) { + __ store_heap_oop_null(val, offset, base); + } else { + Label Done; + // val_is_null == false does not guarantee that val really is not equal NULL. + // Checking for this case dynamically has some cost, but also some benefit (in GC). + // It's hard to say if cost or benefit is greater. + { Label OK; + __ z_ltgr(val, val); + __ z_brne(OK); + __ store_heap_oop_null(val, offset, base); + __ z_bru(Done); + __ bind(OK); + } + // G1 barrier needs uncompressed oop for region cross check. + // Store_heap_oop compresses the oop in the argument register. + Register val_work = val; + if (UseCompressedOops) { + val_work = tmp3; + __ z_lgr(val_work, val); + } + __ store_heap_oop_not_null(val_work, offset, base); + + // We need precise card marks for oop array stores. + // Otherwise, cardmarking the object which contains the oop is sufficient. 
+ if (precise && !(offset.is_constant() && offset.as_constant() == 0)) { + __ add2reg_with_index(base, + offset.constant_or_zero(), + offset.register_or_noreg(), + base); + } + __ g1_write_barrier_post(base /* store_adr */, val, tmp1, tmp2, tmp3); + __ bind(Done); + } + } + break; +#endif // INCLUDE_ALL_GCS + case BarrierSet::CardTableForRS: + case BarrierSet::CardTableExtension: + { + if (val_is_null) { + __ store_heap_oop_null(val, offset, base); + } else { + __ store_heap_oop(val, offset, base); + // Flatten object address if needed. + if (precise && ((offset.register_or_noreg() != noreg) || (offset.constant_or_zero() != 0))) { + __ load_address(base, Address(base, offset.register_or_noreg(), offset.constant_or_zero())); + } + __ card_write_barrier_post(base, tmp1); + } + } + break; + case BarrierSet::ModRef: + // fall through + default: + ShouldNotReachHere(); + + } + BLOCK_COMMENT("} do_oop_store"); +} + +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(Z_bcp, offset); +} + +void TemplateTable::patch_bytecode(Bytecodes::Code bc, + Register bc_reg, + Register temp_reg, + bool load_bc_into_bc_reg, // = true + int byte_no) { + if (!RewriteBytecodes) { return; } + + NearLabel L_patch_done; + BLOCK_COMMENT("patch_bytecode {"); + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. 
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(Z_R1_scratch, bc_reg, + temp_reg, byte_no, 1); + __ load_const_optimized(bc_reg, bc); + __ compareU32_and_branch(temp_reg, (intptr_t)0, + Assembler::bcondZero, L_patch_done); + } + break; + default: + assert(byte_no == -1, "sanity"); + // The pair bytecodes have already done the load. + if (load_bc_into_bc_reg) { + __ load_const_optimized(bc_reg, bc); + } + break; + } + + if (JvmtiExport::can_post_breakpoint()) { + + Label L_fast_patch; + + // If a breakpoint is present we can't rewrite the stream directly. + __ z_cli(at_bcp(0), Bytecodes::_breakpoint); + __ z_brne(L_fast_patch); + __ get_method(temp_reg); + // Let breakpoint table handling rewrite to quicker bytecode. + __ call_VM_static(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), + temp_reg, Z_R13, bc_reg); + __ z_bru(L_patch_done); + + __ bind(L_fast_patch); + } + +#ifdef ASSERT + NearLabel L_okay; + + // We load into 64 bits, since this works on any CPU. + __ z_llgc(temp_reg, at_bcp(0)); + __ compareU32_and_branch(temp_reg, Bytecodes::java_code(bc), + Assembler::bcondEqual, L_okay ); + __ compareU32_and_branch(temp_reg, bc_reg, Assembler::bcondEqual, L_okay); + __ stop_static("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // Patch bytecode. + __ z_stc(bc_reg, at_bcp(0)); + + __ bind(L_patch_done); + BLOCK_COMMENT("} patch_bytecode"); +} + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() { + transition(vtos, atos); + __ clear_reg(Z_tos, true, false); +} + +void TemplateTable::iconst(int value) { + transition(vtos, itos); + // Zero extension of the iconst makes zero extension at runtime obsolete. 
+ __ load_const_optimized(Z_tos, ((unsigned long)(unsigned int)value)); +} + +void TemplateTable::lconst(int value) { + transition(vtos, ltos); + __ load_const_optimized(Z_tos, value); +} + +// No pc-relative load/store for floats. +void TemplateTable::fconst(int value) { + transition(vtos, ftos); + static float one = 1.0f, two = 2.0f; + + switch (value) { + case 0: + __ z_lzer(Z_ftos); + return; + case 1: + __ load_absolute_address(Z_R1_scratch, (address) &one); + __ mem2freg_opt(Z_ftos, Address(Z_R1_scratch), false); + return; + case 2: + __ load_absolute_address(Z_R1_scratch, (address) &two); + __ mem2freg_opt(Z_ftos, Address(Z_R1_scratch), false); + return; + default: + ShouldNotReachHere(); + return; + } +} + +void TemplateTable::dconst(int value) { + transition(vtos, dtos); + static double one = 1.0; + + switch (value) { + case 0: + __ z_lzdr(Z_ftos); + return; + case 1: + __ load_absolute_address(Z_R1_scratch, (address) &one); + __ mem2freg_opt(Z_ftos, Address(Z_R1_scratch)); + return; + default: + ShouldNotReachHere(); + return; + } +} + +void TemplateTable::bipush() { + transition(vtos, itos); + __ z_lb(Z_tos, at_bcp(1)); +} + +void TemplateTable::sipush() { + transition(vtos, itos); + __ get_2_byte_integer_at_bcp(Z_tos, 1, InterpreterMacroAssembler::Signed); +} + + +void TemplateTable::ldc(bool wide) { + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, Done; + const Register RcpIndex = Z_tmp_1; + const Register Rtags = Z_ARG2; + + if (wide) { + __ get_2_byte_integer_at_bcp(RcpIndex, 1, InterpreterMacroAssembler::Unsigned); + } else { + __ z_llgc(RcpIndex, at_bcp(1)); + } + + __ get_cpool_and_tags(Z_tmp_2, Rtags); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array<u1>::base_offset_in_bytes(); // Offset of the first tag byte within the tags array. + const Register Raddr_type = Rtags; + + // Get address of type.
+ __ add2reg_with_index(Raddr_type, tags_offset, RcpIndex, Rtags); + + __ z_cli(0, Raddr_type, JVM_CONSTANT_UnresolvedClass); + __ z_bre(call_ldc); // Unresolved class - get the resolved class. + + __ z_cli(0, Raddr_type, JVM_CONSTANT_UnresolvedClassInError); + __ z_bre(call_ldc); // Unresolved class in error state - call into runtime + // to throw the error from the first resolution attempt. + + __ z_cli(0, Raddr_type, JVM_CONSTANT_Class); + __ z_brne(notClass); // Resolved class - need to call vm to get java + // mirror of the class. + + // We deal with a class. Call vm to do the appropriate. + __ bind(call_ldc); + __ load_const_optimized(Z_ARG2, wide); + call_VM(Z_RET, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), Z_ARG2); + __ push_ptr(Z_RET); + __ z_bru(Done); + + // Not a class. + __ bind(notClass); + Register RcpOffset = RcpIndex; + __ z_sllg(RcpOffset, RcpIndex, LogBytesPerWord); // Convert index to offset. + __ z_cli(0, Raddr_type, JVM_CONSTANT_Float); + __ z_brne(notFloat); + + // ftos + __ mem2freg_opt(Z_ftos, Address(Z_tmp_2, RcpOffset, base_offset), false); + __ push_f(); + __ z_bru(Done); + + __ bind(notFloat); +#ifdef ASSERT + { + Label L; + + __ z_cli(0, Raddr_type, JVM_CONSTANT_Integer); + __ z_bre(L); + // String and Object are rewritten to fast_aldc. + __ stop("unexpected tag type in ldc"); + + __ bind(L); + } +#endif + + // itos + __ mem2reg_opt(Z_tos, Address(Z_tmp_2, RcpOffset, base_offset), false); + __ push_i(Z_tos); + + __ bind(Done); +} + +// Fast path for caching oop constants. +// %%% We should use this to handle Class and String constants also. +// %%% It will simplify the ldc/primitive path considerably. +void TemplateTable::fast_aldc(bool wide) { + transition(vtos, atos); + + const Register index = Z_tmp_2; + int index_size = wide ? sizeof(u2) : sizeof(u1); + Label L_resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (CallSite, etc.). 
+ __ get_cache_index_at_bcp(index, 1, index_size); // Load index. + __ load_resolved_reference_at_index(Z_tos, index); + __ z_ltgr(Z_tos, Z_tos); + __ z_brne(L_resolved); + + // First time invocation - must resolve first. + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + __ load_const_optimized(Z_ARG1, (int)bytecode()); + __ call_VM(Z_tos, entry, Z_ARG1); + + __ bind(L_resolved); + __ verify_oop(Z_tos); +} + +void TemplateTable::ldc2_w() { + transition(vtos, vtos); + Label Long, Done; + + // Z_tmp_1 = index of cp entry + __ get_2_byte_integer_at_bcp(Z_tmp_1, 1, InterpreterMacroAssembler::Unsigned); + + __ get_cpool_and_tags(Z_tmp_2, Z_tos); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array<u1>::base_offset_in_bytes(); // Offset of the first tag byte within the tags array. + + // Get address of type. + __ add2reg_with_index(Z_tos, tags_offset, Z_tos, Z_tmp_1); + + // Index needed in both branches, so calculate here. + __ z_sllg(Z_tmp_1, Z_tmp_1, LogBytesPerWord); // index2bytes + + // Check type. + __ z_cli(0, Z_tos, JVM_CONSTANT_Double); + __ z_brne(Long); + + // dtos + __ mem2freg_opt(Z_ftos, Address(Z_tmp_2, Z_tmp_1, base_offset)); + __ push_d(); + __ z_bru(Done); + + __ bind(Long); + // ltos + __ mem2reg_opt(Z_tos, Address(Z_tmp_2, Z_tmp_1, base_offset)); + __ push_l(); + + __ bind(Done); +} + +void TemplateTable::locals_index(Register reg, int offset) { + __ z_llgc(reg, at_bcp(offset)); + __ z_lcgr(reg); +} + +void TemplateTable::iload() { + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +void TemplateTable::iload_internal(RewriteControl rc) { + transition(vtos, itos); + + if (RewriteFrequentPairs && rc == may_rewrite) { + NearLabel rewrite, done; + const Register bc = Z_ARG4; + + assert(Z_R1_scratch != bc, "register damaged"); + + // Get next byte. + __ z_llgc(Z_R1_scratch, at_bcp(Bytecodes::length_for (Bytecodes::_iload))); + + // If _iload, wait to rewrite to iload2.
We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_iload, + Assembler::bcondEqual, done); + + __ load_const_optimized(bc, Bytecodes::_fast_iload2); + __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_iload, + Assembler::bcondEqual, rewrite); + + // If _caload, rewrite to fast_icaload. + __ load_const_optimized(bc, Bytecodes::_fast_icaload); + __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_caload, + Assembler::bcondEqual, rewrite); + + // Rewrite so iload doesn't check again. + __ load_const_optimized(bc, Bytecodes::_fast_iload); + + // rewrite + // bc: fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, bc, Z_R1_scratch, false); + + __ bind(done); + + } + + // Get the local value into tos. + locals_index(Z_R1_scratch); + __ mem2reg_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false); +} + +void TemplateTable::fast_iload2() { + transition(vtos, itos); + + locals_index(Z_R1_scratch); + __ mem2reg_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false); + __ push_i(Z_tos); + locals_index(Z_R1_scratch, 3); + __ mem2reg_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false); +} + +void TemplateTable::fast_iload() { + transition(vtos, itos); + + locals_index(Z_R1_scratch); + __ mem2reg_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false); +} + +void TemplateTable::lload() { + transition(vtos, ltos); + + locals_index(Z_R1_scratch); + __ mem2reg_opt(Z_tos, laddress(_masm, Z_R1_scratch)); +} + +void TemplateTable::fload() { + transition(vtos, ftos); + + locals_index(Z_R1_scratch); + __ mem2freg_opt(Z_ftos, faddress(_masm, Z_R1_scratch), false); +} + +void TemplateTable::dload() { + transition(vtos, dtos); + + locals_index(Z_R1_scratch); + __ mem2freg_opt(Z_ftos, daddress(_masm, Z_R1_scratch)); +} + +void TemplateTable::aload() { + transition(vtos, atos); + + 
locals_index(Z_R1_scratch); + __ mem2reg_opt(Z_tos, aaddress(_masm, Z_R1_scratch)); +} + +void TemplateTable::locals_index_wide(Register reg) { + __ get_2_byte_integer_at_bcp(reg, 2, InterpreterMacroAssembler::Unsigned); + __ z_lcgr(reg); +} + +void TemplateTable::wide_iload() { + transition(vtos, itos); + + locals_index_wide(Z_tmp_1); + __ mem2reg_opt(Z_tos, iaddress(_masm, Z_tmp_1), false); +} + +void TemplateTable::wide_lload() { + transition(vtos, ltos); + + locals_index_wide(Z_tmp_1); + __ mem2reg_opt(Z_tos, laddress(_masm, Z_tmp_1)); +} + +void TemplateTable::wide_fload() { + transition(vtos, ftos); + + locals_index_wide(Z_tmp_1); + __ mem2freg_opt(Z_ftos, faddress(_masm, Z_tmp_1), false); +} + +void TemplateTable::wide_dload() { + transition(vtos, dtos); + + locals_index_wide(Z_tmp_1); + __ mem2freg_opt(Z_ftos, daddress(_masm, Z_tmp_1)); +} + +void TemplateTable::wide_aload() { + transition(vtos, atos); + + locals_index_wide(Z_tmp_1); + __ mem2reg_opt(Z_tos, aaddress(_masm, Z_tmp_1)); +} + +void TemplateTable::index_check(Register array, Register index, unsigned int shift) { + assert_different_registers(Z_R1_scratch, array, index); + + // Check array. + __ null_check(array, Z_R0_scratch, arrayOopDesc::length_offset_in_bytes()); + + // Sign extend index for use by indexed load. + __ z_lgfr(index, index); + + // Check index. + Label index_ok; + __ z_cl(index, Address(array, arrayOopDesc::length_offset_in_bytes())); + __ z_brl(index_ok); + __ lgr_if_needed(Z_ARG3, index); // See generate_ArrayIndexOutOfBounds_handler(). + // Give back the array to create more detailed exceptions. + __ lgr_if_needed(Z_ARG2, array); // See generate_ArrayIndexOutOfBounds_handler(). 
+ __ load_absolute_address(Z_R1_scratch, + Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ z_bcr(Assembler::bcondAlways, Z_R1_scratch); + __ bind(index_ok); + + if (shift > 0) + __ z_sllg(index, index, shift); +} + +void TemplateTable::iaload() { + transition(itos, itos); + + __ pop_ptr(Z_tmp_1); // array + // Index is in Z_tos. + Register index = Z_tos; + index_check(Z_tmp_1, index, LogBytesPerInt); // Kills Z_ARG3. + // Load the value. + __ mem2reg_opt(Z_tos, + Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_INT)), + false); +} + +void TemplateTable::laload() { + transition(itos, ltos); + + __ pop_ptr(Z_tmp_2); + // Z_tos : index + // Z_tmp_2 : array + Register index = Z_tos; + index_check(Z_tmp_2, index, LogBytesPerLong); + __ mem2reg_opt(Z_tos, + Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_LONG))); +} + +void TemplateTable::faload() { + transition(itos, ftos); + + __ pop_ptr(Z_tmp_2); + // Z_tos : index + // Z_tmp_2 : array + Register index = Z_tos; + index_check(Z_tmp_2, index, LogBytesPerInt); + __ mem2freg_opt(Z_ftos, + Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), + false); +} + +void TemplateTable::daload() { + transition(itos, dtos); + + __ pop_ptr(Z_tmp_2); + // Z_tos : index + // Z_tmp_2 : array + Register index = Z_tos; + index_check(Z_tmp_2, index, LogBytesPerLong); + __ mem2freg_opt(Z_ftos, + Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_DOUBLE))); +} + +void TemplateTable::aaload() { + transition(itos, atos); + + unsigned const int shift = LogBytesPerHeapOop; + __ pop_ptr(Z_tmp_1); // array + // Index is in Z_tos. + Register index = Z_tos; + index_check(Z_tmp_1, index, shift); + // Now load array element. 
+ __ load_heap_oop(Z_tos, + Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); + __ verify_oop(Z_tos); +} + +void TemplateTable::baload() { + transition(itos, itos); + + __ pop_ptr(Z_tmp_1); + // Z_tos : index + // Z_tmp_1 : array + Register index = Z_tos; + index_check(Z_tmp_1, index, 0); + __ z_lb(Z_tos, + Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_BYTE))); +} + +void TemplateTable::caload() { + transition(itos, itos); + + __ pop_ptr(Z_tmp_2); + // Z_tos : index + // Z_tmp_2 : array + Register index = Z_tos; + index_check(Z_tmp_2, index, LogBytesPerShort); + // Load into 64 bits, works on all CPUs. + __ z_llgh(Z_tos, + Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_CHAR))); +} + +// Iload followed by caload frequent pair. +void TemplateTable::fast_icaload() { + transition(vtos, itos); + + // Load index out of locals. + locals_index(Z_R1_scratch); + __ mem2reg_opt(Z_ARG3, iaddress(_masm, Z_R1_scratch), false); + // Z_ARG3 : index + // Z_tmp_2 : array + __ pop_ptr(Z_tmp_2); + index_check(Z_tmp_2, Z_ARG3, LogBytesPerShort); + // Load into 64 bits, works on all CPUs. 
+  __ z_llgh(Z_tos,
+            Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+}
+
+// saload: pop the array reference, run index_check() on the index held in
+// Z_tos and load the T_SHORT element into Z_tos.
+void TemplateTable::saload() {
+  transition(itos, itos);
+
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : index
+  // Z_tmp_2 : array
+  Register index = Z_tos;
+  index_check(Z_tmp_2, index, LogBytesPerShort);
+  __ z_lh(Z_tos,
+          Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
+}
+
+// iload_<n>: load the int in local variable slot n into Z_tos.
+void TemplateTable::iload(int n) {
+  transition(vtos, itos);
+  __ z_ly(Z_tos, iaddress(n));
+}
+
+// lload_<n>: load the long in local variable slot n into Z_tos.
+void TemplateTable::lload(int n) {
+  transition(vtos, ltos);
+  __ z_lg(Z_tos, laddress(n));
+}
+
+// fload_<n>: load the float in local variable slot n into Z_ftos.
+void TemplateTable::fload(int n) {
+  transition(vtos, ftos);
+  __ mem2freg_opt(Z_ftos, faddress(n), false);
+}
+
+// dload_<n>: load the double in local variable slot n into Z_ftos.
+void TemplateTable::dload(int n) {
+  transition(vtos, dtos);
+  __ mem2freg_opt(Z_ftos, daddress(n));
+}
+
+// aload_<n>: load the reference in local variable slot n into Z_tos.
+void TemplateTable::aload(int n) {
+  transition(vtos, atos);
+  __ mem2reg_opt(Z_tos, aaddress(n));
+}
+
+// aload_0 using the default RewriteControl of aload_0_internal()
+// (presumably may_rewrite - confirm against the declaration).
+void TemplateTable::aload_0() {
+  aload_0_internal();
+}
+
+// aload_0 variant that never rewrites the bytecode.
+void TemplateTable::nofast_aload_0() {
+  aload_0_internal(may_not_rewrite);
+}
+
+// Common implementation of aload_0; rc decides whether the bytecode may be
+// rewritten into one of the fast variants described below.
+void TemplateTable::aload_0_internal(RewriteControl rc) {
+  transition(vtos, atos);
+
+  // According to bytecode histograms, the pairs:
+  //
+  // _aload_0, _fast_igetfield
+  // _aload_0, _fast_agetfield
+  // _aload_0, _fast_fgetfield
+  //
+  // occur frequently. If RewriteFrequentPairs is set, the (slow)
+  // _aload_0 bytecode checks if the next bytecode is either
+  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
+  // rewrites the current bytecode into a pair bytecode; otherwise it
+  // rewrites the current bytecode into _fast_aload_0 that doesn't do
+  // the pair check anymore.
+  //
+  // Note: If the next bytecode is _getfield, the rewrite must be
+  // delayed, otherwise we may miss an opportunity for a pair.
+  //
+  // Also rewrite frequent pairs
+  //   aload_0, aload_1
+  //   aload_0, iload_1
+  // These bytecodes with a small amount of code are most profitable
+  // to rewrite.
+  // Without rewriting this is a plain load of local 0.
+  if (!(RewriteFrequentPairs && (rc == may_rewrite))) {
+    aload(0);
+    return;
+  }
+
+  NearLabel rewrite, done;
+  const Register bc = Z_ARG4;
+
+  assert(Z_R1_scratch != bc, "register damaged");
+  // Get next byte.
+  __ z_llgc(Z_R1_scratch, at_bcp(Bytecodes::length_for (Bytecodes::_aload_0)));
+
+  // Do actual aload_0.
+  aload(0);
+
+  // If _getfield then wait with rewrite.
+  __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_getfield,
+                           Assembler::bcondEqual, done);
+
+  // Each candidate below first loads its replacement bytecode into bc and
+  // then branches to 'rewrite' if the next bytecode matches.
+
+  // If _igetfield then rewrite to _fast_iaccess_0.
+  assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0)
+         == Bytecodes::_aload_0, "fix bytecode definition");
+
+  __ load_const_optimized(bc, Bytecodes::_fast_iaccess_0);
+  __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_igetfield,
+                           Assembler::bcondEqual, rewrite);
+
+  // If _agetfield then rewrite to _fast_aaccess_0.
+  assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0)
+         == Bytecodes::_aload_0, "fix bytecode definition");
+
+  __ load_const_optimized(bc, Bytecodes::_fast_aaccess_0);
+  __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_agetfield,
+                           Assembler::bcondEqual, rewrite);
+
+  // If _fgetfield then rewrite to _fast_faccess_0.
+  assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0)
+         == Bytecodes::_aload_0, "fix bytecode definition");
+
+  __ load_const_optimized(bc, Bytecodes::_fast_faccess_0);
+  __ compareU32_and_branch(Z_R1_scratch, Bytecodes::_fast_fgetfield,
+                           Assembler::bcondEqual, rewrite);
+
+  // Else rewrite to _fast_aload0.
+  assert(Bytecodes::java_code(Bytecodes::_fast_aload_0)
+         == Bytecodes::_aload_0, "fix bytecode definition");
+  __ load_const_optimized(bc, Bytecodes::_fast_aload_0);
+
+  // rewrite
+  // bc: fast bytecode
+  __ bind(rewrite);
+
+  patch_bytecode(Bytecodes::_aload_0, bc, Z_R1_scratch, false);
+  // Reload local 0 because of VM call inside patch_bytecode().
+  // this may trigger GC and thus change the oop.
+  aload(0);
+
+  __ bind(done);
+}
+
+// istore: store the int in Z_tos into the local selected by locals_index().
+void TemplateTable::istore() {
+  transition(itos, vtos);
+  locals_index(Z_R1_scratch);
+  __ reg2mem_opt(Z_tos, iaddress(_masm, Z_R1_scratch), false);
+}
+
+// lstore: store the long in Z_tos into the local selected by locals_index().
+void TemplateTable::lstore() {
+  transition(ltos, vtos);
+  locals_index(Z_R1_scratch);
+  __ reg2mem_opt(Z_tos, laddress(_masm, Z_R1_scratch));
+}
+
+// fstore: store the float in Z_ftos into the local selected by locals_index().
+// NOTE(review): unlike wide_fstore() and fstore(int n), no trailing 'false'
+// is passed to freg2mem_opt() here - confirm this is intended.
+void TemplateTable::fstore() {
+  transition(ftos, vtos);
+  locals_index(Z_R1_scratch);
+  __ freg2mem_opt(Z_ftos, faddress(_masm, Z_R1_scratch));
+}
+
+// dstore: store the double in Z_ftos into the local selected by locals_index().
+void TemplateTable::dstore() {
+  transition(dtos, vtos);
+  locals_index(Z_R1_scratch);
+  __ freg2mem_opt(Z_ftos, daddress(_masm, Z_R1_scratch));
+}
+
+// astore: pop a pointer-sized value from the expression stack and store it
+// into the local selected by locals_index(). Entered with vtos, so the value
+// is popped explicitly.
+void TemplateTable::astore() {
+  transition(vtos, vtos);
+  __ pop_ptr(Z_tos);
+  locals_index(Z_R1_scratch);
+  __ reg2mem_opt(Z_tos, aaddress(_masm, Z_R1_scratch));
+}
+
+// wide_istore: like istore, but the value is popped explicitly and the local
+// is selected by locals_index_wide().
+void TemplateTable::wide_istore() {
+  transition(vtos, vtos);
+  __ pop_i(Z_tos);
+  locals_index_wide(Z_tmp_1);
+  __ reg2mem_opt(Z_tos, iaddress(_masm, Z_tmp_1), false);
+}
+
+// wide_lstore: like lstore, but the value is popped explicitly and the local
+// is selected by locals_index_wide().
+void TemplateTable::wide_lstore() {
+  transition(vtos, vtos);
+  __ pop_l(Z_tos);
+  locals_index_wide(Z_tmp_1);
+  __ reg2mem_opt(Z_tos, laddress(_masm, Z_tmp_1));
+}
+
+// wide_fstore: like fstore, but the value is popped explicitly and the local
+// is selected by locals_index_wide().
+void TemplateTable::wide_fstore() {
+  transition(vtos, vtos);
+  __ pop_f(Z_ftos);
+  locals_index_wide(Z_tmp_1);
+  __ freg2mem_opt(Z_ftos, faddress(_masm, Z_tmp_1), false);
+}
+
+// wide_dstore: like dstore, but the value is popped explicitly and the local
+// is selected by locals_index_wide().
+void TemplateTable::wide_dstore() {
+  transition(vtos, vtos);
+  __ pop_d(Z_ftos);
+  locals_index_wide(Z_tmp_1);
+  __ freg2mem_opt(Z_ftos, daddress(_masm, Z_tmp_1));
+}
+
+// wide_astore: like astore, but the local is selected by locals_index_wide().
+void TemplateTable::wide_astore() {
+  transition(vtos, vtos);
+  __ pop_ptr(Z_tos);
+  locals_index_wide(Z_tmp_1);
+  __ reg2mem_opt(Z_tos, aaddress(_masm, Z_tmp_1));
+}
+
+// iastore: value in Z_tos; pops index and array, checks the index and stores
+// the value into the T_INT element.
+void TemplateTable::iastore() {
+  transition(itos, vtos);
+
+  Register index = Z_ARG3; // Index_check expects index in Z_ARG3.
+  // Value is in Z_tos ...
+  __ pop_i(index);        // index
+  __ pop_ptr(Z_tmp_1);    // array
+  index_check(Z_tmp_1, index, LogBytesPerInt);
+  // ... and then move the value.
+  __ reg2mem_opt(Z_tos,
+                 Address(Z_tmp_1, index, arrayOopDesc::base_offset_in_bytes(T_INT)),
+                 false);
+}
+
+// lastore: value in Z_tos; pops index and array, checks the index and stores
+// the value into the T_LONG element.
+void TemplateTable::lastore() {
+  transition(ltos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : value
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  index_check(Z_tmp_2, Z_ARG3, LogBytesPerLong); // Prefer index in Z_ARG3.
+  __ reg2mem_opt(Z_tos,
+                 Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_LONG)));
+}
+
+// fastore: value in Z_ftos; pops index and array, checks the index and stores
+// the value into the T_FLOAT element.
+void TemplateTable::fastore() {
+  transition(ftos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_ftos  : value
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  index_check(Z_tmp_2, Z_ARG3, LogBytesPerInt); // Prefer index in Z_ARG3.
+  __ freg2mem_opt(Z_ftos,
+                  Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_FLOAT)),
+                  false);
+}
+
+// dastore: value in Z_ftos; pops index and array, checks the index and stores
+// the value into the T_DOUBLE element.
+void TemplateTable::dastore() {
+  transition(dtos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_ftos  : value
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  index_check(Z_tmp_2, Z_ARG3, LogBytesPerLong); // Prefer index in Z_ARG3.
+  __ freg2mem_opt(Z_ftos,
+                  Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
+}
+
+// aastore: store a reference into an object array. The three operands are
+// read from the expression stack without popping them (load_ptr / z_l) and
+// are only removed at the very end of the template.
+void TemplateTable::aastore() {
+  NearLabel is_null, ok_is_subtype, done;
+  transition(vtos, vtos);
+
+  // stack: ..., array, index, value
+
+  Register Rvalue = Z_tos;
+  Register Rarray = Z_ARG2;
+  Register Rindex = Z_ARG3; // Convention for index_check().
+
+  __ load_ptr(0, Rvalue);
+  __ z_l(Rindex, Address(Z_esp, Interpreter::expr_offset_in_bytes(1)));
+  __ load_ptr(2, Rarray);
+
+  unsigned const int shift = LogBytesPerHeapOop;
+  index_check(Rarray, Rindex, shift); // side effect: Rindex = Rindex << shift
+  Register Rstore_addr = Rindex;
+  // Address where the store goes to, i.e. &(Rarray[index])
+  __ load_address(Rstore_addr, Address(Rarray, Rindex, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+
+  // do array store check - check for NULL value first.
+  __ compareU64_and_branch(Rvalue, (intptr_t)0, Assembler::bcondEqual, is_null);
+
+  // Non-null value: its klass must be a subtype of the array's element klass.
+  Register Rsub_klass   = Z_ARG4;
+  Register Rsuper_klass = Z_ARG5;
+  __ load_klass(Rsub_klass, Rvalue);
+  // Load superklass.
+  __ load_klass(Rsuper_klass, Rarray);
+  __ z_lg(Rsuper_klass, Address(Rsuper_klass, ObjArrayKlass::element_klass_offset()));
+
+  // Generate a fast subtype check.  Branch to ok_is_subtype if no failure.
+  // Throw if failure.
+  Register tmp1 = Z_tmp_1;
+  Register tmp2 = Z_tmp_2;
+  __ gen_subtype_check(Rsub_klass, Rsuper_klass, tmp1, tmp2, ok_is_subtype);
+
+  // Fall through on failure.
+  // Object is in Rvalue == Z_tos.
+  assert(Rvalue == Z_tos, "that's the expected location");
+  __ load_absolute_address(tmp1, Interpreter::_throw_ArrayStoreException_entry);
+  __ z_br(tmp1);
+
+  // Come here on success.
+  __ bind(ok_is_subtype);
+
+  // Now store using the appropriate barrier.
+  // Rsub_klass is no longer needed after the subtype check and is reused as tmp3.
+  Register tmp3 = Rsub_klass;
+  do_oop_store(_masm, Rstore_addr, (intptr_t)0/*offset*/, Rvalue, false/*val==null*/,
+               tmp3, tmp2, tmp1, _bs->kind(), true);
+  __ z_bru(done);
+
+  // Have a NULL in Rvalue.
+  __ bind(is_null);
+  __ profile_null_seen(tmp1);
+
+  // Store a NULL.
+  do_oop_store(_masm, Rstore_addr, (intptr_t)0/*offset*/, Rvalue, true/*val==null*/,
+               tmp3, tmp2, tmp1, _bs->kind(), true);
+
+  // Pop stack arguments.
+  __ bind(done);
+  __ add2reg(Z_esp, 3 * Interpreter::stackElementSize);
+}
+
+
+// bastore: value in Z_tos; pops index and array, checks the index and stores
+// the value into the T_BYTE element.
+void TemplateTable::bastore() {
+  transition(itos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : value
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  // No index shift necessary - pass 0.
+  index_check(Z_tmp_2, Z_ARG3, 0); // Prefer index in Z_ARG3.
+  __ z_stc(Z_tos,
+           Address(Z_tmp_2, Z_ARG3, arrayOopDesc::base_offset_in_bytes(T_BYTE)));
+}
+
+// castore: value in Z_tos; pops index and array, checks the index and stores
+// a halfword into the T_CHAR element.
+void TemplateTable::castore() {
+  transition(itos, vtos);
+
+  __ pop_i(Z_ARG3);
+  __ pop_ptr(Z_tmp_2);
+  // Z_tos   : value
+  // Z_ARG3  : index
+  // Z_tmp_2 : array
+  Register index = Z_ARG3; // prefer index in Z_ARG3
+  index_check(Z_tmp_2, index, LogBytesPerShort);
+  __ z_sth(Z_tos,
+           Address(Z_tmp_2, index, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
+}
+
+// sastore: generates exactly the same code as castore.
+void TemplateTable::sastore() {
+  castore();
+}
+
+// istore_<n>: store the int in Z_tos into local variable slot n.
+void TemplateTable::istore(int n) {
+  transition(itos, vtos);
+  __ reg2mem_opt(Z_tos, iaddress(n), false);
+}
+
+// lstore_<n>: store the long in Z_tos into local variable slot n.
+void TemplateTable::lstore(int n) {
+  transition(ltos, vtos);
+  __ reg2mem_opt(Z_tos, laddress(n));
+}
+
+// fstore_<n>: store the float in Z_ftos into local variable slot n.
+void TemplateTable::fstore(int n) {
+  transition(ftos, vtos);
+  __ freg2mem_opt(Z_ftos, faddress(n), false);
+}
+
+// dstore_<n>: store the double in Z_ftos into local variable slot n.
+void TemplateTable::dstore(int n) {
+  transition(dtos, vtos);
+  __ freg2mem_opt(Z_ftos, daddress(n));
+}
+
+// astore_<n>: pop a pointer-sized value and store it into local variable slot n.
+void TemplateTable::astore(int n) {
+  transition(vtos, vtos);
+  __ pop_ptr(Z_tos);
+  __ reg2mem_opt(Z_tos, aaddress(n));
+}
+
+// pop: discard one expression stack element by adjusting Z_esp.
+void TemplateTable::pop() {
+  transition(vtos, vtos);
+  __ add2reg(Z_esp, Interpreter::stackElementSize);
+}
+
+// pop2: discard two expression stack elements by adjusting Z_esp.
+void TemplateTable::pop2() {
+  transition(vtos, vtos);
+  __ add2reg(Z_esp, 2 * Interpreter::stackElementSize);
+}
+
+// dup: duplicate the top expression stack element.
+void TemplateTable::dup() {
+  transition(vtos, vtos);
+  __ load_ptr(0, Z_tos);
+  __ push_ptr(Z_tos);
+  // stack: ..., a, a
+}
+
+// dup_x1: ..., a, b  ->  ..., b, a, b
+void TemplateTable::dup_x1() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b
+  __ load_ptr(0, Z_tos);          // load b
+  __ load_ptr(1, Z_R0_scratch);   // load a
+  __ store_ptr(1, Z_tos);         // store b
+  __ store_ptr(0, Z_R0_scratch);  // store a
+  __ push_ptr(Z_tos);             // push b
+  // stack: ..., b, a, b
+}
+
+// dup_x2: ..., a, b, c  ->  ..., c, a, b, c
+// Slot numbers passed to load_ptr/store_ptr count from the current top of
+// stack, so they shift by one after the push below.
+void TemplateTable::dup_x2() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b, c
+  __ load_ptr(0, Z_R0_scratch);   // load c
+  __ load_ptr(2, Z_R1_scratch);   // load a
+  __ store_ptr(2, Z_R0_scratch);  // store c in a
+  __ push_ptr(Z_R0_scratch);      // push c
+  // stack: ..., c, b, c, c
+  __ load_ptr(2, Z_R0_scratch);   // load b
+  __ store_ptr(2, Z_R1_scratch);  // store a in b
+  // stack: ..., c, a, c, c
+  __ store_ptr(1, Z_R0_scratch);  // store b in c
+  // stack: ..., c, a, b, c
+}
+
+// dup2: ..., a, b  ->  ..., a, b, a, b
+void TemplateTable::dup2() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b
+  __ load_ptr(1, Z_R0_scratch);  // load a
+  __ push_ptr(Z_R0_scratch);     // push a
+  __ load_ptr(1, Z_R0_scratch);  // load b
+  __ push_ptr(Z_R0_scratch);     // push b
+  // stack: ..., a, b, a, b
+}
+
+// dup2_x1: ..., a, b, c  ->  ..., b, c, a, b, c
+void TemplateTable::dup2_x1() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b, c
+  __ load_ptr(0, Z_R0_scratch);  // load c
+  __ load_ptr(1, Z_R1_scratch);  // load b
+  __ push_ptr(Z_R1_scratch);     // push b
+  __ push_ptr(Z_R0_scratch);     // push c
+  // stack: ..., a, b, c, b, c
+  __ store_ptr(3, Z_R0_scratch); // store c in b
+  // stack: ..., a, c, c, b, c
+  __ load_ptr( 4, Z_R0_scratch); // load a
+  __ store_ptr(2, Z_R0_scratch); // store a in 2nd c
+  // stack: ..., a, c, a, b, c
+  __ store_ptr(4, Z_R1_scratch); // store b in a
+  // stack: ..., b, c, a, b, c
+}
+
+// dup2_x2: ..., a, b, c, d  ->  ..., c, d, a, b, c, d
+void TemplateTable::dup2_x2() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b, c, d
+  __ load_ptr(0, Z_R0_scratch);   // load d
+  __ load_ptr(1, Z_R1_scratch);   // load c
+  __ push_ptr(Z_R1_scratch);      // push c
+  __ push_ptr(Z_R0_scratch);      // push d
+  // stack: ..., a, b, c, d, c, d
+  __ load_ptr(4, Z_R1_scratch);   // load b
+  __ store_ptr(2, Z_R1_scratch);  // store b in d
+  __ store_ptr(4, Z_R0_scratch);  // store d in b
+  // stack: ..., a, d, c, b, c, d
+  __ load_ptr(5, Z_R0_scratch);   // load a
+  __ load_ptr(3, Z_R1_scratch);   // load c
+  __ store_ptr(3, Z_R0_scratch);  // store a in c
+  __ store_ptr(5, Z_R1_scratch);  // store c in a
+  // stack: ..., c, d, a, b, c, d
+}
+
+// swap: ..., a, b  ->  ..., b, a
+void TemplateTable::swap() {
+  transition(vtos, vtos);
+
+  // stack: ..., a, b
+  __ load_ptr(1, Z_R0_scratch);  // load a
+  __ load_ptr(0, Z_R1_scratch);  // load b
+  __ store_ptr(0, Z_R0_scratch); // store a in b
+  __ store_ptr(1, Z_R1_scratch); // store b in a
+  // stack: ..., b, a
+}
+
+// Binary int operations. The second operand is in Z_tos, the first one is
+// still on the expression stack; the result is left in Z_tos.
+// sub: Z_tos := Z_tos - [stack top], then the complement is taken so that the
+// result is [stack top] - Z_tos.
+// Shifts: the shift amount (Z_tos) is masked to its low 5 bits in Z_tmp_1
+// before the value to shift is popped into Z_tos.
+void TemplateTable::iop2(Operation op) {
+  transition(itos, itos);
+  switch (op) {
+    case add  : __ z_ay(Z_tos,  __ stackTop()); __ pop_i(); break;
+    case sub  : __ z_sy(Z_tos,  __ stackTop()); __ pop_i(); __ z_lcr(Z_tos, Z_tos); break;
+    case mul  : __ z_msy(Z_tos, __ stackTop()); __ pop_i(); break;
+    case _and : __ z_ny(Z_tos,  __ stackTop()); __ pop_i(); break;
+    case _or  : __ z_oy(Z_tos,  __ stackTop()); __ pop_i(); break;
+    case _xor : __ z_xy(Z_tos,  __ stackTop()); __ pop_i(); break;
+    case shl  : __ z_lr(Z_tmp_1, Z_tos);
+                __ z_nill(Z_tmp_1, 31);  // Lowest 5 bits are shiftamount.
+                __ pop_i(Z_tos);   __ z_sll(Z_tos, 0,  Z_tmp_1); break;
+    case shr  : __ z_lr(Z_tmp_1, Z_tos);
+                __ z_nill(Z_tmp_1, 31);  // Lowest 5 bits are shiftamount.
+                __ pop_i(Z_tos);   __ z_sra(Z_tos, 0,  Z_tmp_1); break;
+    case ushr : __ z_lr(Z_tmp_1, Z_tos);
+                __ z_nill(Z_tmp_1, 31);  // Lowest 5 bits are shiftamount.
+                __ pop_i(Z_tos);   __ z_srl(Z_tos, 0,  Z_tmp_1); break;
+    default   : ShouldNotReachHere(); break;
+  }
+  return;
+}
+
+// Binary long operations (add, sub, mul, and, or, xor); same operand layout
+// and sub handling as iop2, using the 64-bit instruction forms.
+void TemplateTable::lop2(Operation op) {
+  transition(ltos, ltos);
+
+  switch (op) {
+    case add  : __ z_ag(Z_tos,  __ stackTop()); __ pop_l(); break;
+    case sub  : __ z_sg(Z_tos,  __ stackTop()); __ pop_l(); __ z_lcgr(Z_tos, Z_tos); break;
+    case mul  : __ z_msg(Z_tos, __ stackTop()); __ pop_l(); break;
+    case _and : __ z_ng(Z_tos,  __ stackTop()); __ pop_l(); break;
+    case _or  : __ z_og(Z_tos,  __ stackTop()); __ pop_l(); break;
+    case _xor : __ z_xg(Z_tos,  __ stackTop()); __ pop_l(); break;
+    default   : ShouldNotReachHere(); break;
+  }
+  return;
+}
+
+// Common part of idiv/irem.
+// Divisor is in Z_tos, the dividend is popped into Z_tmp_2. A zero divisor
+// branches to 'exception'. Callers read the quotient from Z_tmp_2 and the
+// remainder from Z_tmp_1.
+static void idiv_helper(InterpreterMacroAssembler * _masm, address exception) {
+  NearLabel not_null;
+
+  // Use register pair Z_tmp_1, Z_tmp_2 for DIVIDE SINGLE.
+  assert(Z_tmp_1->successor() == Z_tmp_2, " need even/odd register pair for idiv/irem");
+
+  // Get dividend.
+  __ pop_i(Z_tmp_2);
+
+  // If divisor == 0 throw exception.
+  __ compare32_and_branch(Z_tos, (intptr_t) 0,
+                          Assembler::bcondNotEqual, not_null   );
+  __ load_absolute_address(Z_R1_scratch, exception);
+  __ z_br(Z_R1_scratch);
+
+  __ bind(not_null);
+
+  __ z_lgfr(Z_tmp_2, Z_tmp_2);   // Sign extend dividend.
+  __ z_dsgfr(Z_tmp_1, Z_tos);    // Do it.
+}
+
+// idiv: int division; the quotient produced by idiv_helper() is moved to Z_tos.
+void TemplateTable::idiv() {
+  transition(itos, itos);
+
+  idiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry);
+  __ z_llgfr(Z_tos, Z_tmp_2);     // Result is in Z_tmp_2.
+}
+
+// irem: int remainder; the remainder produced by idiv_helper() is moved to Z_tos.
+void TemplateTable::irem() {
+  transition(itos, itos);
+
+  idiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry);
+  __ z_llgfr(Z_tos, Z_tmp_1);     // Result is in Z_tmp_1.
+}
+
+// lmul: multiply Z_tos by the long on top of the expression stack.
+void TemplateTable::lmul() {
+  transition(ltos, ltos);
+
+  // Multiply with memory operand.
+  __ z_msg(Z_tos, __ stackTop());
+  __ pop_l();  // Pop operand.
+}
+
+// Common part of ldiv/lrem.
+//
+// Input:
+//   Z_tos := the divisor (dividend still on stack)
+//
+// Updated registers:
+//   Z_tmp_1 := pop_l() % Z_tos     ; if is_ldiv == false
+//   Z_tmp_2 := pop_l() / Z_tos     ; if is_ldiv == true
+//
+// A zero divisor branches to 'exception'. A divisor of -1 is handled without
+// executing the divide instruction (see below).
+static void ldiv_helper(InterpreterMacroAssembler * _masm, address exception, bool is_ldiv) {
+  NearLabel not_null, done;
+
+  // Use register pair Z_tmp_1, Z_tmp_2 for DIVIDE SINGLE.
+  assert(Z_tmp_1->successor() == Z_tmp_2,
+         " need even/odd register pair for idiv/irem");
+
+  // Get dividend.
+  __ pop_l(Z_tmp_2);
+
+  // If divisor == 0 throw exception.
+  __ compare64_and_branch(Z_tos, (intptr_t)0, Assembler::bcondNotEqual, not_null);
+  __ load_absolute_address(Z_R1_scratch, exception);
+  __ z_br(Z_R1_scratch);
+
+  __ bind(not_null);
+  // Special case for dividend == 0x8000 and divisor == -1.
+  // The result for divisor == -1 is precomputed here, before the divisor is
+  // tested. NOTE(review): presumably this avoids a divide exception for
+  // (most negative long) / -1 - confirm against the instruction reference.
+  if (is_ldiv) {
+    // result := Z_tmp_2 := - dividend
+    __ z_lcgr(Z_tmp_2, Z_tmp_2);
+  } else {
+    // result remainder := Z_tmp_1 := 0
+    __ clear_reg(Z_tmp_1, true, false);  // Don't set CC.
+  }
+
+  // if divisor == -1 goto done
+  __ compare64_and_branch(Z_tos, -1, Assembler::bcondEqual, done);
+  // Only the complement below is guarded by the if; the divide always
+  // executes on this path.
+  if (is_ldiv)
+    // Restore sign, because divisor != -1.
+    __ z_lcgr(Z_tmp_2, Z_tmp_2);
+  __ z_dsgr(Z_tmp_1, Z_tos);    // Do it.
+  __ bind(done);
+}
+
+// ldiv: long division; the quotient produced by ldiv_helper() is moved to Z_tos.
+void TemplateTable::ldiv() {
+  transition(ltos, ltos);
+
+  ldiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry, true /*is_ldiv*/);
+  __ z_lgr(Z_tos, Z_tmp_2);     // Result is in Z_tmp_2.
+}
+
+// lrem: long remainder; the remainder produced by ldiv_helper() is moved to Z_tos.
+void TemplateTable::lrem() {
+  transition(ltos, ltos);
+
+  ldiv_helper(_masm, Interpreter::_throw_ArithmeticException_entry, false /*is_ldiv*/);
+  __ z_lgr(Z_tos, Z_tmp_1);     // Result is in Z_tmp_1.
+}
+
+// lshl: shift the long popped from the stack left by the amount in Z_tos.
+void TemplateTable::lshl() {
+  transition(itos, ltos);
+
+  // Z_tos: shift amount
+  __ pop_l(Z_tmp_1);              // Get shift value.
+  __ z_sllg(Z_tos, Z_tmp_1, 0, Z_tos);
+}
+
+// lshr: arithmetic right shift of the popped long by the amount in Z_tos.
+void TemplateTable::lshr() {
+  transition(itos, ltos);
+
+  // Z_tos: shift amount
+  __ pop_l(Z_tmp_1);              // Get shift value.
+  __ z_srag(Z_tos, Z_tmp_1, 0, Z_tos);
+}
+
+// lushr: logical right shift of the popped long by the amount in Z_tos.
+void TemplateTable::lushr() {
+  transition(itos, ltos);
+
+  // Z_tos: shift amount
+  __ pop_l(Z_tmp_1);              // Get shift value.
+  __ z_srlg(Z_tos, Z_tmp_1, 0, Z_tos);
+}
+
+// Binary float operations. One operand is in Z_ftos, the other one is on the
+// expression stack; the result is left in Z_ftos. For sub and div the
+// register operand is first saved in Z_F1 so that the popped value becomes the
+// left-hand operand. rem is delegated to SharedRuntime::frem.
+void TemplateTable::fop2(Operation op) {
+  transition(ftos, ftos);
+
+  switch (op) {
+    case add:
+      // Add memory operand.
+      __ z_aeb(Z_ftos, __ stackTop()); __ pop_f(); return;
+    case sub:
+      // Sub memory operand.
+      __ z_ler(Z_F1, Z_ftos);    // first operand
+      __ pop_f(Z_ftos);          // second operand from stack
+      __ z_sebr(Z_ftos, Z_F1);
+      return;
+    case mul:
+      // Multiply with memory operand.
+      __ z_meeb(Z_ftos, __ stackTop()); __ pop_f(); return;
+    case div:
+      __ z_ler(Z_F1, Z_ftos);    // first operand
+      __ pop_f(Z_ftos);          // second operand from stack
+      __ z_debr(Z_ftos, Z_F1);
+      return;
+    case rem:
+      // Do runtime call.
+      __ z_ler(Z_FARG2, Z_ftos);  // divisor
+      __ pop_f(Z_FARG1);          // dividend
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem));
+      // Result should be in the right place (Z_ftos == Z_FRET).
+      return;
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+}
+
+// Binary double operations. One operand is in Z_ftos, the other one is on the
+// expression stack; the result is left in Z_ftos. For sub and div the
+// register operand is first saved in Z_F1 so that the popped value becomes the
+// left-hand operand. rem is delegated to SharedRuntime::drem.
+void TemplateTable::dop2(Operation op) {
+  transition(dtos, dtos);
+
+  switch (op) {
+    case add:
+      // Add memory operand.
+      __ z_adb(Z_ftos, __ stackTop()); __ pop_d(); return;
+    case sub:
+      // Sub memory operand.
+      __ z_ldr(Z_F1, Z_ftos);    // first operand
+      __ pop_d(Z_ftos);          // second operand from stack
+      __ z_sdbr(Z_ftos, Z_F1);
+      return;
+    case mul:
+      // Multiply with memory operand.
+      __ z_mdb(Z_ftos, __ stackTop()); __ pop_d(); return;
+    case div:
+      __ z_ldr(Z_F1, Z_ftos);    // first operand
+      __ pop_d(Z_ftos);          // second operand from stack
+      __ z_ddbr(Z_ftos, Z_F1);
+      return;
+    case rem:
+      // Do runtime call.
+      __ z_ldr(Z_FARG2, Z_ftos);  // divisor
+      __ pop_d(Z_FARG1);          // dividend
+      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem));
+      // Result should be in the right place (Z_ftos == Z_FRET).
+      return;
+    default:
+      ShouldNotReachHere();
+      return;
+  }
+}
+
+// ineg: negate the int in Z_tos.
+void TemplateTable::ineg() {
+  transition(itos, itos);
+  __ z_lcr(Z_tos);
+}
+
+// lneg: negate the long in Z_tos.
+void TemplateTable::lneg() {
+  transition(ltos, ltos);
+  __ z_lcgr(Z_tos);
+}
+
+// fneg: negate the float in Z_ftos.
+void TemplateTable::fneg() {
+  transition(ftos, ftos);
+  __ z_lcebr(Z_ftos, Z_ftos);
+}
+
+// dneg: negate the double in Z_ftos.
+void TemplateTable::dneg() {
+  transition(dtos, dtos);
+  __ z_lcdbr(Z_ftos, Z_ftos);
+}
+
+// iinc: add the one-byte constant at bcp+2 to the local selected by
+// locals_index() and store the sum back.
+void TemplateTable::iinc() {
+  transition(vtos, vtos);
+
+  Address local;
+  __ z_lb(Z_R0_scratch, at_bcp(2)); // Get constant.
+  locals_index(Z_R1_scratch);
+  local = iaddress(_masm, Z_R1_scratch);
+  __ z_a(Z_R0_scratch, local);
+  __ reg2mem_opt(Z_R0_scratch, local, false);
+}
+
+// wide_iinc: like iinc, with a signed two-byte increment read at bcp+4 and the
+// local selected by locals_index_wide().
+void TemplateTable::wide_iinc() {
+  transition(vtos, vtos);
+
+  // Z_tmp_1 := increment
+  __ get_2_byte_integer_at_bcp(Z_tmp_1, 4, InterpreterMacroAssembler::Signed);
+  // Z_tmp_2 := index of local to increment
+  locals_index_wide(Z_tmp_2);
+  // Load, increment, and store.
+  __ access_local_int(Z_tmp_2, Z_tos);
+  __ z_agr(Z_tos, Z_tmp_1);
+  // Shifted index is still in Z_tmp_2.
+  __ reg2mem_opt(Z_tos, Address(Z_locals, Z_tmp_2), false);
+}
+
+
+// Primitive type conversions (i2l, i2f, ..., d2f). The bytecode being
+// generated is queried via bytecode(). In ASSERT builds the expected input
+// and output tos states are derived from the bytecode and checked with
+// transition(); note that transition() is only called inside the ASSERT block.
+void TemplateTable::convert() {
+  // Checking
+#ifdef ASSERT
+  TosState   tos_in  = ilgl;
+  TosState   tos_out = ilgl;
+
+  // Input tos state is determined by the source type of the conversion.
+  switch (bytecode()) {
+    case Bytecodes::_i2l:
+    case Bytecodes::_i2f:
+    case Bytecodes::_i2d:
+    case Bytecodes::_i2b:
+    case Bytecodes::_i2c:
+    case Bytecodes::_i2s:
+      tos_in = itos;
+      break;
+    case Bytecodes::_l2i:
+    case Bytecodes::_l2f:
+    case Bytecodes::_l2d:
+      tos_in = ltos;
+      break;
+    case Bytecodes::_f2i:
+    case Bytecodes::_f2l:
+    case Bytecodes::_f2d:
+      tos_in = ftos;
+      break;
+    case Bytecodes::_d2i:
+    case Bytecodes::_d2l:
+    case Bytecodes::_d2f:
+      tos_in = dtos;
+      break;
+    default :
+      ShouldNotReachHere();
+  }
+  // Output tos state is determined by the target type of the conversion.
+  switch (bytecode()) {
+    case Bytecodes::_l2i:
+    case Bytecodes::_f2i:
+    case Bytecodes::_d2i:
+    case Bytecodes::_i2b:
+    case Bytecodes::_i2c:
+    case Bytecodes::_i2s:
+      tos_out = itos;
+      break;
+    case Bytecodes::_i2l:
+    case Bytecodes::_f2l:
+    case Bytecodes::_d2l:
+      tos_out = ltos;
+      break;
+    case Bytecodes::_i2f:
+    case Bytecodes::_l2f:
+    case Bytecodes::_d2f:
+      tos_out = ftos;
+      break;
+    case Bytecodes::_i2d:
+    case Bytecodes::_l2d:
+    case Bytecodes::_f2d:
+      tos_out = dtos;
+      break;
+    default :
+      ShouldNotReachHere();
+  }
+
+  transition(tos_in, tos_out);
+#endif // ASSERT
+
+  // Conversion
+  // Cases that 'return' emit straight-line code; the float/double to
+  // int/long cases 'break' so that the 'done' label gets bound after the switch.
+  Label done;
+  switch (bytecode()) {
+    case Bytecodes::_i2l:
+      __ z_lgfr(Z_tos, Z_tos);
+      return;
+    case Bytecodes::_i2f:
+      __ z_cefbr(Z_ftos, Z_tos);
+      return;
+    case Bytecodes::_i2d:
+      __ z_cdfbr(Z_ftos, Z_tos);
+      return;
+    case Bytecodes::_i2b:
+      // Sign extend least significant byte.
+      __ move_reg_if_needed(Z_tos, T_BYTE, Z_tos, T_INT);
+      return;
+    case Bytecodes::_i2c:
+      // Zero extend 2 least significant bytes.
+      __ move_reg_if_needed(Z_tos, T_CHAR, Z_tos, T_INT);
+      return;
+    case Bytecodes::_i2s:
+      // Sign extend 2 least significant bytes.
+      __ move_reg_if_needed(Z_tos, T_SHORT, Z_tos, T_INT);
+      return;
+    case Bytecodes::_l2i:
+      // Sign-extend not needed here, upper 4 bytes of int value in register are ignored.
+      return;
+    case Bytecodes::_l2f:
+      __ z_cegbr(Z_ftos, Z_tos);
+      return;
+    case Bytecodes::_l2d:
+      __ z_cdgbr(Z_ftos, Z_tos);
+      return;
+    case Bytecodes::_f2i:
+    case Bytecodes::_f2l:
+      // Preset the result to 0 and compare the value with itself: for a NaN
+      // the branch below skips the conversion and the 0 is kept.
+      __ clear_reg(Z_tos, true, false);  // Don't set CC.
+      __ z_cebr(Z_ftos, Z_ftos);
+      __ z_brno(done); // NaN -> 0
+      if (bytecode() == Bytecodes::_f2i)
+        __ z_cfebr(Z_tos, Z_ftos, Assembler::to_zero);
+      else // bytecode() == Bytecodes::_f2l
+        __ z_cgebr(Z_tos, Z_ftos, Assembler::to_zero);
+      break;
+    case Bytecodes::_f2d:
+      __ move_freg_if_needed(Z_ftos, T_DOUBLE, Z_ftos, T_FLOAT);
+      return;
+    case Bytecodes::_d2i:
+    case Bytecodes::_d2l:
+      // Same NaN handling as for f2i/f2l above.
+      __ clear_reg(Z_tos, true, false);  // Don't set CC.
+      __ z_cdbr(Z_ftos, Z_ftos);
+      __ z_brno(done); // NaN -> 0
+      if (bytecode() == Bytecodes::_d2i)
+        __ z_cfdbr(Z_tos, Z_ftos, Assembler::to_zero);
+      else // Bytecodes::_d2l
+        __ z_cgdbr(Z_tos, Z_ftos, Assembler::to_zero);
+      break;
+    case Bytecodes::_d2f:
+      __ move_freg_if_needed(Z_ftos, T_FLOAT, Z_ftos, T_DOUBLE);
+      return;
+    default:
+      ShouldNotReachHere();
+  }
+  __ bind(done);
+}
+
+// lcmp: compare the long popped from the stack (value 1) with Z_tos (value 2)
+// and leave -1 (lt), 0 (eq) or 1 (gt) in Z_tos. Two code shapes are emitted:
+// a branch-free one using conditional loads when
+// VM_Version::has_LoadStoreConditional(), and a branching one otherwise.
+void TemplateTable::lcmp() {
+  transition(ltos, itos);
+
+  Label   done;
+  Register val1 = Z_R0_scratch;
+  Register val2 = Z_R1_scratch;
+
+  if (VM_Version::has_LoadStoreConditional()) {
+    __ pop_l(val1);           // pop value 1.
+    __ z_lghi(val2,  -1);     // lt value
+    __ z_cgr(val1, Z_tos);    // Compare with Z_tos (value 2). Protect CC under all circumstances.
+    __ z_lghi(val1,   1);     // gt value
+    __ z_lghi(Z_tos,  0);     // eq value
+
+    __ z_locgr(Z_tos, val1, Assembler::bcondHigh);
+    __ z_locgr(Z_tos, val2, Assembler::bcondLow);
+  } else {
+    __ pop_l(val1);           // Pop value 1.
+    __ z_cgr(val1, Z_tos);    // Compare with Z_tos (value 2). Protect CC under all circumstances.
+
+    __ z_lghi(Z_tos,  0);     // eq value
+    __ z_bre(done);
+
+    __ z_lghi(Z_tos,  1);     // gt value
+    __ z_brh(done);
+
+    __ z_lghi(Z_tos, -1);     // lt value
+  }
+
+  __ bind(done);
+}
+
+
+// Float/double comparison. The first operand is popped into Z_FARG2 and
+// compared with Z_ftos; Z_tos receives -1, 0 or 1.
+// unordered_result selects the outcome of an unordered compare: if it is 1
+// the result is 1, otherwise the result is -1.
+void TemplateTable::float_cmp(bool is_float, int unordered_result) {
+  Label done;
+
+  if (is_float) {
+    __ pop_f(Z_FARG2);
+    __ z_cebr(Z_FARG2, Z_ftos);
+  } else {
+    __ pop_d(Z_FARG2);
+    __ z_cdbr(Z_FARG2, Z_ftos);
+  }
+
+  if (VM_Version::has_LoadStoreConditional()) {
+    // Branch-free variant: preset 0, then conditionally overwrite with 1 or -1.
+    Register one       = Z_R0_scratch;
+    Register minus_one = Z_R1_scratch;
+    __ z_lghi(minus_one,  -1);
+    __ z_lghi(one,  1);
+    __ z_lghi(Z_tos, 0);
+    __ z_locgr(Z_tos, one,       unordered_result == 1 ? Assembler::bcondHighOrNotOrdered : Assembler::bcondHigh);
+    __ z_locgr(Z_tos, minus_one, unordered_result == 1 ? Assembler::bcondLow              : Assembler::bcondLowOrNotOrdered);
+  } else {
+    // Z_FARG2 == Z_ftos
+    __ clear_reg(Z_tos, false, false);
+    __ z_bre(done);
+
+    // F_ARG2 > Z_Ftos, or unordered
+    __ z_lhi(Z_tos, 1);
+    __ z_brc(unordered_result == 1 ? Assembler::bcondHighOrNotOrdered : Assembler::bcondHigh, done);
+
+    // F_ARG2 < Z_FTOS, or unordered
+    __ z_lhi(Z_tos, -1);
+
+    __ bind(done);
+  }
+}
+
+// Code for a taken branch.
+//   is_jsr : additionally compute and push the return bci (jsr handling).
+//   is_wide: the displacement is a 4-byte operand instead of a signed
+//            2-byte one.
+void TemplateTable::branch(bool is_jsr, bool is_wide) {
+  const Register   bumped_count = Z_tmp_1;
+  const Register   method       = Z_tmp_2;
+  const Register   m_counters   = Z_R1_scratch;
+  const Register   mdo          = Z_tos;
+
+  BLOCK_COMMENT("TemplateTable::branch {");
+  __ get_method(method);
+  __ profile_taken_branch(mdo, bumped_count);
+
+  const ByteSize ctr_offset = InvocationCounter::counter_offset();
+  const ByteSize be_offset  = MethodCounters::backedge_counter_offset()   + ctr_offset;
+  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ctr_offset;
+
+  // Get (wide) offset to disp.
+  const Register disp = Z_ARG5;
+  if (is_wide) {
+    __ get_4_byte_integer_at_bcp(disp, 1);
+  } else {
+    __ get_2_byte_integer_at_bcp(disp, 1, InterpreterMacroAssembler::Signed);
+  }
+
+  // Handle all the JSR stuff here, then exit.
+  // It's much shorter and cleaner than intermingling with the
+  // non-JSR normal-branch stuff occurring below.
+  if (is_jsr) {
+    // Compute return address as bci in Z_tos.
+    // The constant is the length of the jsr instruction: 5 bytes when wide,
+    // 3 bytes otherwise.
+    __ z_lgr(Z_R1_scratch, Z_bcp);
+    __ z_sg(Z_R1_scratch, Address(method, Method::const_offset()));
+    __ add2reg(Z_tos, (is_wide ? 5 : 3) - in_bytes(ConstMethod::codes_offset()), Z_R1_scratch);
+
+    // Bump bcp to target of JSR.
+    __ z_agr(Z_bcp, disp);
+    // Push return address for "ret" on stack.
+    __ push_ptr(Z_tos);
+    // And away we go!
+    __ dispatch_next(vtos);
+    return;
+  }
+
+  // Normal (non-jsr) branch handling.
+
+  // Bump bytecode pointer by displacement (take the branch).
+  __ z_agr(Z_bcp, disp);
+
+  assert(UseLoopCounter || !UseOnStackReplacement,
+         "on-stack-replacement requires loop counters");
+
+  NearLabel backedge_counter_overflow;
+  NearLabel profile_method;
+  NearLabel dispatch;
+  int       increment = InvocationCounter::count_increment;
+
+  if (UseLoopCounter) {
+    // Increment backedge counter for backward branches.
+    // disp: target offset
+    // Z_bcp: target bcp
+    // Z_locals: locals pointer
+    //
+    // Count only if backward branch.
+    __ compare32_and_branch(disp, (intptr_t)0, Assembler::bcondHigh, dispatch);
+
+    if (TieredCompilation) {
+      Label noCounters;
+
+      if (ProfileInterpreter) {
+        NearLabel   no_mdo;
+
+        // Are we profiling?
+        __ load_and_test_long(mdo, Address(method, Method::method_data_offset()));
+        __ branch_optimized(Assembler::bcondZero, no_mdo);
+
+        // Increment the MDO backedge counter.
+        const Address mdo_backedge_counter(mdo, MethodData::backedge_counter_offset() + InvocationCounter::counter_offset());
+
+        const Address mask(mdo, MethodData::backedge_mask_offset());
+        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
+                                   Z_ARG2, false, Assembler::bcondZero,
+                                   UseOnStackReplacement ? &backedge_counter_overflow : NULL);
+        __ z_bru(dispatch);
+        __ bind(no_mdo);
+      }
+
+      // Increment backedge counter in MethodCounters*.
+      __ get_method_counters(method, m_counters, noCounters);
+      const Address mask(m_counters, MethodCounters::backedge_mask_offset());
+      __ increment_mask_and_jump(Address(m_counters, be_offset),
+                                 increment, mask,
+                                 Z_ARG2, false, Assembler::bcondZero,
+                                 UseOnStackReplacement ? &backedge_counter_overflow : NULL);
+      __ bind(noCounters);
+    } else {
+      // Non-tiered case: explicit backedge counter with limit checks.
+      Register counter = Z_tos;
+      Label    noCounters;
+      // Get address of MethodCounters object.
+      __ get_method_counters(method, m_counters, noCounters);
+      // Increment backedge counter.
+      __ increment_backedge_counter(m_counters, counter);
+
+      if (ProfileInterpreter) {
+        // Test to see if we should create a method data obj.
+        __ z_cl(counter, Address(m_counters, MethodCounters::interpreter_profile_limit_offset()));
+        __ z_brl(dispatch);
+
+        // If no method data exists, go to profile method.
+        __ test_method_data_pointer(Z_ARG4/*result unused*/, profile_method);
+
+        if (UseOnStackReplacement) {
+          // Check for overflow against 'bumped_count' which is the MDO taken count.
+          __ z_cl(bumped_count, Address(m_counters, MethodCounters::interpreter_backward_branch_limit_offset()));
+          __ z_brl(dispatch);
+
+          // When ProfileInterpreter is on, the backedge_count comes
+          // from the methodDataOop, which value does not get reset on
+          // the call to frequency_counter_overflow(). To avoid
+          // excessive calls to the overflow routine while the method is
+          // being compiled, add a second test to make sure the overflow
+          // function is called only once every overflow_frequency.
+          const int overflow_frequency = 1024;
+          __ and_imm(bumped_count, overflow_frequency - 1);
+          __ z_brz(backedge_counter_overflow);
+
+        }
+      } else {
+        if (UseOnStackReplacement) {
+          // Check for overflow against 'counter', which is the sum of the
+          // counters.
+          __ z_cl(counter, Address(m_counters, MethodCounters::interpreter_backward_branch_limit_offset()));
+          __ z_brh(backedge_counter_overflow);
+        }
+      }
+      __ bind(noCounters);
+    }
+
+    __ bind(dispatch);
+  }
+
+  // Pre-load the next target bytecode into Z_bytecode.
+  __ z_llgc(Z_bytecode, Address(Z_bcp, (intptr_t) 0));
+
+  // Continue with the bytecode @ target.
+  // Z_tos: Return bci for jsr's, unused otherwise.
+  // Z_bytecode: target bytecode
+  // Z_bcp: target bcp
+  __ dispatch_only(vtos);
+
+  // Out-of-line code runtime calls.
+  if (UseLoopCounter) {
+    if (ProfileInterpreter) {
+      // Out-of-line code to allocate method data oop.
+      __ bind(profile_method);
+
+      __ call_VM(noreg,
+                 CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
+      __ z_llgc(Z_bytecode, Address(Z_bcp, (intptr_t) 0));  // Restore target bytecode.
+      __ set_method_data_pointer_for_bcp();
+      __ z_bru(dispatch);
+    }
+
+    if (UseOnStackReplacement) {
+
+      // backedge counter overflow
+      __ bind(backedge_counter_overflow);
+
+      __ z_lcgr(Z_ARG2, disp); // Z_ARG2 := -disp
+      __ z_agr(Z_ARG2, Z_bcp); // Z_ARG2 := branch target bcp - disp == branch bcp
+      __ call_VM(noreg,
+                 CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow),
+                 Z_ARG2);
+
+      // Z_RET: osr nmethod (osr ok) or NULL (osr not possible).
+      __ compare64_and_branch(Z_RET, (intptr_t) 0, Assembler::bcondEqual, dispatch);
+
+      // Nmethod may have been invalidated (VM may block upon call_VM return).
+      __ z_cliy(nmethod::state_offset(), Z_RET, nmethod::in_use);
+      __ z_brne(dispatch);
+
+      // Migrate the interpreter frame off of the stack.
+
+      __ z_lgr(Z_tmp_1, Z_RET); // Save the nmethod.
+
+      call_VM(noreg,
+              CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
+
+      // Z_RET is OSR buffer, move it to expected parameter location.
+      __ lgr_if_needed(Z_ARG1, Z_RET);
+
+      // Pop the interpreter frame ...
+      __ pop_interpreter_frame(Z_R14, Z_ARG2/*tmp1*/, Z_ARG3/*tmp2*/);
+
+      // ... and begin the OSR nmethod.
+      __ z_lg(Z_R1_scratch, Address(Z_tmp_1, nmethod::osr_entry_point_offset()));
+      __ z_br(Z_R1_scratch);
+    }
+  }
+  BLOCK_COMMENT("} TemplateTable::branch");
+}
+
+// Conditional branch on an int compared against zero.
+// The int operand is expected in Z_tos (transition itos -> vtos). The
+// compare jumps to not_taken on the negated condition j_not(cc); otherwise
+// control falls into the code emitted by branch(false, false).
+void TemplateTable::if_0cmp(Condition cc) {
+  transition(itos, vtos);
+
+  // Assume branch is more often taken than not (loops use backward branches).
+  NearLabel not_taken;
+  __ compare32_and_branch(Z_tos, (intptr_t) 0, j_not(cc), not_taken);
+  branch(false, false);
+  __ bind(not_taken);
+  __ profile_not_taken_branch(Z_tos);
+}
+
+// Conditional branch on two ints.
+// One operand is popped from the expression stack into Z_R0_scratch and is
+// compared (32 bit) against the operand in Z_tos.
+void TemplateTable::if_icmp(Condition cc) {
+  transition(itos, vtos);
+
+  // Assume branch is more often taken than not (loops use backward branches).
+  NearLabel not_taken;
+  __ pop_i(Z_R0_scratch);
+  __ compare32_and_branch(Z_R0_scratch, Z_tos, j_not(cc), not_taken);
+  branch(false, false);
+  __ bind(not_taken);
+  __ profile_not_taken_branch(Z_tos);
+}
+
+// Conditional branch on a reference compared against null.
+// Same shape as if_0cmp, but the operand in Z_tos is compared as a 64 bit
+// value (transition atos -> vtos).
+void TemplateTable::if_nullcmp(Condition cc) {
+  transition(atos, vtos);
+
+  // Assume branch is more often taken than not (loops use backward branches).
+  NearLabel not_taken;
+  __ compare64_and_branch(Z_tos, (intptr_t) 0, j_not(cc), not_taken);
+  branch(false, false);
+  __ bind(not_taken);
+  __ profile_not_taken_branch(Z_tos);
+}
+
+// Conditional branch on two references.
+// One operand is popped into Z_ARG2; both operands are passed to verify_oop
+// before the unsigned 64 bit compare.
+void TemplateTable::if_acmp(Condition cc) {
+  transition(atos, vtos);
+  // Assume branch is more often taken than not (loops use backward branches).
+  NearLabel not_taken;
+  __ pop_ptr(Z_ARG2);
+  __ verify_oop(Z_ARG2);
+  __ verify_oop(Z_tos);
+  __ compareU64_and_branch(Z_tos, Z_ARG2, j_not(cc), not_taken);
+  branch(false, false);
+  __ bind(not_taken);
+  // NOTE(review): the three sibling templates pass Z_tos here; presumably the
+  // argument is only a scratch register for the profiling code - confirm.
+  __ profile_not_taken_branch(Z_ARG3);
+}
+
+void TemplateTable::ret() {
+  transition(vtos, vtos);
+
+  locals_index(Z_tmp_1);
+  // Get return bci, compute return bcp. Must load 64 bits.
+ __ mem2reg_opt(Z_tmp_1, iaddress(_masm, Z_tmp_1)); + __ profile_ret(Z_tmp_1, Z_tmp_2); + __ get_method(Z_tos); + __ mem2reg_opt(Z_R1_scratch, Address(Z_tos, Method::const_offset())); + __ load_address(Z_bcp, Address(Z_R1_scratch, Z_tmp_1, ConstMethod::codes_offset())); + __ dispatch_next(vtos); +} + +void TemplateTable::wide_ret() { + transition(vtos, vtos); + + locals_index_wide(Z_tmp_1); + // Get return bci, compute return bcp. + __ mem2reg_opt(Z_tmp_1, aaddress(_masm, Z_tmp_1)); + __ profile_ret(Z_tmp_1, Z_tmp_2); + __ get_method(Z_tos); + __ mem2reg_opt(Z_R1_scratch, Address(Z_tos, Method::const_offset())); + __ load_address(Z_bcp, Address(Z_R1_scratch, Z_tmp_1, ConstMethod::codes_offset())); + __ dispatch_next(vtos); +} + +void TemplateTable::tableswitch () { + transition(itos, vtos); + + NearLabel default_case, continue_execution; + Register bcp = Z_ARG5; + // Align bcp. + __ load_address(bcp, at_bcp(BytesPerInt)); + __ z_nill(bcp, (-BytesPerInt) & 0xffff); + + // Load lo & hi. + Register low = Z_tmp_1; + Register high = Z_tmp_2; + + // Load low into 64 bits, since used for address calculation. + __ mem2reg_signed_opt(low, Address(bcp, BytesPerInt)); + __ mem2reg_opt(high, Address(bcp, 2 * BytesPerInt), false); + // Sign extend "label" value for address calculation. + __ z_lgfr(Z_tos, Z_tos); + + // Check against lo & hi. + __ compare32_and_branch(Z_tos, low, Assembler::bcondLow, default_case); + __ compare32_and_branch(Z_tos, high, Assembler::bcondHigh, default_case); + + // Lookup dispatch offset. + __ z_sgr(Z_tos, low); + Register jump_table_offset = Z_ARG3; + // Index2offset; index in Z_tos is killed by profile_switch_case. + __ z_sllg(jump_table_offset, Z_tos, LogBytesPerInt); + __ profile_switch_case(Z_tos, Z_ARG4 /*tmp for mdp*/, low/*tmp*/, Z_bytecode/*tmp*/); + + Register index = Z_tmp_2; + + // Load index sign extended for addressing. + __ mem2reg_signed_opt(index, Address(bcp, jump_table_offset, 3 * BytesPerInt)); + + // Continue execution. 
+ __ bind(continue_execution); + + // Load next bytecode. + __ z_llgc(Z_bytecode, Address(Z_bcp, index)); + __ z_agr(Z_bcp, index); // Advance bcp. + __ dispatch_only(vtos); + + // Handle default. + __ bind(default_case); + + __ profile_switch_default(Z_tos); + __ mem2reg_signed_opt(index, Address(bcp)); + __ z_bru(continue_execution); +} + +void TemplateTable::lookupswitch () { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +void TemplateTable::fast_linearswitch () { + transition(itos, vtos); + + Label loop_entry, loop, found, continue_execution; + Register bcp = Z_ARG5; + + // Align bcp. + __ load_address(bcp, at_bcp(BytesPerInt)); + __ z_nill(bcp, (-BytesPerInt) & 0xffff); + + // Start search with last case. + Register current_case_offset = Z_tmp_1; + + __ mem2reg_signed_opt(current_case_offset, Address(bcp, BytesPerInt)); + __ z_sllg(current_case_offset, current_case_offset, LogBytesPerWord); // index2bytes + __ z_bru(loop_entry); + + // table search + __ bind(loop); + + __ z_c(Z_tos, Address(bcp, current_case_offset, 2 * BytesPerInt)); + __ z_bre(found); + + __ bind(loop_entry); + __ z_aghi(current_case_offset, -2 * BytesPerInt); // Decrement. + __ z_brnl(loop); + + // default case + Register offset = Z_tmp_2; + + __ profile_switch_default(Z_tos); + // Load offset sign extended for addressing. + __ mem2reg_signed_opt(offset, Address(bcp)); + __ z_bru(continue_execution); + + // Entry found -> get offset. + __ bind(found); + __ mem2reg_signed_opt(offset, Address(bcp, current_case_offset, 3 * BytesPerInt)); + // Profile that this case was taken. + Register current_case_idx = Z_ARG4; + __ z_srlg(current_case_idx, current_case_offset, LogBytesPerWord); // bytes2index + __ profile_switch_case(current_case_idx, Z_tos, bcp, Z_bytecode); + + // Continue execution. + __ bind(continue_execution); + + // Load next bytecode. + __ z_llgc(Z_bytecode, Address(Z_bcp, offset, 0)); + __ z_agr(Z_bcp, offset); // Advance bcp. 
+ __ dispatch_only(vtos); +} + + +void TemplateTable::fast_binaryswitch() { + + transition(itos, vtos); + + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. + // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // Register allocation + // Note: Since we use the indices in address operands, we do all the + // computation in 64 bits. + const Register key = Z_tos; // Already set (tosca). + const Register array = Z_tmp_1; + const Register i = Z_tmp_2; + const Register j = Z_ARG5; + const Register h = Z_ARG4; + const Register temp = Z_R1_scratch; + + // Find array start. + __ load_address(array, at_bcp(3 * BytesPerInt)); + __ z_nill(array, (-BytesPerInt) & 0xffff); // align + + // Initialize i & j. + __ clear_reg(i, true, false); // i = 0; Don't set CC. + __ mem2reg_signed_opt(j, Address(array, -BytesPerInt)); // j = length(array); + + // And start. + Label entry; + __ z_bru(entry); + + // binary search loop + { + NearLabel loop; + + __ bind(loop); + + // int h = (i + j) >> 1; + __ add2reg_with_index(h, 0, i, j); // h = i + j; + __ z_srag(h, h, 1); // h = (i + j) >> 1; + + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + + // Convert array[h].match to native byte-ordering before compare. 
+ __ z_sllg(temp, h, LogBytesPerWord); // index2bytes + __ mem2reg_opt(temp, Address(array, temp), false); + + NearLabel else_; + + __ compare32_and_branch(key, temp, Assembler::bcondNotLow, else_); + // j = h if (key < array[h].fast_match()) + __ z_lgr(j, h); + __ z_bru(entry); // continue + + __ bind(else_); + + // i = h if (key >= array[h].fast_match()) + __ z_lgr(i, h); // and fallthrough + + // while (i+1 < j) + __ bind(entry); + + // if (i + 1 < j) continue search + __ add2reg(h, 1, i); + __ compare64_and_branch(h, j, Assembler::bcondLow, loop); + } + + // End of binary search, result index is i (must check again!). + NearLabel default_case; + + // h is no longer needed, so use it to hold the byte offset. + __ z_sllg(h, i, LogBytesPerWord); // index2bytes + __ mem2reg_opt(temp, Address(array, h), false); + __ compare32_and_branch(key, temp, Assembler::bcondNotEqual, default_case); + + // entry found -> j = offset + __ mem2reg_signed_opt(j, Address(array, h, BytesPerInt)); + __ profile_switch_case(i, key, array, Z_bytecode); + // Load next bytecode. + __ z_llgc(Z_bytecode, Address(Z_bcp, j)); + __ z_agr(Z_bcp, j); // Advance bcp. + __ dispatch_only(vtos); + + // default case -> j = default offset + __ bind(default_case); + + __ profile_switch_default(i); + __ mem2reg_signed_opt(j, Address(array, -2 * BytesPerInt)); + // Load next bytecode. + __ z_llgc(Z_bytecode, Address(Z_bcp, j)); + __ z_agr(Z_bcp, j); // Advance bcp. 
+  __ dispatch_only(vtos);
+}
+
+// Template for the return bytecodes.
+// For _return_register_finalizer the receiver is loaded from local 0 and,
+// if its klass has JVM_ACC_HAS_FINALIZER set in the access flags,
+// InterpreterRuntime::register_finalizer is called before the activation
+// is removed. Control then continues at the address left in Z_R14.
+void TemplateTable::_return(TosState state) {
+  transition(state, state);
+  assert(_desc->calls_vm(),
+         "inconsistent calls_vm information"); // call in remove_activation
+
+  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
+    Register Rthis  = Z_ARG2;
+    Register Rklass = Z_ARG5;
+    Label skip_register_finalizer;
+    assert(state == vtos, "only valid state");
+    __ z_lg(Rthis, aaddress(0));
+    __ load_klass(Rklass, Rthis);
+    __ testbit(Address(Rklass, Klass::access_flags_offset()), exact_log2(JVM_ACC_HAS_FINALIZER));
+    __ z_bfalse(skip_register_finalizer);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), Rthis);
+    __ bind(skip_register_finalizer);
+  }
+
+  __ remove_activation(state, Z_R14);
+  __ z_br(Z_R14);
+}
+
+// ----------------------------------------------------------------------------
+// Load the constant pool cache pointer and the byte offset of the cache entry
+// for the current bytecode. If the bytecode recorded in the cache entry does
+// not match the current bytecode, InterpreterRuntime::resolve_from_cache is
+// called first and the two registers are reloaded afterwards.
+//   byte_no    - f1_byte or f2_byte (asserted).
+//   Rcache     - receives the cache pointer.
+//   cpe_offset - receives the cache entry offset.
+//   index_size - size of the index operand, forwarded to the bcp accessors.
+// Z_R1_scratch is used for the bytecode read from the cache entry.
+// NOTE: Cpe_offset is already computed as byte offset, so we must not
+// shift it afterwards!
+void TemplateTable::resolve_cache_and_index(int byte_no,
+                                            Register Rcache,
+                                            Register cpe_offset,
+                                            size_t index_size) {
+  BLOCK_COMMENT("resolve_cache_and_index {");
+  NearLabel      resolved;
+  const Register bytecode_in_cpcache = Z_R1_scratch;
+  // total_f1_offset is not referenced again in this function.
+  const int      total_f1_offset = in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f1_offset());
+  assert_different_registers(Rcache, cpe_offset, bytecode_in_cpcache);
+
+  // The nofast variants are compared and resolved as their plain counterparts.
+  Bytecodes::Code code = bytecode();
+  switch (code) {
+    case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+    case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
+  }
+
+  {
+    assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, cpe_offset, bytecode_in_cpcache, byte_no, 1, index_size);
+    // Have we resolved this bytecode?
+    __ compare32_and_branch(bytecode_in_cpcache, (int)code, Assembler::bcondEqual, resolved);
+  }
+
+  // Resolve first time through.
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+  __ load_const_optimized(Z_ARG2, (int) code);
+  __ call_VM(noreg, entry, Z_ARG2);
+
+  // Update registers with resolved info.
+  __ get_cache_and_index_at_bcp(Rcache, cpe_offset, 1, index_size);
+  __ bind(resolved);
+  BLOCK_COMMENT("} resolve_cache_and_index");
+}
+
+// Load the field offset (f2) and the flags word of a cache entry.
+//   obj       - written only if is_static: receives f1 of the cache entry and
+//               is then replaced by the value at Klass::java_mirror_offset().
+//   cache     - constant pool cache pointer.
+//   index     - cache entry offset.
+//   off       - receives f2.
+//   flags     - receives the flags word.
+// The Rcache and index registers must be set before call.
+// Index is already a byte offset, don't shift!
+void TemplateTable::load_field_cp_cache_entry(Register obj,
+                                              Register cache,
+                                              Register index,
+                                              Register off,
+                                              Register flags,
+                                              bool is_static = false) {
+  assert_different_registers(cache, index, flags, off);
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+
+  // Field offset
+  __ mem2reg_opt(off, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f2_offset()));
+  // Flags. Must load 64 bits.
+  __ mem2reg_opt(flags, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::flags_offset()));
+
+  // klass overwrite register
+  if (is_static) {
+    __ mem2reg_opt(obj, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
+    __ mem2reg_opt(obj, Address(obj, Klass::java_mirror_offset()));
+  }
+}
+
+void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
+                                               Register method,
+                                               Register itable_index,
+                                               Register flags,
+                                               bool is_invokevirtual,
+                                               bool is_invokevfinal, // unused
+                                               bool is_invokedynamic) {
+  BLOCK_COMMENT("load_invoke_cp_cache_entry {");
+  // Setup registers.
+  const Register cache     = Z_ARG1;
+  const Register cpe_offset= flags;
+  const ByteSize base_off  = ConstantPoolCache::base_offset();
+  const ByteSize f1_off    = ConstantPoolCacheEntry::f1_offset();
+  const ByteSize f2_off    = ConstantPoolCacheEntry::f2_offset();
+  const ByteSize flags_off = ConstantPoolCacheEntry::flags_offset();
+  const int method_offset  = in_bytes(base_off + ((byte_no == f2_byte) ? 
f2_off : f1_off)); + const int flags_offset = in_bytes(base_off + flags_off); + // Access constant pool cache fields. + const int index_offset = in_bytes(base_off + f2_off); + + assert_different_registers(method, itable_index, flags, cache); + assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); + + if (is_invokevfinal) { + // Already resolved. + assert(itable_index == noreg, "register not used"); + __ get_cache_and_index_at_bcp(cache, cpe_offset, 1); + } else { + // Need to resolve. + resolve_cache_and_index(byte_no, cache, cpe_offset, is_invokedynamic ? sizeof(u4) : sizeof(u2)); + } + __ z_lg(method, Address(cache, cpe_offset, method_offset)); + + if (itable_index != noreg) { + __ z_lg(itable_index, Address(cache, cpe_offset, index_offset)); + } + + // Only load the lower 4 bytes and fill high bytes of flags with zeros. + // Callers depend on this zero-extension!!! + // Attention: overwrites cpe_offset == flags + __ z_llgf(flags, Address(cache, cpe_offset, flags_offset + (BytesPerLong-BytesPerInt))); + + BLOCK_COMMENT("} load_invoke_cp_cache_entry"); +} + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + + // Do the JVMTI work here to avoid disturbing the register state below. + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (!JvmtiExport::can_post_field_access()) { + return; + } + + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label exit; + assert_different_registers(cache, index, Z_tos); + __ load_absolute_address(Z_tos, (address)JvmtiExport::get_field_access_count_addr()); + __ load_and_test_int(Z_R0, Address(Z_tos)); + __ z_brz(exit); + + // Index is returned as byte offset, do not shift! 
+ __ get_cache_and_index_at_bcp(Z_ARG3, Z_R1_scratch, 1); + + // cache entry pointer + __ add2reg_with_index(Z_ARG3, + in_bytes(ConstantPoolCache::base_offset()), + Z_ARG3, Z_R1_scratch); + + if (is_static) { + __ clear_reg(Z_ARG2, true, false); // NULL object reference. Don't set CC. + } else { + __ mem2reg_opt(Z_ARG2, at_tos()); // Get object pointer without popping it. + __ verify_oop(Z_ARG2); + } + // Z_ARG2: object pointer or NULL + // Z_ARG3: cache entry pointer + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + Z_ARG2, Z_ARG3); + __ get_cache_and_index_at_bcp(cache, index, 1); + + __ bind(exit); +} + +void TemplateTable::pop_and_check_object(Register r) { + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. + __ verify_oop(r); +} + +void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + transition(vtos, vtos); + + const Register cache = Z_tmp_1; + const Register index = Z_tmp_2; + const Register obj = Z_tmp_1; + const Register off = Z_ARG2; + const Register flags = Z_ARG1; + const Register bc = Z_tmp_1; // Uses same reg as obj, so don't mix them. + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + if (!is_static) { + // Obj is on the stack. + pop_and_check_object(obj); + } + + // Displacement is 0, so any store instruction will be fine on any CPU. 
+ const Address field(obj, off); + + Label is_Byte, is_Bool, is_Int, is_Short, is_Char, + is_Long, is_Float, is_Object, is_Double; + Label is_badState8, is_badState9, is_badStateA, is_badStateB, + is_badStateC, is_badStateD, is_badStateE, is_badStateF, + is_badState; + Label branchTable, atosHandler, Done; + Register br_tab = Z_R1_scratch; + bool do_rewrite = !is_static && (rc == may_rewrite); + bool dont_rewrite = (is_static || (rc == may_not_rewrite)); + + assert(do_rewrite == !dont_rewrite, "Oops, code is not fit for that"); + assert(btos == 0, "change code, btos != 0"); + + // Calculate branch table size. Generated code size depends on ASSERT and on bytecode rewriting. +#ifdef ASSERT + const unsigned int bsize = dont_rewrite ? BTB_MINSIZE*1 : BTB_MINSIZE*4; +#else + const unsigned int bsize = dont_rewrite ? BTB_MINSIZE*1 : BTB_MINSIZE*4; +#endif + + // Calculate address of branch table entry and branch there. + { + const int bit_shift = exact_log2(bsize); // Size of each branch table entry. + const int r_bitpos = 63 - bit_shift; + const int l_bitpos = r_bitpos - ConstantPoolCacheEntry::tos_state_bits + 1; + const int n_rotate = (bit_shift-ConstantPoolCacheEntry::tos_state_shift); + __ z_larl(br_tab, branchTable); + __ rotate_then_insert(flags, flags, l_bitpos, r_bitpos, n_rotate, true); + } + __ z_bc(Assembler::bcondAlways, 0, flags, br_tab); + + __ align_address(bsize); + BIND(branchTable); + + // btos + BTB_BEGIN(is_Byte, bsize, "getfield_or_static:is_Byte"); + __ z_lb(Z_tos, field); + __ push(btos); + // Rewrite bytecode to be faster. + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_bgetfield, bc, Z_ARG5); + } + __ z_bru(Done); + BTB_END(is_Byte, bsize, "getfield_or_static:is_Byte"); + + // ztos + BTB_BEGIN(is_Bool, bsize, "getfield_or_static:is_Bool"); + __ z_lb(Z_tos, field); + __ push(ztos); + // Rewrite bytecode to be faster. + if (do_rewrite) { + // Use btos rewriting, no truncating to t/f bit is needed for getfield. 
+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, Z_ARG5); + } + __ z_bru(Done); + BTB_END(is_Bool, bsize, "getfield_or_static:is_Bool"); + + // ctos + BTB_BEGIN(is_Char, bsize, "getfield_or_static:is_Char"); + // Load into 64 bits, works on all CPUs. + __ z_llgh(Z_tos, field); + __ push(ctos); + // Rewrite bytecode to be faster. + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_cgetfield, bc, Z_ARG5); + } + __ z_bru(Done); + BTB_END(is_Char, bsize, "getfield_or_static:is_Char"); + + // stos + BTB_BEGIN(is_Short, bsize, "getfield_or_static:is_Short"); + __ z_lh(Z_tos, field); + __ push(stos); + // Rewrite bytecode to be faster. + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_sgetfield, bc, Z_ARG5); + } + __ z_bru(Done); + BTB_END(is_Short, bsize, "getfield_or_static:is_Short"); + + // itos + BTB_BEGIN(is_Int, bsize, "getfield_or_static:is_Int"); + __ mem2reg_opt(Z_tos, field, false); + __ push(itos); + // Rewrite bytecode to be faster. + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_igetfield, bc, Z_ARG5); + } + __ z_bru(Done); + BTB_END(is_Int, bsize, "getfield_or_static:is_Int"); + + // ltos + BTB_BEGIN(is_Long, bsize, "getfield_or_static:is_Long"); + __ mem2reg_opt(Z_tos, field); + __ push(ltos); + // Rewrite bytecode to be faster. + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_lgetfield, bc, Z_ARG5); + } + __ z_bru(Done); + BTB_END(is_Long, bsize, "getfield_or_static:is_Long"); + + // ftos + BTB_BEGIN(is_Float, bsize, "getfield_or_static:is_Float"); + __ mem2freg_opt(Z_ftos, field, false); + __ push(ftos); + // Rewrite bytecode to be faster. + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_fgetfield, bc, Z_ARG5); + } + __ z_bru(Done); + BTB_END(is_Float, bsize, "getfield_or_static:is_Float"); + + // dtos + BTB_BEGIN(is_Double, bsize, "getfield_or_static:is_Double"); + __ mem2freg_opt(Z_ftos, field); + __ push(dtos); + // Rewrite bytecode to be faster. 
+ if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_dgetfield, bc, Z_ARG5); + } + __ z_bru(Done); + BTB_END(is_Double, bsize, "getfield_or_static:is_Double"); + + // atos + BTB_BEGIN(is_Object, bsize, "getfield_or_static:is_Object"); + __ z_bru(atosHandler); + BTB_END(is_Object, bsize, "getfield_or_static:is_Object"); + + // Bad state detection comes at no extra runtime cost. + BTB_BEGIN(is_badState8, bsize, "getfield_or_static:is_badState8"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badState8, bsize, "getfield_or_static:is_badState8"); + BTB_BEGIN(is_badState9, bsize, "getfield_or_static:is_badState9"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badState9, bsize, "getfield_or_static:is_badState9"); + BTB_BEGIN(is_badStateA, bsize, "getfield_or_static:is_badStateA"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateA, bsize, "getfield_or_static:is_badStateA"); + BTB_BEGIN(is_badStateB, bsize, "getfield_or_static:is_badStateB"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateB, bsize, "getfield_or_static:is_badStateB"); + BTB_BEGIN(is_badStateC, bsize, "getfield_or_static:is_badStateC"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateC, bsize, "getfield_or_static:is_badStateC"); + BTB_BEGIN(is_badStateD, bsize, "getfield_or_static:is_badStateD"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateD, bsize, "getfield_or_static:is_badStateD"); + BTB_BEGIN(is_badStateE, bsize, "getfield_or_static:is_badStateE"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateE, bsize, "getfield_or_static:is_badStateE"); + BTB_BEGIN(is_badStateF, bsize, "getfield_or_static:is_badStateF"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateF, bsize, "getfield_or_static:is_badStateF"); + + __ align_address(64); + BIND(is_badState); // Do this outside branch table. Needs a lot of space. 
+  {
+    // b_off / e_off record the code offset before and after the emitted
+    // sequence; neither value is read afterwards.
+    unsigned int b_off = __ offset();
+    if (is_static) {
+      __ stop_static("Bad state in getstatic");
+    } else {
+      __ stop_static("Bad state in getfield");
+    }
+    unsigned int e_off = __ offset();
+  }
+
+  __ align_address(64);
+  BIND(atosHandler);  // Oops are really complicated to handle.
+                      // There is a lot of code generated.
+                      // Therefore: generate the handler outside of branch table.
+                      // There is no performance penalty. The additional branch
+                      // to here is compensated for by the fallthru to "Done".
+  {
+    unsigned int b_off = __ offset();
+    __ load_heap_oop(Z_tos, field);
+    __ verify_oop(Z_tos);
+    __ push(atos);
+    if (do_rewrite) {
+      patch_bytecode(Bytecodes::_fast_agetfield, bc, Z_ARG5);
+    }
+    unsigned int e_off = __ offset();
+  }
+
+  BIND(Done);
+}
+
+// getfield: instance field load using the default RewriteControl of
+// getfield_or_static().
+void TemplateTable::getfield(int byte_no) {
+  BLOCK_COMMENT("getfield {");
+  getfield_or_static(byte_no, false);
+  BLOCK_COMMENT("} getfield");
+}
+
+// nofast_getfield: instance field load that must not rewrite the bytecode
+// (passes may_not_rewrite).
+void TemplateTable::nofast_getfield(int byte_no) {
+  getfield_or_static(byte_no, false, may_not_rewrite);
+}
+
+// getstatic: static field load (is_static == true).
+void TemplateTable::getstatic(int byte_no) {
+  BLOCK_COMMENT("getstatic {");
+  getfield_or_static(byte_no, true);
+  BLOCK_COMMENT("} getstatic");
+}
+
+// The registers cache and index expected to be set before call. The
+// function may destroy various registers, just not the cache and
+// index registers.
+void TemplateTable::jvmti_post_field_mod(Register cache,
+                                         Register index, bool is_static) {
+  transition(vtos, vtos);
+
+  if (!JvmtiExport::can_post_field_modification()) {
+    return;
+  }
+
+  BLOCK_COMMENT("jvmti_post_field_mod {");
+
+  // Check to see if a field modification watch has been set before
+  // we take the time to call into the VM.
+ Label L1; + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + assert_different_registers(cache, index, Z_tos); + + __ load_absolute_address(Z_tos, (address)JvmtiExport::get_field_modification_count_addr()); + __ load_and_test_int(Z_R0, Address(Z_tos)); + __ z_brz(L1); + + // Index is returned as byte offset, do not shift! + __ get_cache_and_index_at_bcp(Z_ARG3, Z_R1_scratch, 1); + + if (is_static) { + // Life is simple. Null out the object pointer. + __ clear_reg(Z_ARG2, true, false); // Don't set CC. + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though. It + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. + __ mem2reg_opt(Z_ARG4, + Address(Z_ARG3, Z_R1_scratch, + in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset()) + + (BytesPerLong - BytesPerInt)), + false); + __ z_srl(Z_ARG4, ConstantPoolCacheEntry::tos_state_shift); + // Make sure we don't need to mask Z_ARG4 for tos_state after the above shift. + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ mem2reg_opt(Z_ARG2, at_tos(1)); // Initially assume a one word jvalue. 
+ + NearLabel load_dtos, cont; + + __ compareU32_and_branch(Z_ARG4, (intptr_t) ltos, + Assembler::bcondNotEqual, load_dtos); + __ mem2reg_opt(Z_ARG2, at_tos(2)); // ltos (two word jvalue) + __ z_bru(cont); + + __ bind(load_dtos); + __ compareU32_and_branch(Z_ARG4, (intptr_t)dtos, Assembler::bcondNotEqual, cont); + __ mem2reg_opt(Z_ARG2, at_tos(2)); // dtos (two word jvalue) + + __ bind(cont); + } + // cache entry pointer + + __ add2reg_with_index(Z_ARG3, in_bytes(cp_base_offset), Z_ARG3, Z_R1_scratch); + + // object(tos) + __ load_address(Z_ARG4, Address(Z_esp, Interpreter::stackElementSize)); + // Z_ARG2: object pointer set up above (NULL if static) + // Z_ARG3: cache entry pointer + // Z_ARG4: jvalue object on the stack + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), + Z_ARG2, Z_ARG3, Z_ARG4); + __ get_cache_and_index_at_bcp(cache, index, 1); + + __ bind(L1); + BLOCK_COMMENT("} jvmti_post_field_mod"); +} + + +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + transition(vtos, vtos); + + const Register cache = Z_tmp_1; + const Register index = Z_ARG5; + const Register obj = Z_tmp_1; + const Register off = Z_tmp_2; + const Register flags = Z_R1_scratch; + const Register br_tab = Z_ARG5; + const Register bc = Z_tmp_1; + const Register oopStore_tmp1 = Z_R1_scratch; + const Register oopStore_tmp2 = Z_ARG5; + const Register oopStore_tmp3 = Z_R0_scratch; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + // begin of life for: + // obj, off long life range + // flags short life range, up to branch into branch table + // end of life for: + // cache, index + + const Address field(obj, off); + Label is_Byte, is_Bool, is_Int, is_Short, is_Char, + is_Long, is_Float, is_Object, is_Double; + Label is_badState8, is_badState9, is_badStateA, is_badStateB, + 
is_badStateC, is_badStateD, is_badStateE, is_badStateF, + is_badState; + Label branchTable, atosHandler, Done; + bool do_rewrite = !is_static && (rc == may_rewrite); + bool dont_rewrite = (is_static || (rc == may_not_rewrite)); + + assert(do_rewrite == !dont_rewrite, "Oops, code is not fit for that"); + + assert(btos == 0, "change code, btos != 0"); + +#ifdef ASSERT + const unsigned int bsize = is_static ? BTB_MINSIZE*1 : BTB_MINSIZE*4; +#else + const unsigned int bsize = is_static ? BTB_MINSIZE*1 : BTB_MINSIZE*8; +#endif + + // Calculate address of branch table entry and branch there. + { + const int bit_shift = exact_log2(bsize); // Size of each branch table entry. + const int r_bitpos = 63 - bit_shift; + const int l_bitpos = r_bitpos - ConstantPoolCacheEntry::tos_state_bits + 1; + const int n_rotate = (bit_shift-ConstantPoolCacheEntry::tos_state_shift); + __ z_larl(br_tab, branchTable); + __ rotate_then_insert(flags, flags, l_bitpos, r_bitpos, n_rotate, true); + __ z_bc(Assembler::bcondAlways, 0, flags, br_tab); + } + // end of life for: + // flags, br_tab + + __ align_address(bsize); + BIND(branchTable); + + // btos + BTB_BEGIN(is_Byte, bsize, "putfield_or_static:is_Byte"); + __ pop(btos); + if (!is_static) { + pop_and_check_object(obj); + } + __ z_stc(Z_tos, field); + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, Z_ARG5, true, byte_no); + } + __ z_bru(Done); + BTB_END( is_Byte, bsize, "putfield_or_static:is_Byte"); + + // ztos + BTB_BEGIN(is_Bool, bsize, "putfield_or_static:is_Bool"); + __ pop(ztos); + if (do_rewrite) { + pop_and_check_object(obj); + } + __ z_nilf(Z_tos, 0x1); + __ z_stc(Z_tos, field); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, Z_ARG5, true, byte_no); + } + __ z_bru(Done); + BTB_END(is_Bool, bsize, "putfield_or_static:is_Bool"); + + // ctos + BTB_BEGIN(is_Char, bsize, "putfield_or_static:is_Char"); + __ pop(ctos); + if (!is_static) { + pop_and_check_object(obj); + } + __ z_sth(Z_tos, field); + if 
(do_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, Z_ARG5, true, byte_no); + } + __ z_bru(Done); + BTB_END( is_Char, bsize, "putfield_or_static:is_Char"); + + // stos + BTB_BEGIN(is_Short, bsize, "putfield_or_static:is_Short"); + __ pop(stos); + if (!is_static) { + pop_and_check_object(obj); + } + __ z_sth(Z_tos, field); + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, Z_ARG5, true, byte_no); + } + __ z_bru(Done); + BTB_END( is_Short, bsize, "putfield_or_static:is_Short"); + + // itos + BTB_BEGIN(is_Int, bsize, "putfield_or_static:is_Int"); + __ pop(itos); + if (!is_static) { + pop_and_check_object(obj); + } + __ reg2mem_opt(Z_tos, field, false); + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, Z_ARG5, true, byte_no); + } + __ z_bru(Done); + BTB_END( is_Int, bsize, "putfield_or_static:is_Int"); + + // ltos + BTB_BEGIN(is_Long, bsize, "putfield_or_static:is_Long"); + __ pop(ltos); + if (!is_static) { + pop_and_check_object(obj); + } + __ reg2mem_opt(Z_tos, field); + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, Z_ARG5, true, byte_no); + } + __ z_bru(Done); + BTB_END( is_Long, bsize, "putfield_or_static:is_Long"); + + // ftos + BTB_BEGIN(is_Float, bsize, "putfield_or_static:is_Float"); + __ pop(ftos); + if (!is_static) { + pop_and_check_object(obj); + } + __ freg2mem_opt(Z_ftos, field, false); + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, Z_ARG5, true, byte_no); + } + __ z_bru(Done); + BTB_END( is_Float, bsize, "putfield_or_static:is_Float"); + + // dtos + BTB_BEGIN(is_Double, bsize, "putfield_or_static:is_Double"); + __ pop(dtos); + if (!is_static) { + pop_and_check_object(obj); + } + __ freg2mem_opt(Z_ftos, field); + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, Z_ARG5, true, byte_no); + } + __ z_bru(Done); + BTB_END( is_Double, bsize, "putfield_or_static:is_Double"); + + // atos + BTB_BEGIN(is_Object, bsize, "putfield_or_static:is_Object"); + __ 
z_bru(atosHandler); + BTB_END( is_Object, bsize, "putfield_or_static:is_Object"); + + // Bad state detection comes at no extra runtime cost. + BTB_BEGIN(is_badState8, bsize, "putfield_or_static:is_badState8"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badState8, bsize, "putfield_or_static:is_badState8"); + BTB_BEGIN(is_badState9, bsize, "putfield_or_static:is_badState9"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badState9, bsize, "putfield_or_static:is_badState9"); + BTB_BEGIN(is_badStateA, bsize, "putfield_or_static:is_badStateA"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateA, bsize, "putfield_or_static:is_badStateA"); + BTB_BEGIN(is_badStateB, bsize, "putfield_or_static:is_badStateB"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateB, bsize, "putfield_or_static:is_badStateB"); + BTB_BEGIN(is_badStateC, bsize, "putfield_or_static:is_badStateC"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateC, bsize, "putfield_or_static:is_badStateC"); + BTB_BEGIN(is_badStateD, bsize, "putfield_or_static:is_badStateD"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateD, bsize, "putfield_or_static:is_badStateD"); + BTB_BEGIN(is_badStateE, bsize, "putfield_or_static:is_badStateE"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateE, bsize, "putfield_or_static:is_badStateE"); + BTB_BEGIN(is_badStateF, bsize, "putfield_or_static:is_badStateF"); + __ z_illtrap(); + __ z_bru(is_badState); + BTB_END( is_badStateF, bsize, "putfield_or_static:is_badStateF"); + + __ align_address(64); + BIND(is_badState); // Do this outside branch table. Needs a lot of space. + { + unsigned int b_off = __ offset(); + if (is_static) __ stop_static("Bad state in putstatic"); + else __ stop_static("Bad state in putfield"); + unsigned int e_off = __ offset(); + } + + __ align_address(64); + BIND(atosHandler); // Oops are really complicated to handle. + // There is a lot of code generated. 
+ // Therefore: generate the handler outside of branch table. + // There is no performance penalty. The additional branch + // to here is compensated for by the fallthru to "Done". + { + unsigned int b_off = __ offset(); + __ pop(atos); + if (!is_static) { + pop_and_check_object(obj); + } + // Store into the field + do_oop_store(_masm, obj, off, Z_tos, false, + oopStore_tmp1, oopStore_tmp2, oopStore_tmp3, _bs->kind(), false); + if (do_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, Z_ARG5, true, byte_no); + } + // __ z_bru(Done); // fallthru + unsigned int e_off = __ offset(); + } + + BIND(Done); + + // Check for volatile store. + Label notVolatile; + + __ testbit(Z_ARG4, ConstantPoolCacheEntry::is_volatile_shift); + __ z_brz(notVolatile); + __ z_fence(); + + BIND(notVolatile); +} + +void TemplateTable::putfield(int byte_no) { + BLOCK_COMMENT("putfield {"); + putfield_or_static(byte_no, false); + BLOCK_COMMENT("} putfield"); +} + +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::putstatic(int byte_no) { + BLOCK_COMMENT("putstatic {"); + putfield_or_static(byte_no, true); + BLOCK_COMMENT("} putstatic"); +} + +// Push the tos value back to the stack. +// gc will find oops there and update. +void TemplateTable::jvmti_post_fast_field_mod() { + + if (!JvmtiExport::can_post_field_modification()) { + return; + } + + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label exit; + + BLOCK_COMMENT("jvmti_post_fast_field_mod {"); + + __ load_absolute_address(Z_R1_scratch, + (address) JvmtiExport::get_field_modification_count_addr()); + __ load_and_test_int(Z_R0_scratch, Address(Z_R1_scratch)); + __ z_brz(exit); + + Register obj = Z_tmp_1; + + __ pop_ptr(obj); // Copy the object pointer from tos. + __ verify_oop(obj); + __ push_ptr(obj); // Put the object pointer back on tos. 
+ + // Save tos values before call_VM() clobbers them. Since we have + // to do it for every data type, we use the saved values as the + // jvalue object. + switch (bytecode()) { // Load values into the jvalue object. + case Bytecodes::_fast_aputfield: + __ push_ptr(Z_tos); + break; + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_sputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_iputfield: + __ push_i(Z_tos); + break; + case Bytecodes::_fast_dputfield: + __ push_d(); + break; + case Bytecodes::_fast_fputfield: + __ push_f(); + break; + case Bytecodes::_fast_lputfield: + __ push_l(Z_tos); + break; + + default: + ShouldNotReachHere(); + } + + // jvalue on the stack + __ load_address(Z_ARG4, Address(Z_esp, Interpreter::stackElementSize)); + // Access constant pool cache entry. + __ get_cache_entry_pointer_at_bcp(Z_ARG3, Z_tos, 1); + __ verify_oop(obj); + + // obj : object pointer copied above + // Z_ARG3: cache entry pointer + // Z_ARG4: jvalue object on the stack + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), + obj, Z_ARG3, Z_ARG4); + + switch (bytecode()) { // Restore tos values. + case Bytecodes::_fast_aputfield: + __ pop_ptr(Z_tos); + break; + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_sputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_iputfield: + __ pop_i(Z_tos); + break; + case Bytecodes::_fast_dputfield: + __ pop_d(Z_ftos); + break; + case Bytecodes::_fast_fputfield: + __ pop_f(Z_ftos); + break; + case Bytecodes::_fast_lputfield: + __ pop_l(Z_tos); + break; + } + + __ bind(exit); + BLOCK_COMMENT("} jvmti_post_fast_field_mod"); +} + +void TemplateTable::fast_storefield(TosState state) { + transition(state, vtos); + + ByteSize base = ConstantPoolCache::base_offset(); + jvmti_post_fast_field_mod(); + + // Access constant pool cache. 
+ Register cache = Z_tmp_1; + Register index = Z_tmp_2; + Register flags = Z_ARG5; + + // Index comes in bytes, don't shift afterwards! + __ get_cache_and_index_at_bcp(cache, index, 1); + + // Test for volatile. + assert(!flags->is_volatile(), "do_oop_store could perform leaf RT call"); + __ z_lg(flags, Address(cache, index, base + ConstantPoolCacheEntry::flags_offset())); + + // Replace index with field offset from cache entry. + Register field_offset = index; + __ z_lg(field_offset, Address(cache, index, base + ConstantPoolCacheEntry::f2_offset())); + + // Get object from stack. + Register obj = cache; + + pop_and_check_object(obj); + + // field address + const Address field(obj, field_offset); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, obj, field_offset, Z_tos, false, + Z_ARG2, Z_ARG3, Z_ARG4, _bs->kind(), false); + break; + case Bytecodes::_fast_lputfield: + __ reg2mem_opt(Z_tos, field); + break; + case Bytecodes::_fast_iputfield: + __ reg2mem_opt(Z_tos, field, false); + break; + case Bytecodes::_fast_zputfield: + __ z_nilf(Z_tos, 0x1); + // fall through to bputfield + case Bytecodes::_fast_bputfield: + __ z_stc(Z_tos, field); + break; + case Bytecodes::_fast_sputfield: + // fall through + case Bytecodes::_fast_cputfield: + __ z_sth(Z_tos, field); + break; + case Bytecodes::_fast_fputfield: + __ freg2mem_opt(Z_ftos, field, false); + break; + case Bytecodes::_fast_dputfield: + __ freg2mem_opt(Z_ftos, field); + break; + default: + ShouldNotReachHere(); + } + + // Check for volatile store. 
+ Label notVolatile; + + __ testbit(flags, ConstantPoolCacheEntry::is_volatile_shift); + __ z_brz(notVolatile); + __ z_fence(); + + __ bind(notVolatile); +} + +void TemplateTable::fast_accessfield(TosState state) { + transition(atos, state); + + Register obj = Z_tos; + + // Do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label cont; + + __ load_absolute_address(Z_R1_scratch, + (address)JvmtiExport::get_field_access_count_addr()); + __ load_and_test_int(Z_R0_scratch, Address(Z_R1_scratch)); + __ z_brz(cont); + + // Access constant pool cache entry. + + __ get_cache_entry_pointer_at_bcp(Z_ARG3, Z_tmp_1, 1); + __ verify_oop(obj); + __ push_ptr(obj); // Save object pointer before call_VM() clobbers it. + __ z_lgr(Z_ARG2, obj); + + // Z_ARG2: object pointer copied above + // Z_ARG3: cache entry pointer + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + Z_ARG2, Z_ARG3); + __ pop_ptr(obj); // Restore object pointer. + + __ bind(cont); + } + + // Access constant pool cache. + Register cache = Z_tmp_1; + Register index = Z_tmp_2; + + // Index comes in bytes, don't shift afterwards! + __ get_cache_and_index_at_bcp(cache, index, 1); + // Replace index with field offset from cache entry. 
+ __ mem2reg_opt(index, + Address(cache, index, + ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + __ verify_oop(obj); + __ null_check(obj); + + Address field(obj, index); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_agetfield: + __ load_heap_oop(Z_tos, field); + __ verify_oop(Z_tos); + return; + case Bytecodes::_fast_lgetfield: + __ mem2reg_opt(Z_tos, field); + return; + case Bytecodes::_fast_igetfield: + __ mem2reg_opt(Z_tos, field, false); + return; + case Bytecodes::_fast_bgetfield: + __ z_lb(Z_tos, field); + return; + case Bytecodes::_fast_sgetfield: + __ z_lh(Z_tos, field); + return; + case Bytecodes::_fast_cgetfield: + __ z_llgh(Z_tos, field); // Load into 64 bits, works on all CPUs. + return; + case Bytecodes::_fast_fgetfield: + __ mem2freg_opt(Z_ftos, field, false); + return; + case Bytecodes::_fast_dgetfield: + __ mem2freg_opt(Z_ftos, field); + return; + default: + ShouldNotReachHere(); + } +} + +void TemplateTable::fast_xaccess(TosState state) { + transition(vtos, state); + + Register receiver = Z_tos; + // Get receiver. + __ mem2reg_opt(Z_tos, aaddress(0)); + + // Access constant pool cache. + Register cache = Z_tmp_1; + Register index = Z_tmp_2; + + // Index comes in bytes, don't shift afterwards! + __ get_cache_and_index_at_bcp(cache, index, 2); + // Replace index with field offset from cache entry. + __ mem2reg_opt(index, + Address(cache, index, + ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + // Make sure exception is reported in correct bcp range (getfield is + // next instruction). 
+ __ add2reg(Z_bcp, 1); + __ null_check(receiver); + switch (state) { + case itos: + __ mem2reg_opt(Z_tos, Address(receiver, index), false); + break; + case atos: + __ load_heap_oop(Z_tos, Address(receiver, index)); + __ verify_oop(Z_tos); + break; + case ftos: + __ mem2freg_opt(Z_ftos, Address(receiver, index)); + break; + default: + ShouldNotReachHere(); + } + + // Reset bcp to original position. + __ add2reg(Z_bcp, -1); +} + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. + Register recv, // If caller wants to see it. + Register flags) { // If caller wants to test it. + // Determine flags. + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); + + // Setup registers & access constant pool cache. + if (recv == noreg) { recv = Z_ARG1; } + if (flags == noreg) { flags = Z_ARG2; } + assert_different_registers(method, Z_R14, index, recv, flags); + + BLOCK_COMMENT("prepare_invoke {"); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + // Maybe push appendix to arguments. + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + Register resolved_reference = Z_R1_scratch; + __ testbit(flags, ConstantPoolCacheEntry::has_appendix_shift); + __ z_bfalse(L_no_push); + // Push the appendix as a trailing parameter. 
+ // This must be done before we get the receiver, + // since the parameter_size includes it. + __ load_resolved_reference_at_index(resolved_reference, index); + __ verify_oop(resolved_reference); + __ push_ptr(resolved_reference); // Push appendix (MethodType, CallSite, etc.). + __ bind(L_no_push); + } + + // Load receiver if needed (after appendix is pushed so parameter size is correct). + if (load_receiver) { + assert(!is_invokedynamic, ""); + // recv := int2long(flags & ConstantPoolCacheEntry::parameter_size_mask) << 3 + // Flags is zero-extended int2long when loaded during load_invoke_cp_cache_entry(). + // Only the least significant byte (psize) of flags is used. + { + const unsigned int logSES = Interpreter::logStackElementSize; + const int bit_shift = logSES; + const int r_bitpos = 63 - bit_shift; + const int l_bitpos = r_bitpos - ConstantPoolCacheEntry::parameter_size_bits + 1; + const int n_rotate = bit_shift; + assert(ConstantPoolCacheEntry::parameter_size_mask == 255, "adapt bitpositions"); + __ rotate_then_insert(recv, flags, l_bitpos, r_bitpos, n_rotate, true); + } + // Recv now contains #arguments * StackElementSize. + + Address recv_addr(Z_esp, recv); + __ z_lg(recv, recv_addr); + __ verify_oop(recv); + } + + // Compute return type. + // ret_type is used by callers (invokespecial, invokestatic) at least. + Register ret_type = Z_R1_scratch; + assert_different_registers(ret_type, method); + + const address table_addr = (address)Interpreter::invoke_return_entry_table_for(code); + __ load_absolute_address(Z_R14, table_addr); + + { + const int bit_shift = LogBytesPerWord; // Size of each table entry. + const int r_bitpos = 63 - bit_shift; + const int l_bitpos = r_bitpos - ConstantPoolCacheEntry::tos_state_bits + 1; + const int n_rotate = bit_shift-ConstantPoolCacheEntry::tos_state_shift; + __ rotate_then_insert(ret_type, flags, l_bitpos, r_bitpos, n_rotate, true); + // Make sure we don't need to mask flags for tos_state after the above shift. 
+ ConstantPoolCacheEntry::verify_tos_state_shift(); + } + + __ z_lg(Z_R14, Address(Z_R14, ret_type)); // Load return address. + BLOCK_COMMENT("} prepare_invoke"); +} + + +void TemplateTable::invokevirtual_helper(Register index, + Register recv, + Register flags) { + // Uses temporary registers Z_tmp_2, Z_ARG4. + assert_different_registers(index, recv, Z_tmp_2, Z_ARG4); + + // Test for an invoke of a final method. + Label notFinal; + + BLOCK_COMMENT("invokevirtual_helper {"); + + __ testbit(flags, ConstantPoolCacheEntry::is_vfinal_shift); + __ z_brz(notFinal); + + const Register method = index; // Method must be Z_ARG3. + assert(method == Z_ARG3, "method must be second argument for interpreter calling convention"); + + // Do the call - the index is actually the method to call. + // That is, f2 is a vtable index if !is_vfinal, else f2 is a method. + + // It's final, need a null check here! + __ null_check(recv); + + // Profile this call. + __ profile_final_call(Z_tmp_2); + __ profile_arguments_type(Z_tmp_2, method, Z_ARG5, true); // Argument type profiling. + __ jump_from_interpreted(method, Z_tmp_2); + + __ bind(notFinal); + + // Get receiver klass. + __ null_check(recv, Z_R0_scratch, oopDesc::klass_offset_in_bytes()); + __ load_klass(Z_tmp_2, recv); + + // Profile this call. + __ profile_virtual_call(Z_tmp_2, Z_ARG4, Z_ARG5); + + // Get target method & entry point. 
+ __ z_sllg(index, index, exact_log2(vtableEntry::size_in_bytes())); + __ mem2reg_opt(method, + Address(Z_tmp_2, index, + InstanceKlass::vtable_start_offset() + in_ByteSize(vtableEntry::method_offset_in_bytes()))); + __ profile_arguments_type(Z_ARG4, method, Z_ARG5, true); + __ jump_from_interpreted(method, Z_ARG4); + BLOCK_COMMENT("} invokevirtual_helper"); +} + +void TemplateTable::invokevirtual(int byte_no) { + transition(vtos, vtos); + + assert(byte_no == f2_byte, "use this argument"); + prepare_invoke(byte_no, + Z_ARG3, // method or vtable index + noreg, // unused itable index + Z_ARG1, // recv + Z_ARG2); // flags + + // Z_ARG3 : index + // Z_ARG1 : receiver + // Z_ARG2 : flags + invokevirtual_helper(Z_ARG3, Z_ARG1, Z_ARG2); +} + +void TemplateTable::invokespecial(int byte_no) { + transition(vtos, vtos); + + assert(byte_no == f1_byte, "use this argument"); + Register Rmethod = Z_tmp_2; + prepare_invoke(byte_no, Rmethod, noreg, // Get f1 method. + Z_ARG3); // Get receiver also for null check. + __ verify_oop(Z_ARG3); + __ null_check(Z_ARG3); + // Do the call. + __ profile_call(Z_ARG2); + __ profile_arguments_type(Z_ARG2, Rmethod, Z_ARG5, false); + __ jump_from_interpreted(Rmethod, Z_R1_scratch); +} + +void TemplateTable::invokestatic(int byte_no) { + transition(vtos, vtos); + + assert(byte_no == f1_byte, "use this argument"); + Register Rmethod = Z_tmp_2; + prepare_invoke(byte_no, Rmethod); // Get f1 method. + // Do the call. + __ profile_call(Z_ARG2); + __ profile_arguments_type(Z_ARG2, Rmethod, Z_ARG5, false); + __ jump_from_interpreted(Rmethod, Z_R1_scratch); +} + +// Outdated feature, and we don't support it. 
+void TemplateTable::fast_invokevfinal(int byte_no) {
+  // Template for the fast_invokevfinal bytecode. No dispatch code is
+  // generated: after checking byte_no, the template only emits a stop with
+  // the message below.
+  transition(vtos, vtos);
+  assert(byte_no == f2_byte, "use this argument");
+  __ stop("fast_invokevfinal not used on linuxs390x");
+}
+
+// Template for the invokeinterface bytecode. byte_no must be f1_byte.
+//
+// Register usage within this template:
+//   Z_tos   (interface): copy of prepare_invoke's 'method' output (f1 klass)
+//   Z_ARG3  (index)    : prepare_invoke's 'index' output (f2 itable index)
+//   Z_tmp_1 (receiver) : receiver loaded by prepare_invoke
+//   Z_ARG5  (flags)    : cache entry flags; the same register is reused as
+//                        'klass' once the forced-virtual bit has been tested
+//   Z_ARG2 / Z_ARG4    : copies of klass / index, passed to the runtime on
+//                        the two error paths
+//   Z_tmp_2 (method)   : method found by lookup_interface_method
+void TemplateTable::invokeinterface(int byte_no) {
+  transition(vtos, vtos);
+
+  assert(byte_no == f1_byte, "use this argument");
+  Register interface = Z_tos;
+  Register index = Z_ARG3;
+  Register receiver = Z_tmp_1;
+  Register flags = Z_ARG5;
+
+  BLOCK_COMMENT("invokeinterface {");
+
+  // Destroys Z_ARG1 and Z_ARG2, thus use Z_ARG4 and copy afterwards.
+  prepare_invoke(byte_no, Z_ARG4, index, // Get f1 klassOop, f2 itable index.
+                 receiver, flags);
+
+  // Z_R14 (== Z_bytecode) : return entry
+
+  // Move the interface klass from the temporary Z_ARG4 into its final register.
+  __ z_lgr(interface, Z_ARG4);
+
+  // Special case of invokeinterface called for virtual method of
+  // java.lang.Object. See cpCacheOop.cpp for details.
+  // This code isn't produced by javac, but could be produced by
+  // another compliant java compiler.
+  // If the is_forced_virtual bit is not set, skip the virtual dispatch.
+  // invokevirtual_helper ends in jump_from_interpreted on both of its paths.
+  Label notMethod;
+  __ testbit(flags, ConstantPoolCacheEntry::is_forced_virtual_shift);
+  __ z_brz(notMethod);
+  invokevirtual_helper(index, receiver, flags);
+  __ bind(notMethod);
+
+  // Get receiver klass into klass - also a null check.
+  // 'klass' aliases the flags register; flags are not read again below.
+  Register klass = flags;
+
+  __ restore_locals();
+  __ load_klass(klass, receiver);
+
+  // Profile this call.
+  __ profile_virtual_call(klass, Z_ARG2/*mdp*/, Z_ARG4/*scratch*/);
+
+  NearLabel no_such_interface, no_such_method;
+  Register method = Z_tmp_2;
+
+  // TK 2010-08-24: save the index to Z_ARG4. It is passed to
+  // InterpreterRuntime::throw_AbstractMethodError on the no_such_method path.
+  __ z_lgr(Z_ARG4, index);
+  // TK 2011-03-24: copy also klass because it could be changed in
+  // lookup_interface_method. The copy in Z_ARG2 is what both error paths
+  // pass to the runtime.
+  __ z_lgr(Z_ARG2, klass);
+  // NOTE(review): 'method' is Z_tmp_2, so the output register and the scan
+  // temp passed below are the same register - confirm that
+  // lookup_interface_method tolerates this.
+  __ lookup_interface_method(// inputs: rec. class, interface, itable index
+                             klass, interface, index,
+                             // outputs: method, scan temp. reg
+                             method, Z_tmp_2, Z_R1_scratch,
+                             no_such_interface);
+
+  // Check for abstract method error.
+  // Note: This should be done more efficiently via a throw_abstract_method_error
+  // interpreter entry point and a conditional jump to it in case of a null
+  // method.
+  __ compareU64_and_branch(method, (intptr_t) 0,
+                           Assembler::bcondZero, no_such_method);
+
+  __ profile_arguments_type(Z_ARG3, method, Z_ARG5, true);
+
+  // Do the call.
+  __ jump_from_interpreted(method, Z_ARG5);
+  __ should_not_reach_here();
+
+  // exception handling code follows...
+  // Note: Must restore interpreter registers to canonical
+  // state for exception handling to work correctly!
+
+  // Error path 1: the looked-up method is null.
+  __ bind(no_such_method);
+
+  // Throw exception.
+  __ restore_bcp(); // Bcp must be correct for exception handler (was destroyed).
+  __ restore_locals(); // Make sure locals pointer is correct as well (was destroyed).
+  // TK 2010-08-24: Call InterpreterRuntime::throw_AbstractMethodError with the
+  // relevant information for generating a better error message:
+  // Z_ARG2 (klass copy made above), interface, Z_ARG4 (index copy made above).
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::throw_AbstractMethodError),
+             Z_ARG2, interface, Z_ARG4);
+  // The call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  // Error path 2: branch target handed to lookup_interface_method above.
+  __ bind(no_such_interface);
+
+  // Throw exception.
+  __ restore_bcp(); // Bcp must be correct for exception handler (was destroyed).
+  __ restore_locals(); // Make sure locals pointer is correct as well (was destroyed).
+  // TK 2010-08-24: Call InterpreterRuntime::throw_IncompatibleClassChangeError
+  // with the relevant information for generating a better error message:
+  // Z_ARG2 (klass copy made above) and interface.
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address,
+                              InterpreterRuntime::throw_IncompatibleClassChangeError),
+             Z_ARG2, interface);
+  // The call_VM checks for exception, so we should never return here.
+  __ should_not_reach_here();
+
+  BLOCK_COMMENT("} invokeinterface");
+  return;
+}
+
+// Template for the invokehandle bytecode.
+// prepare_invoke delivers the method in Z_tmp_2, its 'index' output in
+// Z_tmp_1 (mtype) and the receiver in Z_ARG5. The receiver is verified and
+// null-checked before the jump to the method.
+void TemplateTable::invokehandle(int byte_no) {
+  transition(vtos, vtos);
+
+  const Register method = Z_tmp_2;
+  const Register recv = Z_ARG5;
+  const Register mtype = Z_tmp_1;
+  prepare_invoke(byte_no,
+                 method, mtype, // Get f2 method, f1 MethodType.
+                 recv);
+  __ verify_method_ptr(method);
+  __ verify_oop(recv);
+  __ null_check(recv);
+
+  // Note: Mtype is already pushed (if necessary) by prepare_invoke.
+
+  // FIXME: profile the LambdaForm also.
+  __ profile_final_call(Z_ARG2);
+  // Z_ARG5 is also 'recv'; recv is not read again after the null check above.
+  __ profile_arguments_type(Z_ARG3, method, Z_ARG5, true);
+
+  __ jump_from_interpreted(method, Z_ARG3);
+}
+
+// Template for the invokedynamic bytecode.
+// No receiver is requested from prepare_invoke; the template profiles the
+// call and jumps to the method delivered in Rmethod.
+void TemplateTable::invokedynamic(int byte_no) {
+  transition(vtos, vtos);
+
+  const Register Rmethod = Z_tmp_2;
+  const Register Rcallsite = Z_tmp_1;
+
+  prepare_invoke(byte_no, Rmethod, Rcallsite);
+
+  // Rmethod: prepare_invoke's 'method' output, i.e. the method jumped to
+  //          below (MH.linkToCallSite method, from f2).
+  // Rcallsite: prepare_invoke's 'index' output, which prepare_invoke uses to
+  //          load the appendix (CallSite object, from f1) that it pushes.
+
+  // Note: Callsite is already pushed by prepare_invoke.
+
+  // TODO: should make a type profile for any invokedynamic that takes a ref argument.
+  // Profile this call.
+  __ profile_call(Z_ARG2);
+  __ profile_arguments_type(Z_ARG2, Rmethod, Z_ARG5, false);
+  __ jump_from_interpreted(Rmethod, Z_ARG2);
+}
+
+//-----------------------------------------------------------------------------
+// Allocation
+
+// Original comment on "allow_shared_alloc":
+// Always go the slow path.
+//  + Eliminated optimization within the template-based interpreter:
+//    If an allocation is done within the interpreter without using
+//    tlabs, the interpreter tries to do the allocation directly
+//    on the heap.
+//  + That means the profiling hooks are not considered and allocations
+//    get lost for the profiling framework.
+//  + However, we do not think that this optimization is really needed,
+//    so we always go now the slow path through the VM in this case --
+//    spec jbb2005 shows no measurable performance degradation.
+void TemplateTable::_new() { + transition(vtos, atos); + address prev_instr_address = NULL; + Register tags = Z_tmp_1; + Register RallocatedObject = Z_tos; + Register cpool = Z_ARG2; + Register tmp = Z_ARG3; // RobjectFields==tmp and Rsize==offset must be a register pair. + Register offset = Z_ARG4; + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // Including clearing the fields. + Label allocate_shared; + + BLOCK_COMMENT("TemplateTable::_new {"); + __ get_2_byte_integer_at_bcp(offset/*dest*/, 1, InterpreterMacroAssembler::Unsigned); + __ get_cpool_and_tags(cpool, tags); + // Make sure the class we're about to instantiate has been resolved. + // This is done before loading InstanceKlass to be consistent with the order + // how Constant Pool is updated (see ConstantPool::klass_at_put). + const int tags_offset = Array::base_offset_in_bytes(); + __ load_address(tmp, Address(tags, offset, tags_offset)); + __ z_cli(0, tmp, JVM_CONSTANT_Class); + __ z_brne(slow_case); + + __ z_sllg(offset, offset, LogBytesPerWord); // Convert to to offset. + // Get InstanceKlass. + Register iklass = cpool; + __ z_lg(iklass, Address(cpool, offset, sizeof(ConstantPool))); + + // Make sure klass is initialized & doesn't have finalizer. + // Make sure klass is fully initialized. + const int state_offset = in_bytes(InstanceKlass::init_state_offset()); + if (Immediate::is_uimm12(state_offset)) { + __ z_cli(state_offset, iklass, InstanceKlass::fully_initialized); + } else { + __ z_cliy(state_offset, iklass, InstanceKlass::fully_initialized); + } + __ z_brne(slow_case); + + // Get instance_size in InstanceKlass (scaled to a count of bytes). + Register Rsize = offset; + const int mask = 1 << Klass::_lh_instance_slow_path_bit; + __ z_llgf(Rsize, Address(iklass, Klass::layout_helper_offset())); + __ z_tmll(Rsize, mask); + __ z_btrue(slow_case); + + // Allocate the instance + // 1) Try to allocate in the TLAB. 
+ // 2) If fail and the object is large allocate in the shared Eden. + // 3) If the above fails (or is not applicable), go to a slow case + // (creates a new TLAB, etc.). + + // Always go the slow path. See comment above this template. + const bool allow_shared_alloc = false; + + if (UseTLAB) { + Register RoldTopValue = RallocatedObject; + Register RnewTopValue = tmp; + __ z_lg(RoldTopValue, Address(Z_thread, JavaThread::tlab_top_offset())); + __ load_address(RnewTopValue, Address(RoldTopValue, Rsize)); + __ z_cg(RnewTopValue, Address(Z_thread, JavaThread::tlab_end_offset())); + __ z_brh(allow_shared_alloc ? allocate_shared : slow_case); + __ z_stg(RnewTopValue, Address(Z_thread, JavaThread::tlab_top_offset())); + if (ZeroTLAB) { + // The fields have been already cleared. + __ z_bru(initialize_header); + } else { + // Initialize both the header and fields. + if (allow_shared_alloc) { + __ z_bru(initialize_object); + } else { + // Fallthrough to initialize_object, but assert that it is on fall through path. + prev_instr_address = __ pc(); + } + } + } + + if (allow_shared_alloc) { + // Allocation in shared Eden not implemented, because sapjvm allocation trace does not allow it. + Unimplemented(); + } + + if (UseTLAB) { + Register RobjectFields = tmp; + Register Rzero = Z_R1_scratch; + + assert(ZeroTLAB || prev_instr_address == __ pc(), + "must not omit jump to initialize_object above, as it is not on the fall through path"); + __ clear_reg(Rzero, true /*whole reg*/, false); // Load 0L into Rzero. Don't set CC. + + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ bind(initialize_object); + __ z_aghi(Rsize, (int)-sizeof(oopDesc)); // Subtract header size, set CC. + __ z_bre(initialize_header); // Jump if size of fields is zero. + + // Initialize object fields. + // See documentation for MVCLE instruction!!! 
+ assert(RobjectFields->encoding() % 2 == 0, "RobjectFields must be an even register"); + assert(Rsize->encoding() == (RobjectFields->encoding()+1), + "RobjectFields and Rsize must be a register pair"); + assert(Rzero->encoding() % 2 == 1, "Rzero must be an odd register"); + + // Set Rzero to 0 and use it as src length, then mvcle will copy nothing + // and fill the object with the padding value 0. + __ add2reg(RobjectFields, sizeof(oopDesc), RallocatedObject); + __ move_long_ext(RobjectFields, as_Register(Rzero->encoding() - 1), 0); + + // Initialize object header only. + __ bind(initialize_header); + if (UseBiasedLocking) { + Register prototype = RobjectFields; + __ z_lg(prototype, Address(iklass, Klass::prototype_header_offset())); + __ z_stg(prototype, Address(RallocatedObject, oopDesc::mark_offset_in_bytes())); + } else { + __ store_const(Address(RallocatedObject, oopDesc::mark_offset_in_bytes()), + (long)markOopDesc::prototype()); + } + + __ store_klass_gap(Rzero, RallocatedObject); // Zero klass gap for compressed oops. + __ store_klass(iklass, RallocatedObject); // Store klass last. + + { + SkipIfEqual skip(_masm, &DTraceAllocProbes, false, Z_ARG5 /*scratch*/); + // Trigger dtrace event for fastpath. + __ push(atos); // Save the return value. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), RallocatedObject); + __ pop(atos); // Restore the return value. + } + __ z_bru(done); + } + + // slow case + __ bind(slow_case); + __ get_constant_pool(Z_ARG2); + __ get_2_byte_integer_at_bcp(Z_ARG3/*dest*/, 1, InterpreterMacroAssembler::Unsigned); + call_VM(Z_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), Z_ARG2, Z_ARG3); + __ verify_oop(Z_tos); + + // continue + __ bind(done); + + BLOCK_COMMENT("} TemplateTable::_new"); +} + +void TemplateTable::newarray() { + transition(itos, atos); + + // Call runtime. 
+ __ z_llgc(Z_ARG2, at_bcp(1)); // type + // size in Z_tos + call_VM(Z_RET, + CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), + Z_ARG2, Z_tos); +} + +void TemplateTable::anewarray() { + transition(itos, atos); + __ get_2_byte_integer_at_bcp(Z_ARG3, 1, InterpreterMacroAssembler::Unsigned); + __ get_constant_pool(Z_ARG2); + __ z_llgfr(Z_ARG4, Z_tos); + call_VM(Z_tos, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), + Z_ARG2, Z_ARG3, Z_ARG4); +} + +void TemplateTable::arraylength() { + transition(atos, itos); + + int offset = arrayOopDesc::length_offset_in_bytes(); + + __ null_check(Z_tos, Z_R0_scratch, offset); + __ mem2reg_opt(Z_tos, Address(Z_tos, offset), false); +} + +void TemplateTable::checkcast() { + transition(atos, atos); + + NearLabel done, is_null, ok_is_subtype, quicked, resolved; + + BLOCK_COMMENT("checkcast {"); + // If object is NULL, we are almost done. + __ compareU64_and_branch(Z_tos, (intptr_t) 0, Assembler::bcondZero, is_null); + + // Get cpool & tags index. + Register cpool = Z_tmp_1; + Register tags = Z_tmp_2; + Register index = Z_ARG5; + + __ get_cpool_and_tags(cpool, tags); + __ get_2_byte_integer_at_bcp(index, 1, InterpreterMacroAssembler::Unsigned); + // See if bytecode has already been quicked. + // Note: For CLI, we would have to add the index to the tags pointer first, + // thus load and compare in a "classic" manner. + __ z_llgc(Z_R0_scratch, + Address(tags, index, Array::base_offset_in_bytes())); + __ compareU64_and_branch(Z_R0_scratch, JVM_CONSTANT_Class, + Assembler::bcondEqual, quicked); + + __ push(atos); // Save receiver for result, and for GC. + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(Z_tos); + + Register receiver = Z_ARG4; + Register klass = Z_tos; + Register subklass = Z_ARG5; + + __ pop_ptr(receiver); // restore receiver + __ z_bru(resolved); + + // Get superklass in klass and subklass in subklass. 
+ __ bind(quicked); + + __ z_lgr(Z_ARG4, Z_tos); // Save receiver. + __ z_sllg(index, index, LogBytesPerWord); // index2bytes for addressing + __ mem2reg_opt(klass, Address(cpool, index, sizeof(ConstantPool))); + + __ bind(resolved); + + __ load_klass(subklass, receiver); + + // Generate subtype check. Object in receiver. + // Superklass in klass. Subklass in subklass. + __ gen_subtype_check(subklass, klass, Z_ARG3, Z_tmp_1, ok_is_subtype); + + // Come here on failure. + __ push_ptr(receiver); + // Object is at TOS, target klass oop expected in rax by convention. + __ z_brul((address) Interpreter::_throw_ClassCastException_entry); + + // Come here on success. + __ bind(ok_is_subtype); + + __ z_lgr(Z_tos, receiver); // Restore object. + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ z_bru(done); + __ bind(is_null); + __ profile_null_seen(Z_tmp_1); + } else { + __ bind(is_null); // Same as 'done'. + } + + __ bind(done); + BLOCK_COMMENT("} checkcast"); +} + +void TemplateTable::instanceof() { + transition(atos, itos); + + NearLabel done, is_null, ok_is_subtype, quicked, resolved; + + BLOCK_COMMENT("instanceof {"); + // If object is NULL, we are almost done. + __ compareU64_and_branch(Z_tos, (intptr_t) 0, Assembler::bcondZero, is_null); + + // Get cpool & tags index. + Register cpool = Z_tmp_1; + Register tags = Z_tmp_2; + Register index = Z_ARG5; + + __ get_cpool_and_tags(cpool, tags); + __ get_2_byte_integer_at_bcp(index, 1, InterpreterMacroAssembler::Unsigned); + // See if bytecode has already been quicked. + // Note: For CLI, we would have to add the index to the tags pointer first, + // thus load and compare in a "classic" manner. + __ z_llgc(Z_R0_scratch, + Address(tags, index, Array::base_offset_in_bytes())); + __ compareU64_and_branch(Z_R0_scratch, JVM_CONSTANT_Class, Assembler::bcondEqual, quicked); + + __ push(atos); // Save receiver for result, and for GC. 
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(Z_tos); + + Register receiver = Z_tmp_2; + Register klass = Z_tos; + Register subklass = Z_tmp_2; + + __ pop_ptr(receiver); // Restore receiver. + __ verify_oop(receiver); + __ load_klass(subklass, subklass); + __ z_bru(resolved); + + // Get superklass in klass and subklass in subklass. + __ bind(quicked); + + __ load_klass(subklass, Z_tos); + __ z_sllg(index, index, LogBytesPerWord); // index2bytes for addressing + __ mem2reg_opt(klass, + Address(cpool, index, sizeof(ConstantPool))); + + __ bind(resolved); + + // Generate subtype check. + // Superklass in klass. Subklass in subklass. + __ gen_subtype_check(subklass, klass, Z_ARG4, Z_ARG5, ok_is_subtype); + + // Come here on failure. + __ clear_reg(Z_tos, true, false); + __ z_bru(done); + + // Come here on success. + __ bind(ok_is_subtype); + __ load_const_optimized(Z_tos, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ z_bru(done); + __ bind(is_null); + __ profile_null_seen(Z_tmp_1); + } else { + __ bind(is_null); // same as 'done' + } + + __ bind(done); + // tos = 0: obj == NULL or obj is not an instanceof the specified klass + // tos = 1: obj != NULL and obj is an instanceof the specified klass + BLOCK_COMMENT("} instanceof"); +} + +//----------------------------------------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + + // Note: We get here even if we are single stepping. + // Jbug insists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + // Get the unpatched byte code. + __ get_method(Z_ARG2); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), + Z_ARG2, Z_bcp); + // Save the result to a register that is preserved over C-function calls. + __ z_lgr(Z_tmp_1, Z_RET); + + // Post the breakpoint event. 
+ __ get_method(Z_ARG2); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), + Z_ARG2, Z_bcp); + + // Must restore the bytecode, because call_VM destroys Z_bytecode. + __ z_lgr(Z_bytecode, Z_tmp_1); + + // Complete the execution of original bytecode. + __ dispatch_only_normal(vtos); +} + + +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(Z_tos); + __ load_absolute_address(Z_ARG2, Interpreter::throw_exception_entry()); + __ z_br(Z_ARG2); +} + +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// callers_sp <- Z_SP (callers_sp == Z_fp (own fp)) +// return_pc +// [rest of ABI_160] +// /slot o: free +// / ... free +// oper. | slot n+1: free <- Z_esp points to first free slot +// stack | slot n: val caches IJAVA_STATE.esp +// | ... +// \slot 0: val +// /slot m <- IJAVA_STATE.monitors = monitor block top +// | ... +// monitors| slot 2 +// | slot 1 +// \slot 0 +// /slot l <- monitor block bot +// ijava_state | ... +// | slot 2 +// \slot 0 +// <- Z_fp +void TemplateTable::monitorenter() { + transition(atos, vtos); + + BLOCK_COMMENT("monitorenter {"); + + // Check for NULL object. + __ null_check(Z_tos); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + NearLabel allocated; + // Initialize entry pointer. + const Register Rfree_slot = Z_tmp_1; + __ clear_reg(Rfree_slot, true, false); // Points to free slot or NULL. Don't set CC. + + // Find a free slot in the monitor block from top to bot (result in Rfree_slot). + { + const Register Rcurr_monitor = Z_ARG2; + const Register Rbot = Z_ARG3; // Points to word under bottom of monitor block. + const Register Rlocked_obj = Z_ARG4; + NearLabel loop, exit, not_free; + // Starting with top-most entry. 
+ __ get_monitors(Rcurr_monitor); // Rcur_monitor = IJAVA_STATE.monitors + __ add2reg(Rbot, -frame::z_ijava_state_size, Z_fp); + +#ifdef ASSERT + address reentry = NULL; + { NearLabel ok; + __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotHigh, ok); + reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors points below monitor block bottom"); + __ bind(ok); + } + { NearLabel ok; + __ compareU64_and_branch(Rcurr_monitor, Z_esp, Assembler::bcondHigh, ok); + reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors above Z_esp"); + __ bind(ok); + } +#endif + + // Check if bottom reached, i.e. if there is at least one monitor. + __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondEqual, exit); + + __ bind(loop); + // Check if current entry is used. + __ load_and_test_long(Rlocked_obj, Address(Rcurr_monitor, BasicObjectLock::obj_offset_in_bytes())); + __ z_brne(not_free); + // If not used then remember entry in Rfree_slot. + __ z_lgr(Rfree_slot, Rcurr_monitor); + __ bind(not_free); + // Exit if current entry is for same object; this guarantees, that new monitor + // used for recursive lock is above the older one. + __ compareU64_and_branch(Rlocked_obj, Z_tos, Assembler::bcondEqual, exit); + // otherwise advance to next entry + __ add2reg(Rcurr_monitor, entry_size); + // Check if bottom reached, if not at bottom then check this entry. + __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotEqual, loop); + __ bind(exit); + } + + // Rfree_slot != NULL -> found one + __ compareU64_and_branch(Rfree_slot, (intptr_t)0L, Assembler::bcondNotEqual, allocated); + + // Allocate one if there's no free slot. + __ add_monitor_to_stack(false, Z_ARG3, Z_ARG4, Z_ARG5); + __ get_monitors(Rfree_slot); + + // Rfree_slot: points to monitor entry. + __ bind(allocated); + + // Increment bcp to point to the next bytecode, so exception + // handling for async. exceptions work correctly. 
+ // The object has already been poped from the stack, so the + // expression stack looks correct. + __ add2reg(Z_bcp, 1, Z_bcp); + + // Store object. + __ z_stg(Z_tos, BasicObjectLock::obj_offset_in_bytes(), Rfree_slot); + __ lock_object(Rfree_slot, Z_tos); + + // Check to make sure this monitor doesn't cause stack overflow after locking. + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + + // The bcp has already been incremented. Just need to dispatch to + // next instruction. + __ dispatch_next(vtos); + + BLOCK_COMMENT("} monitorenter"); +} + + +void TemplateTable::monitorexit() { + transition(atos, vtos); + + BLOCK_COMMENT("monitorexit {"); + + // Check for NULL object. + __ null_check(Z_tos); + + NearLabel found, not_found; + const Register Rcurr_monitor = Z_ARG2; + + // Find matching slot. + { + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + NearLabel entry, loop; + + const Register Rbot = Z_ARG3; // Points to word under bottom of monitor block. + const Register Rlocked_obj = Z_ARG4; + // Starting with top-most entry. + __ get_monitors(Rcurr_monitor); // Rcur_monitor = IJAVA_STATE.monitors + __ add2reg(Rbot, -frame::z_ijava_state_size, Z_fp); + +#ifdef ASSERT + address reentry = NULL; + { NearLabel ok; + __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotHigh, ok); + reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors points below monitor block bottom"); + __ bind(ok); + } + { NearLabel ok; + __ compareU64_and_branch(Rcurr_monitor, Z_esp, Assembler::bcondHigh, ok); + reentry = __ stop_chain_static(reentry, "IJAVA_STATE.monitors above Z_esp"); + __ bind(ok); + } +#endif + + // Check if bottom reached, i.e. if there is at least one monitor. + __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondEqual, not_found); + + __ bind(loop); + // Check if current entry is for same object. 
+ __ z_lg(Rlocked_obj, Address(Rcurr_monitor, BasicObjectLock::obj_offset_in_bytes())); + // If same object then stop searching. + __ compareU64_and_branch(Rlocked_obj, Z_tos, Assembler::bcondEqual, found); + // Otherwise advance to next entry. + __ add2reg(Rcurr_monitor, entry_size); + // Check if bottom reached, if not at bottom then check this entry. + __ compareU64_and_branch(Rcurr_monitor, Rbot, Assembler::bcondNotEqual, loop); + } + + __ bind(not_found); + // Error handling. Unlocking was not block-structured. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(found); + __ push_ptr(Z_tos); // Make sure object is on stack (contract with oopMaps). + __ unlock_object(Rcurr_monitor, Z_tos); + __ pop_ptr(Z_tos); // Discard object. + BLOCK_COMMENT("} monitorexit"); +} + +// Wide instructions +void TemplateTable::wide() { + transition(vtos, vtos); + + __ z_llgc(Z_R1_scratch, at_bcp(1)); + __ z_sllg(Z_R1_scratch, Z_R1_scratch, LogBytesPerWord); + __ load_absolute_address(Z_tmp_1, (address) Interpreter::_wentry_point); + __ mem2reg_opt(Z_tmp_1, Address(Z_tmp_1, Z_R1_scratch)); + __ z_br(Z_tmp_1); + // Note: the bcp increment step is part of the individual wide + // bytecode implementations. +} + +// Multi arrays +void TemplateTable::multianewarray() { + transition(vtos, atos); + + __ z_llgc(Z_tmp_1, at_bcp(3)); // Get number of dimensions. + // Slot count to byte offset. + __ z_sllg(Z_tmp_1, Z_tmp_1, Interpreter::logStackElementSize); + // Z_esp points past last_dim, so set to Z_ARG2 to first_dim address. + __ load_address(Z_ARG2, Address(Z_esp, Z_tmp_1)); + call_VM(Z_RET, + CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), + Z_ARG2); + // Pop dimensions from expression stack. 
+ __ z_agr(Z_esp, Z_tmp_1); +} diff --git a/hotspot/src/cpu/s390/vm/templateTable_s390.hpp b/hotspot/src/cpu/s390/vm/templateTable_s390.hpp new file mode 100644 index 00000000000..51a022cb3bc --- /dev/null +++ b/hotspot/src/cpu/s390/vm/templateTable_s390.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_TEMPLATETABLE_S390_HPP +#define CPU_S390_VM_TEMPLATETABLE_S390_HPP + + static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index = noreg, // itable index, MethodType, etc. + Register recv = noreg, // If caller wants to see it. + Register flags = noreg); // If caller wants to test it. 
+  static void invokevirtual_helper(Register index, Register recv,
+                                   Register flags);  // NOTE(review): presumably the shared tail of invokevirtual - confirm in the .cpp.
+
+  // Helpers. NOTE(review): index_check takes a 'shift' that index_check_without_pop lacks; presumably the element-size shift - confirm in the .cpp.
+  static void index_check(Register array, Register index, unsigned int shift);
+  static void index_check_without_pop(Register array, Register index);
+
+#endif // CPU_S390_VM_TEMPLATETABLE_S390_HPP
diff --git a/hotspot/src/cpu/s390/vm/vmStructs_s390.hpp b/hotspot/src/cpu/s390/vm/vmStructs_s390.hpp
new file mode 100644
index 00000000000..e4c69a7f6d2
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/vmStructs_s390.hpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_S390_VM_VMSTRUCTS_S390_HPP
+#define CPU_S390_VM_VMSTRUCTS_S390_HPP
+
+// These are the CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+// Each of the four macros below has an empty replacement list, so expanding it contributes no entries.
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)
+
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+
+#endif // CPU_S390_VM_VMSTRUCTS_S390_HPP
diff --git a/hotspot/src/cpu/s390/vm/vm_version_s390.cpp b/hotspot/src/cpu/s390/vm/vm_version_s390.cpp
new file mode 100644
index 00000000000..37e1da9e00f
--- /dev/null
+++ b/hotspot/src/cpu/s390/vm/vm_version_s390.cpp
@@ -0,0 +1,1182 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "code/compiledIC.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "vm_version_s390.hpp" + +# include + +bool VM_Version::_is_determine_features_test_running = false; + +unsigned long VM_Version::_features[_features_buffer_len] = {0, 0, 0, 0}; +unsigned long VM_Version::_cipher_features[_features_buffer_len] = {0, 0, 0, 0}; +unsigned long VM_Version::_msgdigest_features[_features_buffer_len] = {0, 0, 0, 0}; +unsigned int VM_Version::_nfeatures = 0; +unsigned int VM_Version::_ncipher_features = 0; +unsigned int VM_Version::_nmsgdigest_features = 0; +unsigned int VM_Version::_Dcache_lineSize = 256; +unsigned int VM_Version::_Icache_lineSize = 256; + +static const char* z_gen[] = {" ", "G1", "G2", "G3", "G4", "G5", "G6", "G7" }; +static const char* z_machine[] = {" ", "2064", "2084", "2094", "2097", "2817", " ", "2964" }; +static const char* z_name[] = {" ", "z900", "z990", "z9 EC", "z10 EC", "z196 EC", "ec12", "z13" }; + +void VM_Version::initialize() { + determine_features(); // Get processor capabilities. + set_features_string(); // Set a descriptive feature indication. + + if (Verbose) { + print_features(); + } + + intx cache_line_size = Dcache_lineSize(0); + + MaxVectorSize = 8; + + if (has_PrefetchRaw()) { + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { // not preset + // 0 = no prefetch. + // 1 = Prefetch instructions for each allocation. + // 2 = Use TLAB watermark to gate allocation prefetch. + AllocatePrefetchStyle = 1; + } + + if (AllocatePrefetchStyle > 0) { // Prefetching turned on at all? + // Distance to prefetch ahead of allocation pointer. 
+ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance) || (AllocatePrefetchDistance < 0)) { // not preset + AllocatePrefetchDistance = 0; + } + + // Number of lines to prefetch ahead of allocation pointer. + if (FLAG_IS_DEFAULT(AllocatePrefetchLines) || (AllocatePrefetchLines <= 0)) { // not preset + AllocatePrefetchLines = 3; + } + + // Step size in bytes of sequential prefetch instructions. + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) || (AllocatePrefetchStepSize <= 0)) { // not preset + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); + } else if (AllocatePrefetchStepSize < cache_line_size) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); + } else { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); + } + } else { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); + AllocatePrefetchDistance = 0; + AllocatePrefetchLines = 0; + // Can't be zero. Will SIGFPE during constraints checking. + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); + } + + } else { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); + AllocatePrefetchDistance = 0; + AllocatePrefetchLines = 0; + // Can't be zero. Will SIGFPE during constraints checking. + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); + } + + // TODO: + // On z/Architecture, cache line size is significantly large (256 bytes). Do we really need + // to keep contended members that far apart? Performance tests are required. + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) { + ContendedPaddingWidth = cache_line_size; + } + + // On z/Architecture, the CRC32 intrinsics had to be implemented "by hand". + // They cannot be based on the CHECKSUM instruction which has been there + // since the very beginning (of z/Architecture). It computes "some kind of" a checksum + // which has nothing to do with the CRC32 algorithm. 
+ if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + FLAG_SET_DEFAULT(UseCRC32Intrinsics, true); + } + + // On z/Architecture, we take UseAES as the general switch to enable/disable the AES intrinsics. + // The specific, and yet to be defined, switches UseAESxxxIntrinsics will then be set + // depending on the actual machine capabilities. + // Explicitly setting them via CmdLine option takes precedence, of course. + // TODO: UseAESIntrinsics must be made keylength specific. + // As of March 2015 and Java8, only AES128 is supported by the Java Cryptographic Extensions. + // Therefore, UseAESIntrinsics is of minimal use at the moment. + if (FLAG_IS_DEFAULT(UseAES) && has_Crypto_AES()) { + FLAG_SET_DEFAULT(UseAES, true); + } + if (UseAES && !has_Crypto_AES()) { + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + if (UseAES) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + FLAG_SET_DEFAULT(UseAESIntrinsics, true); + } + } + if (UseAESIntrinsics && !has_Crypto_AES()) { + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + + // TODO: implement AES/CTR intrinsics + if (UseAESCTRIntrinsics) { + warning("AES/CTR intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); + } + + // TODO: implement GHASH intrinsics + if (UseGHASHIntrinsics) { + warning("GHASH intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); + } + + if (UseFMA) { + warning("FMA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseFMA, false); + } + + // On z/Architecture, we take UseSHA as the general switch to enable/disable the SHA intrinsics. + // The specific switches UseSHAxxxIntrinsics will then be set depending on the actual + // machine capabilities. + // Explicitly setting them via CmdLine option takes precedence, of course. 
+ if (FLAG_IS_DEFAULT(UseSHA) && has_Crypto_SHA()) { + FLAG_SET_DEFAULT(UseSHA, true); + } + if (UseSHA && !has_Crypto_SHA()) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + if (UseSHA && has_Crypto_SHA1()) { + if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + } else if (UseSHA1Intrinsics) { + warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + if (UseSHA && has_Crypto_SHA256()) { + if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + } else if (UseSHA256Intrinsics) { + warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + if (UseSHA && has_Crypto_SHA512()) { + if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); + } + } else if (UseSHA512Intrinsics) { + warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + if (UseAdler32Intrinsics) { + warning("Adler32Intrinsics not available on this CPU."); + FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); + } + + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); + } + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); + } + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + FLAG_SET_DEFAULT(UsePopCountInstruction, true); + } + + // z/Architecture supports 8-byte compare-exchange operations + // (see Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr) + // and 'atomic long memory ops' (see Unsafe_GetLongVolatile). 
+ _supports_cx8 = true; + + _supports_atomic_getadd4 = VM_Version::has_LoadAndALUAtomicV1(); + _supports_atomic_getadd8 = VM_Version::has_LoadAndALUAtomicV1(); + + // z/Architecture supports unaligned memory accesses. + // Performance penalty is negligible. An additional tick or so + // is lost if the accessed data spans a cache line boundary. + // Unaligned accesses are not atomic, of course. + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { + FLAG_SET_DEFAULT(UseUnalignedAccesses, true); + } +} + + +void VM_Version::set_features_string() { + + unsigned int ambiguity = 0; + if (is_z13()) { + _features_string = "System z G7-z13 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB, Cond Load/Store, Interlocked Update, TxM, VectorInstr)"; + ambiguity++; + } + if (is_ec12()) { + _features_string = "System z G6-EC12 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB, Cond Load/Store, Interlocked Update, TxM)"; + ambiguity++; + } + if (is_z196()) { + _features_string = "System z G5-z196 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB, Cond Load/Store, Interlocked Update)"; + ambiguity++; + } + if (is_z10()) { + _features_string = "System z G4-z10 (LDISP_fast, ExtImm, PCrel Load/Store, CmpB)"; + ambiguity++; + } + if (is_z9()) { + _features_string = "System z G3-z9 (LDISP_fast, ExtImm), out-of-support as of 2016-04-01"; + ambiguity++; + } + if (is_z990()) { + _features_string = "System z G2-z990 (LDISP_fast), out-of-support as of 2014-07-01"; + ambiguity++; + } + if (is_z900()) { + _features_string = "System z G1-z900 (LDISP), out-of-support as of 2014-07-01"; + ambiguity++; + } + + if (ambiguity == 0) { + _features_string = "z/Architecture (unknown generation)"; + } else if (ambiguity > 1) { + tty->print_cr("*** WARNING *** Ambiguous z/Architecture detection, ambiguity = %d", ambiguity); + tty->print_cr(" oldest detected generation is %s", _features_string); + _features_string = "z/Architecture (ambiguous detection)"; + } +} + +// featureBuffer - bit array indicating availability of various 
features +// featureNum - bit index of feature to be tested +// Featurenum < 0 requests test for any nonzero bit in featureBuffer. +// bufLen - length of featureBuffer in bits +bool VM_Version::test_feature_bit(unsigned long* featureBuffer, int featureNum, unsigned int bufLen) { + assert(bufLen > 0, "buffer len must be positive"); + assert((bufLen & 0x0007) == 0, "unaligned buffer len"); + assert(((intptr_t)featureBuffer&0x0007) == 0, "unaligned feature buffer"); + if (featureNum < 0) { + // Any bit set at all? + bool anyBit = false; + for (size_t i = 0; i < bufLen/(8*sizeof(long)); i++) { + anyBit = anyBit || (featureBuffer[i] != 0); + } + return anyBit; + } else { + assert((unsigned int)featureNum < bufLen, "feature index out of range"); + unsigned char* byteBuffer = (unsigned char*)featureBuffer; + int byteIndex = featureNum/(8*sizeof(char)); + int bitIndex = featureNum%(8*sizeof(char)); + // Indexed bit set? + return (byteBuffer[byteIndex] & (1U<<(7-bitIndex))) != 0; + } +} + +void VM_Version::print_features_internal(const char* text, bool print_anyway) { + tty->print_cr("%s %s", text, features_string()); + tty->print("%s", text); + for (unsigned int i = 0; i < _nfeatures; i++) { + tty->print(" 0x%16.16lx", _features[i]); + } + tty->cr(); + + if (Verbose || print_anyway) { + // z900 + if (has_long_displacement() ) tty->print_cr("available: %s", "LongDispFacility"); + // z990 + if (has_long_displacement_fast() ) tty->print_cr("available: %s", "LongDispFacilityHighPerf"); + if (has_ETF2() && has_ETF3() ) tty->print_cr("available: %s", "ETF2 and ETF3"); + if (has_Crypto() ) tty->print_cr("available: %s", "CryptoFacility"); + // z9 + if (has_extended_immediate() ) tty->print_cr("available: %s", "ExtImmedFacility"); + if (has_StoreFacilityListExtended()) tty->print_cr("available: %s", "StoreFacilityListExtended"); + if (has_StoreClockFast() ) tty->print_cr("available: %s", "StoreClockFast"); + if (has_ETF2Enhancements() ) tty->print_cr("available: %s", "ETF2 
Enhancements"); + if (has_ETF3Enhancements() ) tty->print_cr("available: %s", "ETF3 Enhancements"); + if (has_HFPUnnormalized() ) tty->print_cr("available: %s", "HFPUnnormalizedFacility"); + if (has_HFPMultiplyAndAdd() ) tty->print_cr("available: %s", "HFPMultiplyAndAddFacility"); + // z10 + if (has_ParsingEnhancements() ) tty->print_cr("available: %s", "Parsing Enhancements"); + if (has_ExtractCPUtime() ) tty->print_cr("available: %s", "ExtractCPUTime"); + if (has_CompareSwapStore() ) tty->print_cr("available: %s", "CompareSwapStore"); + if (has_GnrlInstrExtensions() ) tty->print_cr("available: %s", "General Instruction Extensions"); + if (has_CompareBranch() ) tty->print_cr(" available: %s", "Compare and Branch"); + if (has_CompareTrap() ) tty->print_cr(" available: %s", "Compare and Trap"); + if (has_RelativeLoadStore() ) tty->print_cr(" available: %s", "Relative Load/Store"); + if (has_MultiplySingleImm32() ) tty->print_cr(" available: %s", "MultiplySingleImm32"); + if (has_Prefetch() ) tty->print_cr(" available: %s", "Prefetch"); + if (has_MoveImmToMem() ) tty->print_cr(" available: %s", "Direct Moves Immediate to Memory"); + if (has_MemWithImmALUOps() ) tty->print_cr(" available: %s", "Direct ALU Ops Memory .op. 
Immediate"); + if (has_ExtractCPUAttributes() ) tty->print_cr(" available: %s", "Extract CPU Atributes"); + if (has_ExecuteExtensions() ) tty->print_cr("available: %s", "ExecuteExtensions"); + if (has_FPSupportEnhancements() ) tty->print_cr("available: %s", "FPSupportEnhancements"); + if (has_DecimalFloatingPoint() ) tty->print_cr("available: %s", "DecimalFloatingPoint"); + // z196 + if (has_DistinctOpnds() ) tty->print_cr("available: %s", "Distinct Operands"); + if (has_InterlockedAccessV1() ) tty->print_cr(" available: %s", "InterlockedAccess V1 (fast)"); + if (has_PopCount() ) tty->print_cr(" available: %s", "PopCount"); + if (has_LoadStoreConditional() ) tty->print_cr(" available: %s", "LoadStoreConditional"); + if (has_HighWordInstr() ) tty->print_cr(" available: %s", "HighWord Instructions"); + if (has_FastSync() ) tty->print_cr(" available: %s", "FastSync (bcr 14,0)"); + if (has_AtomicMemWithImmALUOps() ) tty->print_cr("available: %s", "Atomic Direct ALU Ops Memory .op. Immediate"); + if (has_FPExtensions() ) tty->print_cr("available: %s", "Floatingpoint Extensions"); + if (has_CryptoExt3() ) tty->print_cr("available: %s", "Crypto Extensions 3"); + if (has_CryptoExt4() ) tty->print_cr("available: %s", "Crypto Extensions 4"); + // EC12 + if (has_MiscInstrExt() ) tty->print_cr("available: %s", "Miscelaneous Instruction Extensions"); + if (has_ExecutionHint() ) tty->print_cr(" available: %s", "Execution Hints (branch prediction)"); + if (has_ProcessorAssist() ) tty->print_cr(" available: %s", "Processor Assists"); + if (has_LoadAndTrap() ) tty->print_cr(" available: %s", "Load and Trap"); + if (has_TxMem() ) tty->print_cr("available: %s", "Transactional Memory"); + if (has_InterlockedAccessV2() ) tty->print_cr(" available: %s", "InterlockedAccess V2 (fast)"); + if (has_DFPZonedConversion() ) tty->print_cr(" available: %s", "DFP Zoned Conversions"); + // z13 + if (has_LoadStoreConditional2() ) tty->print_cr("available: %s", "Load/Store Conditional 2"); + if 
(has_CryptoExt5() ) tty->print_cr("available: %s", "Crypto Extensions 5"); + if (has_DFPPackedConversion() ) tty->print_cr("available: %s", "DFP Packed Conversions"); + if (has_VectorFacility() ) tty->print_cr("available: %s", "Vector Facility"); + // test switches + if (has_TestFeature1Impl() ) tty->print_cr("available: %s", "TestFeature1Impl"); + if (has_TestFeature2Impl() ) tty->print_cr("available: %s", "TestFeature2Impl"); + if (has_TestFeature4Impl() ) tty->print_cr("available: %s", "TestFeature4Impl"); + if (has_TestFeature8Impl() ) tty->print_cr("available: %s", "TestFeature8Impl"); + + if (has_Crypto()) { + tty->cr(); + tty->print_cr("detailled availability of %s capabilities:", "CryptoFacility"); + if (test_feature_bit(&_cipher_features[0], -1, 2*Cipher::_featureBits)) { + tty->cr(); + tty->print_cr(" available: %s", "Message Cipher Functions"); + } + if (test_feature_bit(&_cipher_features[0], -1, (int)Cipher::_featureBits)) { + tty->print_cr(" available Crypto Features of KM (Cipher Message):"); + for (unsigned int i = 0; i < Cipher::_featureBits; i++) { + if (test_feature_bit(&_cipher_features[0], i, (int)Cipher::_featureBits)) { + switch (i) { + case Cipher::_Query: tty->print_cr(" available: KM Query"); break; + case Cipher::_DEA: tty->print_cr(" available: KM DEA"); break; + case Cipher::_TDEA128: tty->print_cr(" available: KM TDEA-128"); break; + case Cipher::_TDEA192: tty->print_cr(" available: KM TDEA-192"); break; + case Cipher::_EncryptedDEA: tty->print_cr(" available: KM Encrypted DEA"); break; + case Cipher::_EncryptedDEA128: tty->print_cr(" available: KM Encrypted DEA-128"); break; + case Cipher::_EncryptedDEA192: tty->print_cr(" available: KM Encrypted DEA-192"); break; + case Cipher::_AES128: tty->print_cr(" available: KM AES-128"); break; + case Cipher::_AES192: tty->print_cr(" available: KM AES-192"); break; + case Cipher::_AES256: tty->print_cr(" available: KM AES-256"); break; + case Cipher::_EnccryptedAES128: tty->print_cr(" available: 
KM Encrypted-AES-128"); break; + case Cipher::_EnccryptedAES192: tty->print_cr(" available: KM Encrypted-AES-192"); break; + case Cipher::_EnccryptedAES256: tty->print_cr(" available: KM Encrypted-AES-256"); break; + case Cipher::_XTSAES128: tty->print_cr(" available: KM XTS-AES-128"); break; + case Cipher::_XTSAES256: tty->print_cr(" available: KM XTS-AES-256"); break; + case Cipher::_EncryptedXTSAES128: tty->print_cr(" available: KM XTS-Encrypted-AES-128"); break; + case Cipher::_EncryptedXTSAES256: tty->print_cr(" available: KM XTS-Encrypted-AES-256"); break; + default: tty->print_cr(" available: unknown KM code %d", i); break; + } + } + } + } + if (test_feature_bit(&_cipher_features[2], -1, (int)Cipher::_featureBits)) { + tty->print_cr(" available Crypto Features of KMC (Cipher Message with Chaining):"); + for (unsigned int i = 0; i < Cipher::_featureBits; i++) { + if (test_feature_bit(&_cipher_features[2], i, (int)Cipher::_featureBits)) { + switch (i) { + case Cipher::_Query: tty->print_cr(" available: KMC Query"); break; + case Cipher::_DEA: tty->print_cr(" available: KMC DEA"); break; + case Cipher::_TDEA128: tty->print_cr(" available: KMC TDEA-128"); break; + case Cipher::_TDEA192: tty->print_cr(" available: KMC TDEA-192"); break; + case Cipher::_EncryptedDEA: tty->print_cr(" available: KMC Encrypted DEA"); break; + case Cipher::_EncryptedDEA128: tty->print_cr(" available: KMC Encrypted DEA-128"); break; + case Cipher::_EncryptedDEA192: tty->print_cr(" available: KMC Encrypted DEA-192"); break; + case Cipher::_AES128: tty->print_cr(" available: KMC AES-128"); break; + case Cipher::_AES192: tty->print_cr(" available: KMC AES-192"); break; + case Cipher::_AES256: tty->print_cr(" available: KMC AES-256"); break; + case Cipher::_EnccryptedAES128: tty->print_cr(" available: KMC Encrypted-AES-128"); break; + case Cipher::_EnccryptedAES192: tty->print_cr(" available: KMC Encrypted-AES-192"); break; + case Cipher::_EnccryptedAES256: tty->print_cr(" available: KMC 
Encrypted-AES-256"); break; + case Cipher::_PRNG: tty->print_cr(" available: KMC PRNG"); break; + default: tty->print_cr(" available: unknown KMC code %d", i); break; + } + } + } + } + + if (test_feature_bit(&_msgdigest_features[0], -1, 2*MsgDigest::_featureBits)) { + tty->cr(); + tty->print_cr(" available: %s", "Message Digest Functions for SHA"); + } + if (test_feature_bit(&_msgdigest_features[0], -1, (int)MsgDigest::_featureBits)) { + tty->print_cr(" available Features of KIMD (Msg Digest):"); + for (unsigned int i = 0; i < MsgDigest::_featureBits; i++) { + if (test_feature_bit(&_msgdigest_features[0], i, (int)MsgDigest::_featureBits)) { + switch (i) { + case MsgDigest::_Query: tty->print_cr(" available: KIMD Query"); break; + case MsgDigest::_SHA1: tty->print_cr(" available: KIMD SHA-1"); break; + case MsgDigest::_SHA256: tty->print_cr(" available: KIMD SHA-256"); break; + case MsgDigest::_SHA512: tty->print_cr(" available: KIMD SHA-512"); break; + case MsgDigest::_GHASH: tty->print_cr(" available: KIMD GHASH"); break; + default: tty->print_cr(" available: unknown code %d", i); break; + } + } + } + } + if (test_feature_bit(&_msgdigest_features[2], -1, (int)MsgDigest::_featureBits)) { + tty->print_cr(" available Features of KLMD (Msg Digest):"); + for (unsigned int i = 0; i < MsgDigest::_featureBits; i++) { + if (test_feature_bit(&_msgdigest_features[2], i, (int)MsgDigest::_featureBits)) { + switch (i) { + case MsgDigest::_Query: tty->print_cr(" available: KLMD Query"); break; + case MsgDigest::_SHA1: tty->print_cr(" available: KLMD SHA-1"); break; + case MsgDigest::_SHA256: tty->print_cr(" available: KLMD SHA-256"); break; + case MsgDigest::_SHA512: tty->print_cr(" available: KLMD SHA-512"); break; + default: tty->print_cr(" available: unknown code %d", i); break; + } + } + } + } + } + if (ContendedPaddingWidth > 0) { + tty->cr(); + tty->print_cr("ContendedPaddingWidth " INTX_FORMAT, ContendedPaddingWidth); + } + } +} + +void VM_Version::print_features() { + 
print_features_internal("Version:"); +} + +void VM_Version::reset_features(bool reset) { + if (reset) { + for (unsigned int i = 0; i < _features_buffer_len; i++) { + VM_Version::_features[i] = 0; + } + } +} + + +void VM_Version::set_features_z900(bool reset) { + reset_features(reset); + + set_has_long_displacement(); + set_has_ETF2(); +} + +void VM_Version::set_features_z990(bool reset) { + reset_features(reset); + + set_features_z900(false); + set_has_ETF3(); + set_has_long_displacement_fast(); + set_has_HFPMultiplyAndAdd(); +} + +void VM_Version::set_features_z9(bool reset) { + reset_features(reset); + + set_features_z990(false); + set_has_StoreFacilityListExtended(); + // set_has_Crypto(); // Do not set, crypto features must be retrieved separately. + set_has_ETF2Enhancements(); + set_has_ETF3Enhancements(); + set_has_extended_immediate(); + set_has_StoreClockFast(); + set_has_HFPUnnormalized(); +} + +void VM_Version::set_features_z10(bool reset) { + reset_features(reset); + + set_features_z9(false); + set_has_CompareSwapStore(); + set_has_RelativeLoadStore(); + set_has_CompareBranch(); + set_has_CompareTrap(); + set_has_MultiplySingleImm32(); + set_has_Prefetch(); + set_has_MoveImmToMem(); + set_has_MemWithImmALUOps(); + set_has_ExecuteExtensions(); + set_has_FPSupportEnhancements(); + set_has_DecimalFloatingPoint(); + set_has_ExtractCPUtime(); + set_has_CryptoExt3(); +} + +void VM_Version::set_features_z196(bool reset) { + reset_features(reset); + + set_features_z10(false); + set_has_InterlockedAccessV1(); + set_has_PopCount(); + set_has_LoadStoreConditional(); + set_has_HighWordInstr(); + set_has_FastSync(); + set_has_FPExtensions(); + set_has_DistinctOpnds(); + set_has_CryptoExt4(); +} + +void VM_Version::set_features_ec12(bool reset) { + reset_features(reset); + + set_features_z196(false); + set_has_MiscInstrExt(); + set_has_InterlockedAccessV2(); + set_has_LoadAndALUAtomicV2(); + set_has_TxMem(); +} + +void VM_Version::set_features_z13(bool reset) { + 
reset_features(reset); + + set_features_ec12(false); + set_has_LoadStoreConditional2(); + set_has_CryptoExt5(); + set_has_VectorFacility(); +} + +void VM_Version::set_features_from(const char* march) { + bool err = false; + bool prt = false; + + if ((march != NULL) && (march[0] != '\0')) { + const int buf_len = 16; + const int hdr_len = 5; + char buf[buf_len]; + if (strlen(march) >= hdr_len) { + memcpy(buf, march, hdr_len); + buf[hdr_len] = '\00'; + } else { + buf[0] = '\00'; + } + + if (!strcmp(march, "z900")) { + set_features_z900(); + } else if (!strcmp(march, "z990")) { + set_features_z990(); + } else if (!strcmp(march, "z9")) { + set_features_z9(); + } else if (!strcmp(march, "z10")) { + set_features_z10(); + } else if (!strcmp(march, "z196")) { + set_features_z196(); + } else if (!strcmp(march, "ec12")) { + set_features_ec12(); + } else if (!strcmp(march, "z13")) { + set_features_z13(); + } else if (!strcmp(buf, "ztest")) { + assert(!has_TestFeaturesImpl(), "possible facility list flag conflict"); + if (strlen(march) > hdr_len) { + int itest = 0; + if ((strlen(march)-hdr_len) >= buf_len) err = true; + if (!err) { + memcpy(buf, &march[hdr_len], strlen(march)-hdr_len); + buf[strlen(march)-hdr_len] = '\00'; + for (size_t i = 0; !err && (i < strlen(buf)); i++) { + itest = itest*10 + buf[i]-'0'; + err = err || ((buf[i]-'0') < 0) || ((buf[i]-'0') > 9) || (itest > 15); + } + } + if (!err) { + prt = true; + if (itest & 0x01) { set_has_TestFeature1Impl(); } + if (itest & 0x02) { set_has_TestFeature2Impl(); } + if (itest & 0x04) { set_has_TestFeature4Impl(); } + if (itest & 0x08) { set_has_TestFeature8Impl(); } + } + } else { + prt = true; + set_has_TestFeature1Impl(); + set_has_TestFeature2Impl(); + set_has_TestFeature4Impl(); + set_has_TestFeature8Impl(); + } + } else { + err = true; + } + if (!err) { + set_features_string(); + if (prt || PrintAssembly) { + print_features_internal("CPU Version as set by cmdline option:", prt); + } + } else { + 
tty->print_cr("***Warning: Unsupported ProcessorArchitecture: %s, internal settings left undisturbed.", march); + } + } + +} + +static long (*getFeatures)(unsigned long*, int, int) = NULL; + +void VM_Version::set_getFeatures(address entryPoint) { + if (getFeatures == NULL) { + getFeatures = (long(*)(unsigned long*, int, int))entryPoint; + } +} + +long VM_Version::call_getFeatures(unsigned long* buffer, int buflen, int functionCode) { + VM_Version::_is_determine_features_test_running = true; + long functionResult = (*getFeatures)(buffer, buflen, functionCode); + VM_Version::_is_determine_features_test_running = false; + return functionResult; +} + +// Helper function for "extract cache attribute" instruction. +int VM_Version::calculate_ECAG_functionCode(unsigned int attributeIndication, + unsigned int levelIndication, + unsigned int typeIndication) { + return (attributeIndication<<4) | (levelIndication<<1) | typeIndication; +} + +void VM_Version::determine_features() { + + const int cbuf_size = _code_buffer_len; + const int buf_len = _features_buffer_len; + + // Allocate code buffer space for the detection code. + ResourceMark rm; + CodeBuffer cbuf("determine CPU features", cbuf_size, 0); + MacroAssembler* a = new MacroAssembler(&cbuf); + + // Emit code. + set_getFeatures(a->pc()); + address code = a->pc(); + + // Try STFLE. Possible INVOP will cause defaults to be used. + Label getFEATURES; + Label getCPUFEATURES; // fcode = -1 (cache) + Label getCIPHERFEATURES; // fcode = -2 (cipher) + Label getMSGDIGESTFEATURES; // fcode = -3 (SHA) + Label checkLongDispFast; + Label noLongDisp; + Label posDisp, negDisp; + Label errRTN; + a->z_ltgfr(Z_R0, Z_ARG2); // Buf len to r0 and test. + a->z_brl(getFEATURES); // negative -> Get machine features. + a->z_brz(checkLongDispFast); // zero -> Check for high-speed Long Displacement Facility. + a->z_aghi(Z_R0, -1); + a->z_stfle(0, Z_ARG1); + a->z_lg(Z_R1, 0, Z_ARG1); // Get first DW of facility list. 
+ a->z_lgr(Z_RET, Z_R0); // Calculate rtn value for success. + a->z_la(Z_RET, 1, Z_RET); + a->z_brnz(errRTN); // Instr failed if non-zero CC. + a->z_ltgr(Z_R1, Z_R1); // Instr failed if first DW == 0. + a->z_bcr(Assembler::bcondNotZero, Z_R14); // Successful return. + + a->bind(errRTN); + a->z_lngr(Z_RET, Z_RET); + a->z_ltgr(Z_R1, Z_R1); + a->z_bcr(Assembler::bcondNotZero, Z_R14); // Return "buffer too small". + a->z_xgr(Z_RET, Z_RET); + a->z_br(Z_R14); // Return "operation aborted". + + a->bind(getFEATURES); + a->z_cghi(Z_R0, -1); // -1: Extract CPU attributes, currently: cache layout only. + a->z_bre(getCPUFEATURES); + a->z_cghi(Z_R0, -2); // -2: Extract detailed crypto capabilities (cipher instructions). + a->z_bre(getCIPHERFEATURES); + a->z_cghi(Z_R0, -3); // -3: Extract detailed crypto capabilities (msg digest instructions). + a->z_bre(getMSGDIGESTFEATURES); + + a->z_xgr(Z_RET, Z_RET); // Not a valid function code. + a->z_br(Z_R14); // Return "operation aborted". + + // Try KIMD/KLMD query function to get details about msg digest (secure hash, SHA) instructions. + a->bind(getMSGDIGESTFEATURES); + a->z_lghi(Z_R0,(int)MsgDigest::_Query); // query function code + a->z_lgr(Z_R1,Z_R2); // param block addr, 2*16 bytes min size + a->z_kimd(Z_R2,Z_R2); // Get available KIMD functions (bit pattern in param blk). + a->z_la(Z_R1,16,Z_R1); // next param block addr + a->z_klmd(Z_R2,Z_R2); // Get available KLMD functions (bit pattern in param blk). + a->z_lghi(Z_RET,4); + a->z_br(Z_R14); + + // Try KM/KMC query function to get details about crypto instructions. 
+ a->bind(getCIPHERFEATURES); + a->z_lghi(Z_R0,(int)Cipher::_Query); // query function code + a->z_lgr(Z_R1,Z_R2); // param block addr, 2*16 bytes min size (KM/KMC output) + a->z_km(Z_R2,Z_R2); // get available KM functions + a->z_la(Z_R1,16,Z_R1); // next param block addr + a->z_kmc(Z_R2,Z_R2); // get available KMC functions + a->z_lghi(Z_RET,4); + a->z_br(Z_R14); + + // Use EXTRACT CPU ATTRIBUTE instruction to get information about cache layout. + a->bind(getCPUFEATURES); + a->z_xgr(Z_R0,Z_R0); // as recommended in instruction documentation + a->z_ecag(Z_RET,Z_R0,0,Z_ARG3); // Extract information as requested by Z_ARG3 contents. + a->z_br(Z_R14); + + // Check the performance of the Long Displacement Facility, i.e. find out if we are running on z900 or newer. + a->bind(checkLongDispFast); + a->z_llill(Z_R0, 0xffff); // preset #iterations + a->z_larl(Z_R1, posDisp); + a->z_stck(0, Z_ARG1); // Get begin timestamp. + + a->bind(posDisp); // Positive disp loop. + a->z_lg(Z_ARG2, 0, Z_ARG1); + a->z_bctgr(Z_R0, Z_R1); + + a->z_stck(0, Z_ARG1); // Get end timestamp. + a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1); // Calculate elapsed time. + a->z_lcgr(Z_ARG2, Z_ARG2); + a->z_srlg(Z_ARG2, Z_ARG2, 12); // LSB: now microseconds + a->z_stg(Z_ARG2, 8, Z_ARG1); // Store difference in buffer[1]. + + a->z_llill(Z_R0, 0xffff); // preset #iterations + a->z_larl(Z_R1, negDisp); + a->z_xgr(Z_ARG2, Z_ARG2); // Clear to detect absence of LongDisp facility. + a->z_stck(0, Z_ARG1); // Get begin timestamp. + a->z_la(Z_ARG1, 8, Z_ARG1); + + a->bind(negDisp); // Negative disp loop. + a->z_lg(Z_ARG2, -8, Z_ARG1); + a->z_bctgr(Z_R0, Z_R1); + + a->z_aghi(Z_ARG1, -8); + a->z_stck(0, Z_ARG1); // Get end timestamp. + a->z_ltgr(Z_ARG2, Z_ARG2); // Check for absence of LongDisp facility. + a->z_brz(noLongDisp); + a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1); // Calc elapsed time. 
+ a->z_lcgr(Z_ARG2, Z_ARG2); + a->z_srlg(Z_ARG2, Z_ARG2, 12); // LSB: now microseconds + a->z_stg(Z_ARG2, 0, Z_ARG1); // store difference in buffer[0] + + a->z_llill(Z_RET,0xffff); + a->z_br(Z_R14); + + a->bind(noLongDisp); + a->z_lghi(Z_RET,-1); + a->z_br(Z_R14); + + address code_end = a->pc(); + a->flush(); + + // Print the detection code. + bool printVerbose = Verbose || PrintAssembly || PrintStubCode; + if (printVerbose) { + ttyLocker ttyl; + tty->print_cr("Decoding CPU feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code)); + tty->print_cr("Stub length is %ld bytes, codebuffer reserves %d bytes, %ld bytes spare.", + code_end-code, cbuf_size, cbuf_size-(code_end-code)); + + // Use existing decode function. This enables the [Code] format which is needed to DecodeErrorFile. + Disassembler::decode((u_char*)code, (u_char*)code_end, tty); + } + + // Prepare for detection code execution and clear work buffer. + _nfeatures = 0; + _ncipher_features = 0; + unsigned long buffer[buf_len]; + + for (int i = 0; i < buf_len; i++) { + buffer[i] = 0L; + } + + // execute code + // Illegal instructions will be replaced by 0 in signal handler. + // In case of problems, call_getFeatures will return a not-positive result. + long used_len = call_getFeatures(buffer, buf_len, 0); + + bool ok; + if (used_len == 1) { + ok = true; + } else if (used_len > 1) { + unsigned int used_lenU = (unsigned int)used_len; + ok = true; + for (unsigned int i = 1; i < used_lenU; i++) { + ok = ok && (buffer[i] == 0L); + } + if (printVerbose && !ok) { + bool compact = false; + tty->print_cr("Note: feature list has %d (i.e. 
more than one) array elements.", used_lenU); + if (compact) { + tty->print("non-zero feature list elements:"); + for (unsigned int i = 0; i < used_lenU; i++) { + tty->print(" [%d]: 0x%16.16lx", i, buffer[i]); + } + tty->cr(); + } else { + for (unsigned int i = 0; i < used_lenU; i++) { + tty->print_cr("non-zero feature list[%d]: 0x%16.16lx", i, buffer[i]); + } + } + + if (compact) { + tty->print_cr("Active features (compact view):"); + for (unsigned int k = 0; k < used_lenU; k++) { + tty->print_cr(" buffer[%d]:", k); + for (unsigned int j = k*sizeof(long); j < (k+1)*sizeof(long); j++) { + bool line = false; + for (unsigned int i = j*8; i < (j+1)*8; i++) { + bool bit = test_feature_bit(buffer, i, used_lenU*sizeof(long)*8); + if (bit) { + if (!line) { + tty->print(" byte[%d]:", j); + line = true; + } + tty->print(" [%3.3d]", i); + } + } + if (line) { + tty->cr(); + } + } + } + } else { + tty->print_cr("Active features (full view):"); + for (unsigned int k = 0; k < used_lenU; k++) { + tty->print_cr(" buffer[%d]:", k); + for (unsigned int j = k*sizeof(long); j < (k+1)*sizeof(long); j++) { + tty->print(" byte[%d]:", j); + for (unsigned int i = j*8; i < (j+1)*8; i++) { + bool bit = test_feature_bit(buffer, i, used_lenU*sizeof(long)*8); + if (bit) { + tty->print(" [%3.3d]", i); + } else { + tty->print(" "); + } + } + tty->cr(); + } + } + } + } + ok = true; + } else { // No features retrieved if we reach here. Buffer too short or instr not available. + if (used_len < 0) { + ok = false; + if (printVerbose) { + tty->print_cr("feature list buffer[%d] too short, required: buffer[%ld]", buf_len, -used_len); + } + } else { + if (printVerbose) { + tty->print_cr("feature list could not be retrieved. Running on z900 or z990? Trying to find out..."); + } + used_len = call_getFeatures(buffer, 0, 0); // Must provide at least two DW buffer elements!!!! 
+ + ok = used_len > 0; + if (ok) { + if (buffer[1]*10 < buffer[0]) { + set_features_z900(); + } else { + set_features_z990(); + } + + if (printVerbose) { + tty->print_cr("Note: high-speed long displacement test used %ld iterations.", used_len); + tty->print_cr(" Positive displacement loads took %8.8lu microseconds.", buffer[1]); + tty->print_cr(" Negative displacement loads took %8.8lu microseconds.", buffer[0]); + if (has_long_displacement_fast()) { + tty->print_cr(" assuming high-speed long displacement IS available."); + } else { + tty->print_cr(" assuming high-speed long displacement is NOT available."); + } + } + } else { + if (printVerbose) { + tty->print_cr("Note: high-speed long displacement test was not successful."); + tty->print_cr(" assuming long displacement is NOT available."); + } + } + return; // Do not copy buffer to _features, no test for cipher features. + } + } + + if (ok) { + // Fill features buffer. + // Clear work buffer. + for (int i = 0; i < buf_len; i++) { + _features[i] = buffer[i]; + _cipher_features[i] = 0; + _msgdigest_features[i] = 0; + buffer[i] = 0L; + } + _nfeatures = used_len; + } else { + for (int i = 0; i < buf_len; i++) { + _features[i] = 0; + _cipher_features[i] = 0; + _msgdigest_features[i] = 0; + buffer[i] = 0L; + } + _nfeatures = 0; + } + + // Extract Crypto Facility details. + if (has_Crypto()) { + // Get cipher features. + used_len = call_getFeatures(buffer, -2, 0); + for (int i = 0; i < buf_len; i++) { + _cipher_features[i] = buffer[i]; + } + _ncipher_features = used_len; + + // Get msg digest features. + used_len = call_getFeatures(buffer, -3, 0); + for (int i = 0; i < buf_len; i++) { + _msgdigest_features[i] = buffer[i]; + } + _nmsgdigest_features = used_len; + } + + static int levelProperties[_max_cache_levels]; // All property indications per level. 
+ static int levelScope[_max_cache_levels]; // private/shared + static const char* levelScopeText[4] = {"No cache ", + "CPU private", + "shared ", + "reserved "}; + + static int levelType[_max_cache_levels]; // D/I/mixed + static const char* levelTypeText[4] = {"separate D and I caches", + "I cache only ", + "D-cache only ", + "combined D/I cache "}; + + static unsigned int levelReserved[_max_cache_levels]; // reserved property bits + static unsigned int levelLineSize[_max_cache_levels]; + static unsigned int levelTotalSize[_max_cache_levels]; + static unsigned int levelAssociativity[_max_cache_levels]; + + + // Extract Cache Layout details. + if (has_ExtractCPUAttributes() && printVerbose) { // For information only, as of now. + bool lineSize_mismatch; + bool print_something; + long functionResult; + unsigned int attributeIndication = 0; // 0..15 + unsigned int levelIndication = 0; // 0..8 + unsigned int typeIndication = 0; // 0..1 (D-Cache, I-Cache) + int functionCode = calculate_ECAG_functionCode(attributeIndication, levelIndication, typeIndication); + + // Get cache topology. 
+ functionResult = call_getFeatures(buffer, -1, functionCode); + + for (unsigned int i = 0; i < _max_cache_levels; i++) { + if (functionResult > 0) { + int shiftVal = 8*(_max_cache_levels-(i+1)); + levelProperties[i] = (functionResult & (0xffUL<<shiftVal)) >> shiftVal; + levelReserved[i] = (levelProperties[i] & 0xf0) >> 4; + levelScope[i] = (levelProperties[i] & 0x0c) >> 2; + levelType[i] = (levelProperties[i] & 0x03); + } else { + levelProperties[i] = 0; + levelReserved[i] = 0; + levelScope[i] = 0; + levelType[i] = 0; + } + levelLineSize[i] = 0; + levelTotalSize[i] = 0; + levelAssociativity[i] = 0; + } + + tty->cr(); + tty->print_cr("------------------------------------"); + tty->print_cr("--- Cache Topology Information ---"); + tty->print_cr("------------------------------------"); + for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) { + tty->print_cr(" Cache Level %d: %s | %s", + i+1, levelScopeText[levelScope[i]], levelTypeText[levelType[i]]); + } + + // Get D-cache details per level. + _Dcache_lineSize = 0; + lineSize_mismatch = false; + print_something = false; + typeIndication = 0; // 0..1 (D-Cache, I-Cache) + for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) { + if ((levelType[i] == 0) || (levelType[i] == 2)) { + print_something = true; + + // Get cache line size of level i. + attributeIndication = 1; + functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication); + levelLineSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode); + + // Get cache total size of level i. + attributeIndication = 2; + functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication); + levelTotalSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode); + + // Get cache associativity of level i. 
+ attributeIndication = 3; + functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication); + levelAssociativity[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode); + + _Dcache_lineSize = _Dcache_lineSize == 0 ? levelLineSize[i] : _Dcache_lineSize; + lineSize_mismatch = lineSize_mismatch || (_Dcache_lineSize != levelLineSize[i]); + } else { + levelLineSize[i] = 0; + } + } + + if (print_something) { + tty->cr(); + tty->print_cr("------------------------------------"); + tty->print_cr("--- D-Cache Detail Information ---"); + tty->print_cr("------------------------------------"); + if (lineSize_mismatch) { + tty->print_cr("WARNING: D-Cache line size mismatch!"); + } + for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) { + if (levelLineSize[i] > 0) { + tty->print_cr(" D-Cache Level %d: line size = %4d, total size = %6dKB, associativity = %2d", + i+1, levelLineSize[i], levelTotalSize[i]/(int)K, levelAssociativity[i]); + } + } + } + + // Get I-cache details per level. + _Icache_lineSize = 0; + lineSize_mismatch = false; + print_something = false; + typeIndication = 1; // 0..1 (D-Cache, I-Cache) + for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) { + if ((levelType[i] == 0) || (levelType[i] == 1)) { + print_something = true; + + // Get cache line size of level i. + attributeIndication = 1; + functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication); + levelLineSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode); + + // Get cache total size of level i. + attributeIndication = 2; + functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication); + levelTotalSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode); + + // Get cache associativity of level i. 
+ attributeIndication = 3; + functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication); + levelAssociativity[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode); + + _Icache_lineSize = _Icache_lineSize == 0 ? levelLineSize[i] : _Icache_lineSize; + lineSize_mismatch = lineSize_mismatch || (_Icache_lineSize != levelLineSize[i]); + } else { + levelLineSize[i] = 0; + } + } + + if (print_something) { + tty->cr(); + tty->print_cr("------------------------------------"); + tty->print_cr("--- I-Cache Detail Information ---"); + tty->print_cr("------------------------------------"); + if (lineSize_mismatch) { + tty->print_cr("WARNING: I-Cache line size mismatch!"); + } + for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) { + if (levelLineSize[i] > 0) { + tty->print_cr(" I-Cache Level %d: line size = %4d, total size = %6dKB, associativity = %2d", + i+1, levelLineSize[i], levelTotalSize[i]/(int)K, levelAssociativity[i]); + } + } + } + + // Get D/I-cache details per level. + lineSize_mismatch = false; + print_something = false; + typeIndication = 0; // 0..1 (D-Cache, I-Cache) + for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) { + if (levelType[i] == 3) { + print_something = true; + + // Get cache line size of level i. + attributeIndication = 1; + functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication); + levelLineSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode); + + // Get cache total size of level i. + attributeIndication = 2; + functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication); + levelTotalSize[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode); + + // Get cache associativity of level i. 
+ attributeIndication = 3; + functionCode = calculate_ECAG_functionCode(attributeIndication, i, typeIndication); + levelAssociativity[i] = (unsigned int)call_getFeatures(buffer, -1, functionCode); + + _Dcache_lineSize = _Dcache_lineSize == 0 ? levelLineSize[i] : _Dcache_lineSize; + _Icache_lineSize = _Icache_lineSize == 0 ? levelLineSize[i] : _Icache_lineSize; + lineSize_mismatch = lineSize_mismatch || (_Dcache_lineSize != levelLineSize[i]) + || (_Icache_lineSize != levelLineSize[i]); + } else { + levelLineSize[i] = 0; + } + } + + if (print_something) { + tty->cr(); + tty->print_cr("--------------------------------------"); + tty->print_cr("--- D/I-Cache Detail Information ---"); + tty->print_cr("--------------------------------------"); + if (lineSize_mismatch) { + tty->print_cr("WARNING: D/I-Cache line size mismatch!"); + } + for (unsigned int i = 0; (i < _max_cache_levels) && (levelProperties[i] != 0); i++) { + if (levelLineSize[i] > 0) { + tty->print_cr(" D/I-Cache Level %d: line size = %4d, total size = %6dKB, associativity = %2d", + i+1, levelLineSize[i], levelTotalSize[i]/(int)K, levelAssociativity[i]); + } + } + } + tty->cr(); + } + return; +} + +unsigned long VM_Version::z_SIGILL() { + unsigned long ZeroBuffer = 0; + unsigned long work; + asm( + " LA %[work],%[buffer] \n\t" // Load address of buffer. + " LARL 14,+6 \n\t" // Load address of faulting instruction. + " BCR 15,%[work] \n\t" // Branch into buffer, execute whatever is in there. + : [buffer] "+Q" (ZeroBuffer) /* outputs */ + , [work] "=&a" (work) /* outputs */ + : /* inputs */ + : "cc" /* clobbered */ + ); + return ZeroBuffer; +} + +unsigned long VM_Version::z_SIGSEGV() { + unsigned long ZeroBuffer = 0; + unsigned long work; + asm( + " LG %[work],%[buffer] \n\t" // Load zero address. + " STG %[work],0(,%[work])\n\t" // Store to address zero. 
+ : [buffer] "+Q" (ZeroBuffer) /* outputs */ + , [work] "=&a" (work) /* outputs */ + : /* inputs */ + : "cc" /* clobbered */ + ); + return ZeroBuffer; +} + diff --git a/hotspot/src/cpu/s390/vm/vm_version_s390.hpp b/hotspot/src/cpu/s390/vm/vm_version_s390.hpp new file mode 100644 index 00000000000..7aa66bffc39 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/vm_version_s390.hpp @@ -0,0 +1,486 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_VM_VERSION_S390_HPP +#define CPU_S390_VM_VM_VERSION_S390_HPP + + +#include "runtime/globals_extension.hpp" +#include "runtime/vm_version.hpp" + +class VM_Version: public Abstract_VM_Version { + + protected: +// The following list contains the (approximate) announcement/availability +// dates of the many System z generations in existence as of now which +// implement the z/Architecture. 
+// z900: 2000-10 +// z990: 2003-06 +// z9: 2005-09 +// z10: 2007-04 +// z10: 2008-02 +// z196: 2010-08 +// ec12: 2012-09 +// z13: 2015-03 +// +// z/Architecture is the name of the 64-bit extension of the 31-bit s390 +// architecture. +// +// ---------------------------------------------- +// --- FeatureBitString Bits 0.. 63 (DW[0]) --- +// ---------------------------------------------- +// 11222334445566 +// 04826048260482604 +#define StoreFacilityListExtendedMask 0x0100000000000000UL // z9 +#define ETF2Mask 0x0000800000000000UL // z900 +#define CryptoFacilityMask 0x0000400000000000UL // z990 +#define LongDispFacilityMask 0x0000200000000000UL // z900 with microcode update +#define LongDispFacilityHighPerfMask 0x0000300000000000UL // z990 +#define HFPMultiplyAndAddMask 0x0000080000000000UL // z990 +#define ExtImmedFacilityMask 0x0000040000000000UL // z9 +#define ETF3Mask 0x0000020000000000UL // z990/z9 (?) +#define HFPUnnormalizedMask 0x0000010000000000UL // z9 +#define ETF2EnhancementMask 0x0000008000000000UL // z9 +#define StoreClockFastMask 0x0000004000000000UL // z9 +#define ParsingEnhancementsMask 0x0000002000000000UL // z10(?) 
+#define ETF3EnhancementMask 0x0000000200000000UL // z9 +#define ExtractCPUTimeMask 0x0000000100000000UL // z10 +#define CompareSwapStoreMask 0x00000000c0000000UL // z10 +#define GnrlInstrExtFacilityMask 0x0000000020000000UL // z10 +#define ExecuteExtensionsMask 0x0000000010000000UL // z10 +#define FPExtensionsMask 0x0000000004000000UL // z196 +#define FPSupportEnhancementsMask 0x0000000000400000UL // z10 +#define DecimalFloatingPointMask 0x0000000000300000UL // z10 +// z196 begin +#define DistinctOpndsMask 0x0000000000040000UL // z196 +#define FastBCRSerializationMask DistinctOpndsMask +#define HighWordMask DistinctOpndsMask +#define LoadStoreConditionalMask DistinctOpndsMask +#define PopulationCountMask DistinctOpndsMask +#define InterlockedAccess1Mask DistinctOpndsMask +// z196 end +// EC12 begin +#define DFPZonedConversionMask 0x0000000000008000UL // ec12 +#define MiscInstrExtMask 0x0000000000004000UL // ec12 +#define ExecutionHintMask MiscInstrExtMask +#define LoadAndTrapMask MiscInstrExtMask +#define ProcessorAssistMask MiscInstrExtMask +#define ConstrainedTxExecutionMask 0x0000000000002000UL // ec12 +#define InterlockedAccess2Mask 0x0000000000000800UL // ec12 +// EC12 end +// z13 begin +#define LoadStoreConditional2Mask 0x0000000000000400UL // z13 +#define CryptoExtension5Mask 0x0000000000000040UL // z13 +// z13 end +// Feature-DW[0] starts to fill up. Use of these masks is risky. 
+#define TestFeature1ImplMask 0x0000000000000001UL +#define TestFeature2ImplMask 0x0000000000000002UL +#define TestFeature4ImplMask 0x0000000000000004UL +#define TestFeature8ImplMask 0x0000000000000008UL +// ---------------------------------------------- +// --- FeatureBitString Bits 64..127 (DW[1]) --- +// ---------------------------------------------- +// 11111111 +// 66778889900011222 +// 48260482604826048 +#define TransactionalExecutionMask 0x0040000000000000UL // ec12 +#define CryptoExtension3Mask 0x0008000000000000UL // z196 +#define CryptoExtension4Mask 0x0004000000000000UL // z196 +#define DFPPackedConversionMask 0x0000800000000000UL // z13 +// ---------------------------------------------- +// --- FeatureBitString Bits 128..191 (DW[2]) --- +// ---------------------------------------------- +// 11111111111111111 +// 23344455666778889 +// 82604826048260482 +#define VectorFacilityMask 0x4000000000000000UL // z13, not avail in VM guest mode! + + enum { + _max_cache_levels = 8, // As limited by ECAG instruction. + _features_buffer_len = 4, // in DW + _code_buffer_len = 2*256 // For feature detection code. 
+ }; + static unsigned long _features[_features_buffer_len]; + static unsigned long _cipher_features[_features_buffer_len]; + static unsigned long _msgdigest_features[_features_buffer_len]; + static unsigned int _nfeatures; + static unsigned int _ncipher_features; + static unsigned int _nmsgdigest_features; + static unsigned int _Dcache_lineSize; + static unsigned int _Icache_lineSize; + static bool _is_determine_features_test_running; + + static bool test_feature_bit(unsigned long* featureBuffer, int featureNum, unsigned int bufLen); + static void set_features_string(); + static void print_features_internal(const char* text, bool print_anyway=false); + static void determine_features(); + static long call_getFeatures(unsigned long* buffer, int buflen, int functionCode); + static void set_getFeatures(address entryPoint); + static int calculate_ECAG_functionCode(unsigned int attributeIndication, + unsigned int levelIndication, + unsigned int typeIndication); + + // Setting features via march=z900|z990|z9|z10|z196|ec12|z13|ztest commandline option. + static void reset_features(bool reset); + static void set_features_z900(bool reset = true); + static void set_features_z990(bool reset = true); + static void set_features_z9(bool reset = true); + static void set_features_z10(bool reset = true); + static void set_features_z196(bool reset = true); + static void set_features_ec12(bool reset = true); + static void set_features_z13(bool reset = true); + static void set_features_from(const char* march); + + // Get the CPU type from feature bit settings. 
+ static bool is_z900() { return has_long_displacement() && !has_long_displacement_fast(); } + static bool is_z990() { return has_long_displacement_fast() && !has_extended_immediate(); } + static bool is_z9() { return has_extended_immediate() && !has_GnrlInstrExtensions(); } + static bool is_z10() { return has_GnrlInstrExtensions() && !has_DistinctOpnds(); } + static bool is_z196() { return has_DistinctOpnds() && !has_MiscInstrExt(); } + static bool is_ec12() { return has_MiscInstrExt() && !has_CryptoExt5(); } + static bool is_z13() { return has_CryptoExt5();} + + // Get information about cache line sizes. + // As of now and the foreseeable future, line size of all levels will be the same and 256. + static unsigned int Dcache_lineSize(unsigned int level = 0) { return _Dcache_lineSize; } + static unsigned int Icache_lineSize(unsigned int level = 0) { return _Icache_lineSize; } + + public: + + // Need to use nested class with unscoped enum. + // C++11 declaration "enum class Cipher { ... } is not supported. + class CipherMode { + public: + enum { + cipher = 0x00, + decipher = 0x80 + }; + }; + class Cipher { + public: + enum { // KM only!!! KMC uses different parmBlk sizes. + _Query = 0, + _DEA = 1, + _TDEA128 = 2, + _TDEA192 = 3, + _EncryptedDEA = 9, + _EncryptedDEA128 = 10, + _EncryptedDEA192 = 11, + _AES128 = 18, + _AES192 = 19, + _AES256 = 20, + _EnccryptedAES128 = 26, + _EnccryptedAES192 = 27, + _EnccryptedAES256 = 28, + _XTSAES128 = 50, + _XTSAES256 = 52, + _EncryptedXTSAES128 = 58, + _EncryptedXTSAES256 = 60, + _PRNG = 67, + _featureBits = 128, + + // Parameter block sizes (in bytes) for KM instruction. 
+ _Query_parmBlk = 16, + _DEA_parmBlk = 8, + _TDEA128_parmBlk = 16, + _TDEA192_parmBlk = 24, + _EncryptedDEA_parmBlk = 32, + _EncryptedDEA128_parmBlk = 40, + _EncryptedDEA192_parmBlk = 48, + _AES128_parmBlk = 16, + _AES192_parmBlk = 24, + _AES256_parmBlk = 32, + _EnccryptedAES128_parmBlk = 48, + _EnccryptedAES192_parmBlk = 56, + _EnccryptedAES256_parmBlk = 64, + _XTSAES128_parmBlk = 32, + _XTSAES256_parmBlk = 48, + _EncryptedXTSAES128_parmBlk = 64, + _EncryptedXTSAES256_parmBlk = 80, + + // Parameter block sizes (in bytes) for KMC instruction. + _Query_parmBlk_C = 16, + _DEA_parmBlk_C = 16, + _TDEA128_parmBlk_C = 24, + _TDEA192_parmBlk_C = 32, + _EncryptedDEA_parmBlk_C = 40, + _EncryptedDEA128_parmBlk_C = 48, + _EncryptedDEA192_parmBlk_C = 56, + _AES128_parmBlk_C = 32, + _AES192_parmBlk_C = 40, + _AES256_parmBlk_C = 48, + _EnccryptedAES128_parmBlk_C = 64, + _EnccryptedAES192_parmBlk_C = 72, + _EnccryptedAES256_parmBlk_C = 80, + _XTSAES128_parmBlk_C = 32, + _XTSAES256_parmBlk_C = 48, + _EncryptedXTSAES128_parmBlk_C = 64, + _EncryptedXTSAES256_parmBlk_C = 80, + _PRNG_parmBlk_C = 32, + + // Data block sizes (in bytes). + _Query_dataBlk = 0, + _DEA_dataBlk = 8, + _TDEA128_dataBlk = 8, + _TDEA192_dataBlk = 8, + _EncryptedDEA_dataBlk = 8, + _EncryptedDEA128_dataBlk = 8, + _EncryptedDEA192_dataBlk = 8, + _AES128_dataBlk = 16, + _AES192_dataBlk = 16, + _AES256_dataBlk = 16, + _EnccryptedAES128_dataBlk = 16, + _EnccryptedAES192_dataBlk = 16, + _EnccryptedAES256_dataBlk = 16, + _XTSAES128_dataBlk = 16, + _XTSAES256_dataBlk = 16, + _EncryptedXTSAES128_dataBlk = 16, + _EncryptedXTSAES256_dataBlk = 16, + _PRNG_dataBlk = 8, + }; + }; + class MsgDigest { + public: + enum { + _Query = 0, + _SHA1 = 1, + _SHA256 = 2, + _SHA512 = 3, + _GHASH = 65, + _featureBits = 128, + + // Parameter block sizes (in bytes) for KIMD. 
+ _Query_parmBlk_I = 16, + _SHA1_parmBlk_I = 20, + _SHA256_parmBlk_I = 32, + _SHA512_parmBlk_I = 64, + _GHASH_parmBlk_I = 32, + + // Parameter block sizes (in bytes) for KLMD. + _Query_parmBlk_L = 16, + _SHA1_parmBlk_L = 28, + _SHA256_parmBlk_L = 40, + _SHA512_parmBlk_L = 80, + + // Data block sizes (in bytes). + _Query_dataBlk = 0, + _SHA1_dataBlk = 64, + _SHA256_dataBlk = 64, + _SHA512_dataBlk = 128, + _GHASH_dataBlk = 16 + }; + }; + class MsgAuthent { + public: + enum { + _Query = 0, + _DEA = 1, + _TDEA128 = 2, + _TDEA192 = 3, + _EncryptedDEA = 9, + _EncryptedDEA128 = 10, + _EncryptedDEA192 = 11, + _AES128 = 18, + _AES192 = 19, + _AES256 = 20, + _EnccryptedAES128 = 26, + _EnccryptedAES192 = 27, + _EnccryptedAES256 = 28, + _featureBits = 128, + + _Query_parmBlk = 16, + _DEA_parmBlk = 16, + _TDEA128_parmBlk = 24, + _TDEA192_parmBlk = 32, + _EncryptedDEA_parmBlk = 40, + _EncryptedDEA128_parmBlk = 48, + _EncryptedDEA192_parmBlk = 56, + _AES128_parmBlk = 32, + _AES192_parmBlk = 40, + _AES256_parmBlk = 48, + _EnccryptedAES128_parmBlk = 64, + _EnccryptedAES192_parmBlk = 72, + _EnccryptedAES256_parmBlk = 80, + + _Query_dataBlk = 0, + _DEA_dataBlk = 8, + _TDEA128_dataBlk = 8, + _TDEA192_dataBlk = 8, + _EncryptedDEA_dataBlk = 8, + _EncryptedDEA128_dataBlk = 8, + _EncryptedDEA192_dataBlk = 8, + _AES128_dataBlk = 16, + _AES192_dataBlk = 16, + _AES256_dataBlk = 16, + _EnccryptedAES128_dataBlk = 16, + _EnccryptedAES192_dataBlk = 16, + _EnccryptedAES256_dataBlk = 16 + }; + }; + + // Initialization + static void initialize(); + static void print_features(); + static bool is_determine_features_test_running() { return _is_determine_features_test_running; } + + // CPU feature query functions + static bool has_StoreFacilityListExtended() { return (_features[0] & StoreFacilityListExtendedMask) == StoreFacilityListExtendedMask; } + static bool has_Crypto() { return (_features[0] & CryptoFacilityMask) == CryptoFacilityMask; } + static bool has_ETF2() { return (_features[0] & ETF2Mask) 
== ETF2Mask; } + static bool has_ETF3() { return (_features[0] & ETF3Mask) == ETF3Mask; } + static bool has_ETF2Enhancements() { return (_features[0] & ETF2EnhancementMask) == ETF2EnhancementMask; } + static bool has_ETF3Enhancements() { return (_features[0] & ETF3EnhancementMask) == ETF3EnhancementMask; } + static bool has_ParsingEnhancements() { return (_features[0] & ParsingEnhancementsMask) == ParsingEnhancementsMask; } + static bool has_long_displacement() { return (_features[0] & LongDispFacilityMask) == LongDispFacilityMask; } + static bool has_long_displacement_fast() { return (_features[0] & LongDispFacilityHighPerfMask) == LongDispFacilityHighPerfMask; } + static bool has_extended_immediate() { return (_features[0] & ExtImmedFacilityMask) == ExtImmedFacilityMask; } + static bool has_StoreClockFast() { return (_features[0] & StoreClockFastMask) == StoreClockFastMask; } + static bool has_ExtractCPUtime() { return (_features[0] & ExtractCPUTimeMask) == ExtractCPUTimeMask; } + static bool has_CompareSwapStore() { return (_features[0] & CompareSwapStoreMask) == CompareSwapStoreMask; } + + static bool has_HFPMultiplyAndAdd() { return (_features[0] & HFPMultiplyAndAddMask) == HFPMultiplyAndAddMask; } + static bool has_HFPUnnormalized() { return (_features[0] & HFPUnnormalizedMask) == HFPUnnormalizedMask; } + + // Make sure we don't run on older ... + static bool has_GnrlInstrExtensions() { guarantee((_features[0] & GnrlInstrExtFacilityMask) == GnrlInstrExtFacilityMask, "We no more support older than z10."); return true; } + static bool has_CompareBranch() { return has_GnrlInstrExtensions() && is_z10(); } // Only z10 benefits from these. 
+ static bool has_CompareTrap() { return has_GnrlInstrExtensions(); } + static bool has_RelativeLoadStore() { return has_GnrlInstrExtensions(); } + static bool has_MultiplySingleImm32() { return has_GnrlInstrExtensions(); } + static bool has_Prefetch() { return has_GnrlInstrExtensions() && (AllocatePrefetchStyle > 0); } + static bool has_PrefetchRaw() { return has_GnrlInstrExtensions(); } + static bool has_MoveImmToMem() { return has_GnrlInstrExtensions(); } + static bool has_ExtractCPUAttributes() { return has_GnrlInstrExtensions(); } + static bool has_ExecuteExtensions() { return (_features[0] & ExecuteExtensionsMask) == ExecuteExtensionsMask; } + // Memory-immediate arithmetic instructions. There is no performance penalty in using them. + // Moreover, these memory-immediate instructions are quasi-atomic (>99.99%) on z10 + // and 100% atomic from z196 onwards, thanks to the specific operand serialization that comes new with z196. + static bool has_MemWithImmALUOps() { return has_GnrlInstrExtensions(); } + static bool has_AtomicMemWithImmALUOps() { return has_MemWithImmALUOps() && has_InterlockedAccessV1(); } + static bool has_FPExtensions() { return (_features[0] & FPExtensionsMask) == FPExtensionsMask; } + static bool has_FPSupportEnhancements() { return (_features[0] & FPSupportEnhancementsMask) == FPSupportEnhancementsMask; } + static bool has_DecimalFloatingPoint() { return (_features[0] & DecimalFloatingPointMask) == DecimalFloatingPointMask; } + static bool has_InterlockedAccessV1() { return (_features[0] & InterlockedAccess1Mask) == InterlockedAccess1Mask; } + static bool has_LoadAndALUAtomicV1() { return (_features[0] & InterlockedAccess1Mask) == InterlockedAccess1Mask; } + static bool has_PopCount() { return (_features[0] & PopulationCountMask) == PopulationCountMask; } + static bool has_LoadStoreConditional() { return (_features[0] & LoadStoreConditionalMask) == LoadStoreConditionalMask; } + static bool has_HighWordInstr() { return (_features[0] & 
HighWordMask) == HighWordMask; } + static bool has_FastSync() { return (_features[0] & FastBCRSerializationMask) == FastBCRSerializationMask; } + static bool has_DistinctOpnds() { return (_features[0] & DistinctOpndsMask) == DistinctOpndsMask; } + static bool has_CryptoExt3() { return (_features[1] & CryptoExtension3Mask) == CryptoExtension3Mask; } + static bool has_CryptoExt4() { return (_features[1] & CryptoExtension4Mask) == CryptoExtension4Mask; } + static bool has_DFPZonedConversion() { return (_features[0] & DFPZonedConversionMask) == DFPZonedConversionMask; } + static bool has_DFPPackedConversion() { return (_features[1] & DFPPackedConversionMask) == DFPPackedConversionMask; } + static bool has_MiscInstrExt() { return (_features[0] & MiscInstrExtMask) == MiscInstrExtMask; } + static bool has_ExecutionHint() { return (_features[0] & ExecutionHintMask) == ExecutionHintMask; } + static bool has_LoadAndTrap() { return (_features[0] & LoadAndTrapMask) == LoadAndTrapMask; } + static bool has_ProcessorAssist() { return (_features[0] & ProcessorAssistMask) == ProcessorAssistMask; } + static bool has_InterlockedAccessV2() { return (_features[0] & InterlockedAccess2Mask) == InterlockedAccess2Mask; } + static bool has_LoadAndALUAtomicV2() { return (_features[0] & InterlockedAccess2Mask) == InterlockedAccess2Mask; } + static bool has_TxMem() { return ((_features[1] & TransactionalExecutionMask) == TransactionalExecutionMask) && + ((_features[0] & ConstrainedTxExecutionMask) == ConstrainedTxExecutionMask); } + static bool has_CryptoExt5() { return (_features[0] & CryptoExtension5Mask) == CryptoExtension5Mask; } + static bool has_LoadStoreConditional2() { return (_features[0] & LoadStoreConditional2Mask) == LoadStoreConditional2Mask; } + static bool has_VectorFacility() { return (_features[2] & VectorFacilityMask) == VectorFacilityMask; } + + static bool has_TestFeatureImpl() { return (_features[0] & TestFeature1ImplMask) == TestFeature1ImplMask; } + static bool 
has_TestFeature1Impl() { return (_features[0] & TestFeature1ImplMask) == TestFeature1ImplMask; } + static bool has_TestFeature2Impl() { return (_features[0] & TestFeature2ImplMask) == TestFeature2ImplMask; } + static bool has_TestFeature4Impl() { return (_features[0] & TestFeature4ImplMask) == TestFeature4ImplMask; } + static bool has_TestFeature8Impl() { return (_features[0] & TestFeature8ImplMask) == TestFeature8ImplMask; } + static bool has_TestFeaturesImpl() { return has_TestFeature1Impl() || has_TestFeature2Impl() || has_TestFeature4Impl() || has_TestFeature8Impl(); } + + // Crypto features query functions. + static bool has_Crypto_AES128() { return has_Crypto() && test_feature_bit(&_cipher_features[0], Cipher::_AES128, Cipher::_featureBits); } + static bool has_Crypto_AES192() { return has_Crypto() && test_feature_bit(&_cipher_features[0], Cipher::_AES192, Cipher::_featureBits); } + static bool has_Crypto_AES256() { return has_Crypto() && test_feature_bit(&_cipher_features[0], Cipher::_AES256, Cipher::_featureBits); } + static bool has_Crypto_AES() { return has_Crypto_AES128() || has_Crypto_AES192() || has_Crypto_AES256(); } + + static bool has_Crypto_SHA1() { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_SHA1, MsgDigest::_featureBits); } + static bool has_Crypto_SHA256() { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_SHA256, MsgDigest::_featureBits); } + static bool has_Crypto_SHA512() { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_SHA512, MsgDigest::_featureBits); } + static bool has_Crypto_GHASH() { return has_Crypto() && test_feature_bit(&_msgdigest_features[0], MsgDigest::_GHASH, MsgDigest::_featureBits); } + static bool has_Crypto_SHA() { return has_Crypto_SHA1() || has_Crypto_SHA256() || has_Crypto_SHA512() || has_Crypto_GHASH(); } + + // CPU feature setters (to force model-specific behaviour). Test/debugging only. 
+ static void set_has_TestFeature1Impl() { _features[0] |= TestFeature1ImplMask; } + static void set_has_TestFeature2Impl() { _features[0] |= TestFeature2ImplMask; } + static void set_has_TestFeature4Impl() { _features[0] |= TestFeature4ImplMask; } + static void set_has_TestFeature8Impl() { _features[0] |= TestFeature8ImplMask; } + static void set_has_DecimalFloatingPoint() { _features[0] |= DecimalFloatingPointMask; } + static void set_has_FPSupportEnhancements() { _features[0] |= FPSupportEnhancementsMask; } + static void set_has_ExecuteExtensions() { _features[0] |= ExecuteExtensionsMask; } + static void set_has_MemWithImmALUOps() { _features[0] |= GnrlInstrExtFacilityMask; } + static void set_has_MoveImmToMem() { _features[0] |= GnrlInstrExtFacilityMask; } + static void set_has_Prefetch() { _features[0] |= GnrlInstrExtFacilityMask; } + static void set_has_MultiplySingleImm32() { _features[0] |= GnrlInstrExtFacilityMask; } + static void set_has_CompareBranch() { _features[0] |= GnrlInstrExtFacilityMask; } + static void set_has_CompareTrap() { _features[0] |= GnrlInstrExtFacilityMask; } + static void set_has_RelativeLoadStore() { _features[0] |= GnrlInstrExtFacilityMask; } + static void set_has_GnrlInstrExtensions() { _features[0] |= GnrlInstrExtFacilityMask; } + static void set_has_CompareSwapStore() { _features[0] |= CompareSwapStoreMask; } + static void set_has_HFPMultiplyAndAdd() { _features[0] |= HFPMultiplyAndAddMask; } + static void set_has_HFPUnnormalized() { _features[0] |= HFPUnnormalizedMask; } + static void set_has_ExtractCPUtime() { _features[0] |= ExtractCPUTimeMask; } + static void set_has_StoreClockFast() { _features[0] |= StoreClockFastMask; } + static void set_has_extended_immediate() { _features[0] |= ExtImmedFacilityMask; } + static void set_has_long_displacement_fast() { _features[0] |= LongDispFacilityHighPerfMask; } + static void set_has_long_displacement() { _features[0] |= LongDispFacilityMask; } + static void set_has_ETF2() { 
_features[0] |= ETF2Mask; } + static void set_has_ETF3() { _features[0] |= ETF3Mask; } + static void set_has_ETF2Enhancements() { _features[0] |= ETF2EnhancementMask; } + static void set_has_ETF3Enhancements() { _features[0] |= ETF3EnhancementMask; } + static void set_has_Crypto() { _features[0] |= CryptoFacilityMask; } + static void set_has_StoreFacilityListExtended() { _features[0] |= StoreFacilityListExtendedMask; } + + static void set_has_InterlockedAccessV1() { _features[0] |= InterlockedAccess1Mask; } + static void set_has_PopCount() { _features[0] |= PopulationCountMask; } + static void set_has_LoadStoreConditional() { _features[0] |= LoadStoreConditionalMask; } + static void set_has_HighWordInstr() { _features[0] |= HighWordMask; } + static void set_has_FastSync() { _features[0] |= FastBCRSerializationMask; } + static void set_has_DistinctOpnds() { _features[0] |= DistinctOpndsMask; } + static void set_has_FPExtensions() { _features[0] |= FPExtensionsMask; } + static void set_has_MiscInstrExt() { _features[0] |= MiscInstrExtMask; } + static void set_has_ProcessorAssist() { _features[0] |= ProcessorAssistMask; } + static void set_has_InterlockedAccessV2() { _features[0] |= InterlockedAccess2Mask; } + static void set_has_LoadAndALUAtomicV2() { _features[0] |= InterlockedAccess2Mask; } + static void set_has_TxMem() { _features[0] |= ConstrainedTxExecutionMask; _features[1] |= TransactionalExecutionMask; } + static void set_has_CryptoExt3() { _features[1] |= CryptoExtension3Mask; } + static void set_has_CryptoExt4() { _features[1] |= CryptoExtension4Mask; } + static void set_has_LoadStoreConditional2() { _features[0] |= LoadStoreConditional2Mask; } + static void set_has_CryptoExt5() { _features[0] |= CryptoExtension5Mask; } + static void set_has_VectorFacility() { _features[2] |= VectorFacilityMask; } + + // Assembler testing. + static void allow_all(); + static void revert(); + + // Generate trapping instructions into C-code. 
+ // Sometimes helpful for debugging. + static unsigned long z_SIGILL(); + static unsigned long z_SIGSEGV(); +}; + +#endif // CPU_S390_VM_VM_VERSION_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/vmreg_s390.cpp b/hotspot/src/cpu/s390/vm/vmreg_s390.cpp new file mode 100644 index 00000000000..9ef53ae1e9f --- /dev/null +++ b/hotspot/src/cpu/s390/vm/vmreg_s390.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "code/vmreg.hpp" + +void VMRegImpl::set_regName() { + // Not clear why we have this duplication (triplication?) 
+ Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr;) { + regName[i++] = reg->name(); + regName[i++] = reg->name(); + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for (; i < ConcreteRegisterImpl::max_fpr;) { + regName[i++] = freg->name(); + regName[i++] = freg->name(); + freg = freg->successor(); + } + for (; i < ConcreteRegisterImpl::number_of_registers; i ++) { + regName[i] = "NON-GPR-XMM"; + } +} diff --git a/hotspot/src/cpu/s390/vm/vmreg_s390.hpp b/hotspot/src/cpu/s390/vm/vmreg_s390.hpp new file mode 100644 index 00000000000..d417f2718f3 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/vmreg_s390.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_S390_VM_VMREG_S390_HPP +#define CPU_S390_VM_VMREG_S390_HPP + +inline bool is_Register() { + return (unsigned int)value() < (unsigned int)ConcreteRegisterImpl::max_gpr; +} + +inline bool is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && + value() < ConcreteRegisterImpl::max_fpr; +} + +inline Register as_Register() { + assert(is_Register() && is_even(value()), "even-aligned GPR name"); + return ::as_Register(value() >> 1); +} + +inline FloatRegister as_FloatRegister() { + assert(is_FloatRegister() && is_even(value()), "must be"); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); +} + +inline bool is_concrete() { + assert(is_reg(), "must be"); + return is_even(value()); +} + +#endif // CPU_S390_VM_VMREG_S390_HPP diff --git a/hotspot/src/cpu/s390/vm/vmreg_s390.inline.hpp b/hotspot/src/cpu/s390/vm/vmreg_s390.inline.hpp new file mode 100644 index 00000000000..b708ac365b0 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/vmreg_s390.inline.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_S390_VM_VMREG_S390_INLINE_HPP +#define CPU_S390_VM_VMREG_S390_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if (this == noreg) { + return VMRegImpl::Bad(); + } + return VMRegImpl::as_VMReg(encoding() << 1); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); +} + +inline VMReg ConditionRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr); +} + +#endif // CPU_S390_VM_VMREG_S390_INLINE_HPP diff --git a/hotspot/src/cpu/s390/vm/vtableStubs_s390.cpp b/hotspot/src/cpu/s390/vm/vtableStubs_s390.cpp new file mode 100644 index 00000000000..c2c3ab246b8 --- /dev/null +++ b/hotspot/src/cpu/s390/vm/vtableStubs_s390.cpp @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_s390.hpp" +#include "memory/resourceArea.hpp" +#include "oops/instanceKlass.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_s390.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// Machine-dependent part of VtableStubs: create vtableStub of correct +// size and initialize its code. + +#define __ masm-> + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); +#endif + +// Used by compiler only; may use only caller saved, non-argument registers. +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + + const int code_length = VtableStub::pd_code_size_limit(true); + VtableStub *s = new(code_length) VtableStub(true, vtable_index); + if (s == NULL) { // Indicates OOM In the code cache. + return NULL; + } + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), code_length); + MacroAssembler *masm = new MacroAssembler(&cb); + address start_pc; + int padding_bytes = 0; + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + // Count unused bytes + // worst case actual size + padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true); + + // Use generic emitter for direct memory increment. + // Abuse Z_method as scratch register for generic emitter. + // It is loaded further down anyway before it is first used. + __ add2mem_32(Address(Z_R1_scratch), 1, Z_method); + } +#endif + + assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1"); + + // Get receiver klass. 
+ // Must do an explicit check if implicit checks are disabled. + address npe_addr = __ pc(); // npe == NULL ptr exception + __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes()); + const Register rcvr_klass = Z_R1_scratch; + __ load_klass(rcvr_klass, Z_ARG1); + + // Set method (in case of interpreted method), and destination address. + int entry_offset = in_bytes(InstanceKlass::vtable_start_offset()) + + vtable_index * vtableEntry::size_in_bytes(); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // Check offset vs vtable length. + const Register vtable_idx = Z_R0_scratch; + + // Count unused bytes. + // worst case actual size + padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size_in_bytes(), true); + + assert(Immediate::is_uimm12(in_bytes(InstanceKlass::vtable_length_offset())), "disp to large"); + __ z_cl(vtable_idx, in_bytes(InstanceKlass::vtable_length_offset()), rcvr_klass); + __ z_brl(L); + __ z_lghi(Z_ARG3, vtable_index); // Debug code, don't optimize. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), Z_ARG1, Z_ARG3, false); + // Count unused bytes (assume worst case here). + padding_bytes += 12; + __ bind(L); + } +#endif + + int v_off = entry_offset + vtableEntry::method_offset_in_bytes(); + + // Duplicate safety code from enc_class Java_Dynamic_Call_dynTOC. + if (Displacement::is_validDisp(v_off)) { + __ z_lg(Z_method/*method oop*/, v_off, rcvr_klass/*class oop*/); + // Account for the load_const in the else path. + padding_bytes += __ load_const_size(); + } else { + // Worse case, offset does not fit in displacement field. + __ load_const(Z_method, v_off); // Z_method temporarily holds the offset value. 
+ __ z_lg(Z_method/*method oop*/, 0, Z_method/*method offset*/, rcvr_klass/*class oop*/); + } + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + __ z_ltgr(Z_method, Z_method); + __ z_brne(L); + __ stop("Vtable entry is ZERO",102); + __ bind(L); + } +#endif + + address ame_addr = __ pc(); // ame = abstract method error + + // Must do an explicit check if implicit checks are disabled. + __ null_check(Z_method, Z_R1_scratch, in_bytes(Method::from_compiled_offset())); + __ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method); + __ z_br(Z_R1_scratch); + + masm->flush(); + + s->set_exception_points(npe_addr, ame_addr); + + return s; +} + +VtableStub* VtableStubs::create_itable_stub(int vtable_index) { + const int code_length = VtableStub::pd_code_size_limit(false); + VtableStub *s = new(code_length) VtableStub(false, vtable_index); + if (s == NULL) { // Indicates OOM in the code cache. + return NULL; + } + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), code_length); + MacroAssembler *masm = new MacroAssembler(&cb); + address start_pc; + int padding_bytes = 0; + +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + // Count unused bytes + // worst case actual size + padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true); + + // Use generic emitter for direct memory increment. + // Use Z_tmp_1 as scratch register for generic emitter. + __ add2mem_32((Z_R1_scratch), 1, Z_tmp_1); + } +#endif + + assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1"); + + // Entry arguments: + // Z_method: Interface + // Z_ARG1: Receiver + const Register rcvr_klass = Z_tmp_1; // Used to compute itable_entry_addr. + // Use extra reg to avoid re-load. + const Register vtable_len = Z_tmp_2; // Used to compute itable_entry_addr. 
+ const Register itable_entry_addr = Z_R1_scratch; + const Register itable_interface = Z_R0_scratch; + + // Get receiver klass. + // Must do an explicit check if implicit checks are disabled. + address npe_addr = __ pc(); // npe == NULL ptr exception + __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes()); + __ load_klass(rcvr_klass, Z_ARG1); + + // Load start of itable entries into itable_entry. + __ z_llgf(vtable_len, Address(rcvr_klass, InstanceKlass::vtable_length_offset())); + __ z_sllg(vtable_len, vtable_len, exact_log2(vtableEntry::size_in_bytes())); + + // Loop over all itable entries until desired interfaceOop(Rinterface) found. + const int vtable_base_offset = in_bytes(InstanceKlass::vtable_start_offset()); + // Count unused bytes. + start_pc = __ pc(); + __ add2reg_with_index(itable_entry_addr, vtable_base_offset + itableOffsetEntry::interface_offset_in_bytes(), rcvr_klass, vtable_len); + padding_bytes += 20 - (__ pc() - start_pc); + + const int itable_offset_search_inc = itableOffsetEntry::size() * wordSize; + Label search; + __ bind(search); + + // Handle IncompatibleClassChangeError in itable stubs. + // If the entry is NULL then we've reached the end of the table + // without finding the expected interface, so throw an exception. + NearLabel throw_icce; + __ load_and_test_long(itable_interface, Address(itable_entry_addr)); + __ z_bre(throw_icce); // Throw the exception out-of-line. + // Count unused bytes. + start_pc = __ pc(); + __ add2reg(itable_entry_addr, itable_offset_search_inc); + padding_bytes += 20 - (__ pc() - start_pc); + __ z_cgr(itable_interface, Z_method); + __ z_brne(search); + + // Entry found. Itable_entry_addr points to the subsequent entry (itable_offset_search_inc too far). + // Get offset of vtable for interface. + + const Register vtable_offset = Z_R1_scratch; + const Register itable_method = rcvr_klass; // Calculated before. 
+ + const int vtable_offset_offset = (itableOffsetEntry::offset_offset_in_bytes() - + itableOffsetEntry::interface_offset_in_bytes()) - + itable_offset_search_inc; + __ z_llgf(vtable_offset, vtable_offset_offset, itable_entry_addr); + + // Compute itableMethodEntry and get method and entry point for compiler. + const int method_offset = (itableMethodEntry::size() * wordSize * vtable_index) + + itableMethodEntry::method_offset_in_bytes(); + + __ z_lg(Z_method, method_offset, vtable_offset, itable_method); + +#ifndef PRODUCT + if (DebugVtables) { + Label ok1; + __ z_ltgr(Z_method, Z_method); + __ z_brne(ok1); + __ stop("method is null",103); + __ bind(ok1); + } +#endif + + address ame_addr = __ pc(); + // Must do an explicit check if implicit checks are disabled. + if (!ImplicitNullChecks) { + __ compare64_and_branch(Z_method, (intptr_t) 0, Assembler::bcondEqual, throw_icce); + } + __ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method); + __ z_br(Z_R1_scratch); + + // Handle IncompatibleClassChangeError in itable stubs. + __ bind(throw_icce); + // Count unused bytes + // worst case actual size + // We force resolving of the call site by jumping to + // the "handle wrong method" stub, and so let the + // interpreter runtime do all the dirty work. + padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true); + __ z_br(Z_R1_scratch); + + masm->flush(); + + s->set_exception_points(npe_addr, ame_addr); + return s; +} + +// In order to tune these parameters, run the JVM with VM options +// +PrintMiscellaneous and +WizardMode to see information about +// actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops. +int VtableStub::pd_code_size_limit(bool is_vtable_stub) { + int size = DebugVtables ? 
216 : 0; + if (CountCompiledCalls) { + size += 6 * 4; + } + if (is_vtable_stub) { + size += 52; + } else { + size += 104; + } + if (Universe::narrow_klass_base() != NULL) { + size += 16; // A guess. + } + return size; +} + +int VtableStub::pd_code_alignment() { + const unsigned int icache_line_size = 32; + return icache_line_size; +} diff --git a/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp b/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp index 9cef4721b92..10e6799f21e 100644 --- a/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -64,6 +64,7 @@ define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoScheduling, true); define_pd_global(bool, OptoRegScheduling, false); define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); +define_pd_global(bool, IdealizeClearArrayNode, true); #ifdef _LP64 // We need to make sure that all generated code is within diff --git a/hotspot/src/cpu/sparc/vm/frame_sparc.cpp b/hotspot/src/cpu/sparc/vm/frame_sparc.cpp index f1664a2b6df..c666f1eca47 100644 --- a/hotspot/src/cpu/sparc/vm/frame_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/frame_sparc.cpp @@ -279,7 +279,13 @@ bool frame::safe_for_sender(JavaThread *thread) { } if (sender.is_entry_frame()) { - return sender.is_entry_frame_valid(thread); + // Validate the JavaCallWrapper an entry frame must have + + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw <= thread->stack_base()) && (jcw > sender_fp); + + return jcw_safe; } // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size diff --git a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp 
b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp index fe26b39d736..ae9ba7e467f 100644 --- a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp @@ -44,6 +44,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on sparc. define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast +define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. define_pd_global(intx, CodeEntryAlignment, 32); // The default setting 16/16 seems to work best. // (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.) diff --git a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp index e9fee0c9a51..51f793928ab 100644 --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp @@ -457,9 +457,10 @@ void VM_Version::revert() { unsigned int VM_Version::calc_parallel_worker_threads() { unsigned int result; - if (is_M_series()) { - // for now, use same gc thread calculation for M-series as for niagara-plus - // in future, we may want to tweak parameters for nof_parallel_worker_thread + if (is_M_series() || is_S_series()) { + // for now, use same gc thread calculation for M-series and S-series as for + // niagara-plus. In future, we may want to tweak parameters for + // nof_parallel_worker_thread result = nof_parallel_worker_threads(5, 16, 8); } else if (is_niagara_plus()) { result = nof_parallel_worker_threads(5, 16, 8); @@ -483,6 +484,9 @@ int VM_Version::parse_features(const char* implementation) { } else if (strstr(impl, "SPARC-M") != NULL) { // M-series SPARC is based on T-series. features |= (M_family_m | T_family_m); + } else if (strstr(impl, "SPARC-S") != NULL) { + // S-series SPARC is based on T-series. 
+ features |= (S_family_m | T_family_m); } else if (strstr(impl, "SPARC-T") != NULL) { features |= T_family_m; if (strstr(impl, "SPARC-T1") != NULL) { diff --git a/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp b/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp index 6c3b72b23cf..f5cb00d75a7 100644 --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp @@ -49,14 +49,15 @@ protected: cbcond_instructions = 12, sparc64_family = 13, M_family = 14, - T_family = 15, - T1_model = 16, - sparc5_instructions = 17, - aes_instructions = 18, - sha1_instruction = 19, - sha256_instruction = 20, - sha512_instruction = 21, - crc32c_instruction = 22 + S_family = 15, + T_family = 16, + T1_model = 17, + sparc5_instructions = 18, + aes_instructions = 19, + sha1_instruction = 20, + sha256_instruction = 21, + sha512_instruction = 22, + crc32c_instruction = 23 }; enum Feature_Flag_Set { @@ -78,6 +79,7 @@ protected: cbcond_instructions_m = 1 << cbcond_instructions, sparc64_family_m = 1 << sparc64_family, M_family_m = 1 << M_family, + S_family_m = 1 << S_family, T_family_m = 1 << T_family, T1_model_m = 1 << T1_model, sparc5_instructions_m = 1 << sparc5_instructions, @@ -105,6 +107,7 @@ protected: // Returns true if the platform is in the niagara line (T series) static bool is_M_family(int features) { return (features & M_family_m) != 0; } + static bool is_S_family(int features) { return (features & S_family_m) != 0; } static bool is_T_family(int features) { return (features & T_family_m) != 0; } static bool is_niagara() { return is_T_family(_features); } #ifdef ASSERT @@ -153,6 +156,7 @@ public: static bool is_niagara_plus() { return is_T_family(_features) && !is_T1_model(_features); } static bool is_M_series() { return is_M_family(_features); } + static bool is_S_series() { return is_S_family(_features); } static bool is_T4() { return is_T_family(_features) && has_cbcond(); } static bool is_T7() { return is_T_family(_features) && 
has_sparc5_instr(); } diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp index d00802e2708..eb016ad6844 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp @@ -2461,6 +2461,7 @@ void Assembler::movdqu(Address dst, XMMRegister src) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.reset_is_clear_context(); simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); @@ -2490,6 +2491,7 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) { InstructionMark im(this); InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.reset_is_clear_context(); // swap src<->dst for encoding assert(src != xnoreg, "sanity"); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); @@ -2590,6 +2592,7 @@ void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vect InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); @@ -2623,6 +2626,7 @@ void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) { InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* 
legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.reset_is_clear_context(); attributes.set_is_evex_instruction(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); @@ -2655,6 +2659,7 @@ void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) { InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.reset_is_clear_context(); attributes.set_is_evex_instruction(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); @@ -2794,6 +2799,7 @@ void Assembler::movsd(Address dst, XMMRegister src) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + attributes.reset_is_clear_context(); attributes.set_rex_vex_w_reverted(); simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x11); @@ -2823,6 +2829,7 @@ void Assembler::movss(Address dst, XMMRegister src) { InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x11); emit_operand(src, dst); @@ -3291,6 +3298,15 @@ void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, i emit_int8(imm8); } +void 
Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8(0x06); + emit_int8(0xC0 | encode); + emit_int8(imm8); +} + void Assembler::pause() { emit_int8((unsigned char)0xF3); @@ -3362,6 +3378,7 @@ void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); int dst_enc = kdst->encoding(); @@ -3384,6 +3401,7 @@ void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMReg assert(is_vector_masking(), ""); assert(VM_Version::supports_avx512vlbw(), ""); InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); + attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); @@ -3423,6 +3441,7 @@ void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Addre InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + 
attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -3493,6 +3512,7 @@ void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); + attributes.reset_is_clear_context(); int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x76); emit_int8((unsigned char)(0xC0 | encode)); @@ -3503,6 +3523,7 @@ void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vect InstructionMark im(this); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); attributes.set_is_evex_instruction(); int dst_enc = kdst->encoding(); vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -3532,6 +3553,7 @@ void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.reset_is_clear_context(); attributes.set_is_evex_instruction(); int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int8(0x29); @@ -3543,6 +3565,7 @@ void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, 
int vect assert(VM_Version::supports_evex(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.reset_is_clear_context(); attributes.set_is_evex_instruction(); attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); int dst_enc = kdst->encoding(); @@ -3763,6 +3786,7 @@ void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vecto InstructionMark im(this); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit); + attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); @@ -6208,6 +6232,7 @@ void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) { InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); emit_operand(src, dst); @@ -6238,6 +6263,7 @@ void Assembler::vextracti32x4(Address dst, XMMRegister src, uint8_t imm8) { InstructionMark im(this); InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x39); 
emit_operand(src, dst); @@ -6298,6 +6324,7 @@ void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) { InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_operand(src, dst); @@ -6328,6 +6355,7 @@ void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) { InstructionMark im(this); InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x19); emit_operand(src, dst); @@ -6371,6 +6399,7 @@ void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) { InstructionMark im(this); InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */ EVEX_64bit); + attributes.reset_is_clear_context(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x1B); emit_operand(src, dst); @@ -7181,7 +7210,9 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024 byte4 |= ((_attributes->get_vector_len())& 0x3) << 5; // last is EVEX.z for zero/merge actions - byte4 |= (_attributes->is_clear_context() ? 
EVEX_Z : 0); + if (_attributes->is_no_reg_mask() == false) { + byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0); + } emit_int8(byte4); } @@ -7337,7 +7368,7 @@ void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop emit_int8((unsigned char)(0xF & cop)); } -void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) { +void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) { assert(VM_Version::supports_avx(), ""); assert(!VM_Version::supports_evex(), ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); @@ -7348,6 +7379,15 @@ void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM emit_int8((unsigned char)(0xF0 & src2_enc<<4)); } +void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) { + assert(VM_Version::supports_avx2(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8((unsigned char)0x02); + emit_int8((unsigned char)(0xC0 | encode)); + emit_int8((unsigned char)imm8); +} + void Assembler::shlxl(Register dst, Register src1, Register src2) { assert(VM_Version::supports_bmi2(), ""); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false); diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index f787ec5533a..ada59ae288c 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -1550,6 +1550,7 @@ private: void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void 
vpermq(XMMRegister dst, XMMRegister src, int imm8); void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void pause(); @@ -2105,7 +2106,8 @@ private: // AVX support for vectorized conditional move (double). The following two instructions used only coupled. void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len); - void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); + void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); + void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len); protected: // Next instructions require address alignment 16 bytes SSE mode. @@ -2139,7 +2141,7 @@ public: _input_size_in_bits(Assembler::EVEX_NObit), _is_evex_instruction(false), _evex_encoding(0), - _is_clear_context(false), + _is_clear_context(true), _is_extended_context(false), _current_assembler(NULL), _embedded_opmask_register_specifier(1) { // hard code k1, it will be initialized for now @@ -2205,7 +2207,7 @@ public: void set_evex_encoding(int value) { _evex_encoding = value; } // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components - void set_is_clear_context(void) { _is_clear_context = true; } + void reset_is_clear_context(void) { _is_clear_context = false; } // Map back to current asembler so that we can manage object level assocation void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; } diff --git a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp index afc815c6739..43e1e00e485 100644 --- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp @@ -3187,7 +3187,6 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { if (flags & 
LIR_OpArrayCopy::length_positive_check) { __ testl(length, length); __ jcc(Assembler::less, *stub->entry()); - __ jcc(Assembler::zero, *stub->continuation()); } #ifdef _LP64 diff --git a/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp b/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp index 311ae27c8d0..c193029ea73 100644 --- a/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp +++ b/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -83,6 +83,7 @@ define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoRegScheduling, true); define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); +define_pd_global(bool, IdealizeClearArrayNode, true); define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); diff --git a/hotspot/src/cpu/x86/vm/frame_x86.cpp b/hotspot/src/cpu/x86/vm/frame_x86.cpp index 0a636f44ec0..9cbc29af4c9 100644 --- a/hotspot/src/cpu/x86/vm/frame_x86.cpp +++ b/hotspot/src/cpu/x86/vm/frame_x86.cpp @@ -140,6 +140,10 @@ bool frame::safe_for_sender(JavaThread *thread) { } sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? 
+ if ((address)sender_sp >= thread->stack_base()) { + return false; + } sender_unextended_sp = sender_sp; // On Intel the return_address is always the word on the stack sender_pc = (address) *(sender_sp-1); @@ -199,8 +203,15 @@ bool frame::safe_for_sender(JavaThread *thread) { } // construct the potential sender + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); - return sender.is_entry_frame_valid(thread); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); + + return jcw_safe; } CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); diff --git a/hotspot/src/cpu/x86/vm/globals_x86.hpp b/hotspot/src/cpu/x86/vm/globals_x86.hpp index 925ae225476..9081d0ce6b5 100644 --- a/hotspot/src/cpu/x86/vm/globals_x86.hpp +++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp @@ -38,6 +38,7 @@ define_pd_global(bool, ImplicitNullChecks, true); // Generate code for im define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast +define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. // See 4827828 for this change. There is no globals_core_i486.hpp. I can't // assign a different value for C2 without touching a number of files. Use // #ifdef to minimize the change as it's late in Mantis. -- FIXME. 
diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp index 150cf6d8301..99286eaa9da 100644 --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp @@ -4309,6 +4309,15 @@ void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int v } } +void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) { + if (reachable(src)) { + Assembler::vpand(dst, nds, as_Address(src), vector_len); + } else { + lea(rscratch1, src); + Assembler::vpand(dst, nds, Address(rscratch1, 0), vector_len); + } +} + void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) { int dst_enc = dst->encoding(); int src_enc = src->encoding(); diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp index 382b0011882..02fb401511e 100644 --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp @@ -943,6 +943,23 @@ class MacroAssembler: public Assembler { bool multi_block, XMMRegister shuf_mask); #endif +#ifdef _LP64 + private: + void sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, Register d, + Register e, Register f, Register g, Register h, int iteration); + + void sha512_AVX2_one_round_and_schedule(XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, + Register a, Register b, Register c, Register d, Register e, Register f, + Register g, Register h, int iteration); + + void addmq(int disp, Register r1, Register r2); + public: + void sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, + XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, + Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, + XMMRegister shuf_mask); +#endif + void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister 
e1, XMMRegister msg0, XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask, Register buf, Register state, Register ofs, Register limit, Register rsp, @@ -1177,6 +1194,10 @@ public: void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } + void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); } + void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len); + void vpbroadcastw(XMMRegister dst, XMMRegister src); void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp index b8af7c33158..37d93a48f21 100644 --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86_sha.cpp @@ -674,6 +674,11 @@ void MacroAssembler::addm(int disp, Register r1, Register r2) { movl(Address(r1, disp), r2); } +void MacroAssembler::addmq(int disp, Register r1, Register r2) { + addq(r2, Address(r1, disp)); + movq(Address(r1, disp), r2); +} + void MacroAssembler::sha256_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, Register buf, Register state, Register ofs, Register limit, Register rsp, @@ -1026,4 +1031,488 @@ bind(compute_size1); bind(compute_size_end1); } } + +void MacroAssembler::sha512_AVX2_one_round_compute(Register old_h, Register a, Register b, Register c, + Register d, Register e, Register f, Register g, Register h, + int iteration) +{ + + const Register& y0 = r13; + const Register& y1 = r14; + const Register& y2 = r15; +#ifdef _WIN64 + const Register& 
y3 = rcx; +#else + const Register& y3 = rdi; +#endif + const Register& T1 = r12; + + if (iteration % 4 > 0) { + addq(old_h, y2); //h = k + w + h + S0 + S1 + CH = t1 + S0; + } + movq(y2, f); //y2 = f; CH + rorxq(y0, e, 41); //y0 = e >> 41; S1A + rorxq(y1, e, 18); //y1 = e >> 18; S1B + xorq(y2, g); //y2 = f^g; CH + + xorq(y0, y1); //y0 = (e >> 41) ^ (e >> 18); S1 + rorxq(y1, e, 14); //y1 = (e >> 14); S1 + andq(y2, e); //y2 = (f^g)&e; CH + + if (iteration % 4 > 0 ) { + addq(old_h, y3); //h = t1 + S0 + MAJ + } + xorq(y0, y1); //y0 = (e >> 41) ^ (e >> 18) ^ (e >> 14); S1 + rorxq(T1, a, 34); //T1 = a >> 34; S0B + xorq(y2, g); //y2 = CH = ((f^g)&e) ^g; CH + rorxq(y1, a, 39); //y1 = a >> 39; S0A + movq(y3, a); //y3 = a; MAJA + + xorq(y1, T1); //y1 = (a >> 39) ^ (a >> 34); S0 + rorxq(T1, a, 28); //T1 = (a >> 28); S0 + addq(h, Address(rsp, (8 * iteration))); //h = k + w + h; -- + orq(y3, c); //y3 = a | c; MAJA + + xorq(y1, T1); //y1 = (a >> 39) ^ (a >> 34) ^ (a >> 28); S0 + movq(T1, a); //T1 = a; MAJB + andq(y3, b); //y3 = (a | c)&b; MAJA + andq(T1, c); //T1 = a&c; MAJB + addq(y2, y0); //y2 = S1 + CH; -- + + addq(d, h); //d = k + w + h + d; -- + orq(y3, T1); //y3 = MAJ = (a | c)&b) | (a&c); MAJ + addq(h, y1); //h = k + w + h + S0; -- + + addq(d, y2); //d = k + w + h + d + S1 + CH = d + t1; -- + + if (iteration % 4 == 3) { + addq(h, y2); //h = k + w + h + S0 + S1 + CH = t1 + S0; -- + addq(h, y3); //h = t1 + S0 + MAJ; -- + } +} + +void MacroAssembler::sha512_AVX2_one_round_and_schedule( + XMMRegister xmm4, // ymm4 + XMMRegister xmm5, // ymm5 + XMMRegister xmm6, // ymm6 + XMMRegister xmm7, // ymm7 + Register a, //rax + Register b, //rbx + Register c, //rdi + Register d, //rsi + Register e, //r8 + Register f, //r9 + Register g, //r10 + Register h, //r11 + int iteration) +{ + + const Register& y0 = r13; + const Register& y1 = r14; + const Register& y2 = r15; +#ifdef _WIN64 + const Register& y3 = rcx; +#else + const Register& y3 = rdi; +#endif + const Register& T1 = r12; + + if 
(iteration % 4 == 0) { + // Extract w[t - 7] + // xmm0 = W[-7] + vperm2f128(xmm0, xmm7, xmm6, 3); + vpalignr(xmm0, xmm0, xmm6, 8, AVX_256bit); + + // Calculate w[t - 16] + w[t - 7] + vpaddq(xmm0, xmm0, xmm4, AVX_256bit); //xmm0 = W[-7] + W[-16] + // Extract w[t - 15] + //xmm1 = W[-15] + vperm2f128(xmm1, xmm5, xmm4, 3); + vpalignr(xmm1, xmm1, xmm4, 8, AVX_256bit); + + // Calculate sigma0 + // Calculate w[t - 15] ror 1 + vpsrlq(xmm2, xmm1, 1, AVX_256bit); + vpsllq(xmm3, xmm1, (64 - 1), AVX_256bit); + vpor(xmm3, xmm3, xmm2, AVX_256bit); //xmm3 = W[-15] ror 1 + // Calculate w[t - 15] shr 7 + vpsrlq(xmm8, xmm1, 7, AVX_256bit); //xmm8 = W[-15] >> 7 + + } else if (iteration % 4 == 1) { + //Calculate w[t - 15] ror 8 + vpsrlq(xmm2, xmm1, 8, AVX_256bit); + vpsllq(xmm1, xmm1, (64 - 8), AVX_256bit); + vpor(xmm1, xmm1, xmm2, AVX_256bit); //xmm1 = W[-15] ror 8 + + //XOR the three components + vpxor(xmm3, xmm3, xmm8, AVX_256bit); //xmm3 = W[-15] ror 1 ^ W[-15] >> 7 + vpxor(xmm1, xmm3, xmm1, AVX_256bit); //xmm1 = s0 + + //Add three components, w[t - 16], w[t - 7] and sigma0 + vpaddq(xmm0, xmm0, xmm1, AVX_256bit); //xmm0 = W[-16] + W[-7] + s0 + + // Move to appropriate lanes for calculating w[16] and w[17] + vperm2f128(xmm4, xmm0, xmm0, 0); //xmm4 = W[-16] + W[-7] + s0{ BABA } + + address MASK_YMM_LO = StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512(); + //Move to appropriate lanes for calculating w[18] and w[19] + vpand(xmm0, xmm0, ExternalAddress(MASK_YMM_LO + 32), AVX_256bit); //xmm0 = W[-16] + W[-7] + s0{ DC00 } + //Calculate w[16] and w[17] in both 128 bit lanes + //Calculate sigma1 for w[16] and w[17] on both 128 bit lanes + vperm2f128(xmm2, xmm7, xmm7, 17); //xmm2 = W[-2] {BABA} + vpsrlq(xmm8, xmm2, 6, AVX_256bit); //xmm8 = W[-2] >> 6 {BABA} + + } else if (iteration % 4 == 2) { + vpsrlq(xmm3, xmm2, 19, AVX_256bit); //xmm3 = W[-2] >> 19 {BABA} + vpsllq(xmm1, xmm2, (64 - 19), AVX_256bit); //xmm1 = W[-2] << 19 {BABA} + vpor(xmm3, xmm3, xmm1, AVX_256bit); //xmm3 = W[-2] 
ror 19 {BABA} + vpxor(xmm8, xmm8, xmm3, AVX_256bit);// xmm8 = W[-2] ror 19 ^ W[-2] >> 6 {BABA} + vpsrlq(xmm3, xmm2, 61, AVX_256bit); //xmm3 = W[-2] >> 61 {BABA} + vpsllq(xmm1, xmm2, (64 - 61), AVX_256bit); //xmm1 = W[-2] << 61 {BABA} + vpor(xmm3, xmm3, xmm1, AVX_256bit); //xmm3 = W[-2] ror 61 {BABA} + vpxor(xmm8, xmm8, xmm3, AVX_256bit); //xmm8 = s1 = (W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) { BABA } + + //Add sigma1 to the other components to get w[16] and w[17] + vpaddq(xmm4, xmm4, xmm8, AVX_256bit); //xmm4 = { W[1], W[0], W[1], W[0] } + + //Calculate sigma1 for w[18] and w[19] for upper 128 bit lane + vpsrlq(xmm8, xmm4, 6, AVX_256bit); //xmm8 = W[-2] >> 6 {DC--} + + } else if (iteration % 4 == 3){ + vpsrlq(xmm3, xmm4, 19, AVX_256bit); //xmm3 = W[-2] >> 19 {DC--} + vpsllq(xmm1, xmm4, (64 - 19), AVX_256bit); //xmm1 = W[-2] << 19 {DC--} + vpor(xmm3, xmm3, xmm1, AVX_256bit); //xmm3 = W[-2] ror 19 {DC--} + vpxor(xmm8, xmm8, xmm3, AVX_256bit); //xmm8 = W[-2] ror 19 ^ W[-2] >> 6 {DC--} + vpsrlq(xmm3, xmm4, 61, AVX_256bit); //xmm3 = W[-2] >> 61 {DC--} + vpsllq(xmm1, xmm4, (64 - 61), AVX_256bit); //xmm1 = W[-2] << 61 {DC--} + vpor(xmm3, xmm3, xmm1, AVX_256bit); //xmm3 = W[-2] ror 61 {DC--} + vpxor(xmm8, xmm8, xmm3, AVX_256bit); //xmm8 = s1 = (W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) { DC-- } + + //Add the sigma0 + w[t - 7] + w[t - 16] for w[18] and w[19] to newly calculated sigma1 to get w[18] and w[19] + vpaddq(xmm2, xmm0, xmm8, AVX_256bit); //xmm2 = { W[3], W[2], --, -- } + + //Form w[19, w[18], w17], w[16] + vpblendd(xmm4, xmm4, xmm2, 0xF0, AVX_256bit); //xmm4 = { W[3], W[2], W[1], W[0] } + } + + movq(y3, a); //y3 = a; MAJA + rorxq(y0, e, 41); // y0 = e >> 41; S1A + rorxq(y1, e, 18); //y1 = e >> 18; S1B + addq(h, Address(rsp, (iteration * 8))); //h = k + w + h; -- + orq(y3, c); //y3 = a | c; MAJA + movq(y2, f); //y2 = f; CH + + xorq(y2, g); //y2 = f^g; CH + + rorxq(T1, a, 34); //T1 = a >> 34; S0B + xorq(y0, y1); //y0 = (e >> 41) ^ (e >> 18); S1 + + 
rorxq(y1, e, 14); //y1 = (e >> 14); S1 + + andq(y2, e); //y2 = (f^g) & e; CH + addq(d, h); //d = k + w + h + d; -- + + andq(y3, b); //y3 = (a | c)&b; MAJA + xorq(y0, y1); //y0 = (e >> 41) ^ (e >> 18) ^ (e >> 14); S1 + rorxq(y1, a, 39); //y1 = a >> 39; S0A + + xorq(y1, T1); //y1 = (a >> 39) ^ (a >> 34); S0 + rorxq(T1, a, 28); //T1 = (a >> 28); S0 + xorq(y2, g); //y2 = CH = ((f^g)&e) ^ g; CH + + xorq(y1, T1); //y1 = (a >> 39) ^ (a >> 34) ^ (a >> 28); S0 + movq(T1, a); //T1 = a; MAJB + + andq(T1, c); //T1 = a&c; MAJB + addq(y2, y0); //y2 = S1 + CH; -- + + orq(y3, T1); //y3 = MAJ = (a | c)&b) | (a&c); MAJ + addq(h, y1); //h = k + w + h + S0; -- + + addq(d, y2); //d = k + w + h + d + S1 + CH = d + t1; -- + addq(h, y2); //h = k + w + h + S0 + S1 + CH = t1 + S0; -- + addq(h, y3); //h = t1 + S0 + MAJ; -- +} + +void MacroAssembler::sha512_AVX2(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, + XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, + Register buf, Register state, Register ofs, Register limit, Register rsp, + bool multi_block, XMMRegister shuf_mask) +{ + + Label loop0, loop1, loop2, done_hash, + compute_block_size, compute_size, + compute_block_size_end, compute_size_end; + + address K512_W = StubRoutines::x86::k512_W_addr(); + address pshuffle_byte_flip_mask_sha512 = StubRoutines::x86::pshuffle_byte_flip_mask_addr_sha512(); + address pshuffle_byte_flip_mask_addr = 0; + + const XMMRegister& XFER = xmm0; // YTMP0 + const XMMRegister& BYTE_FLIP_MASK = xmm9; // ymm9 +#ifdef _WIN64 + const Register& INP = rcx; //1st arg + const Register& CTX = rdx; //2nd arg + const Register& NUM_BLKS = r8; //3rd arg + const Register& c = rdi; + const Register& d = rsi; + const Register& e = r8; + const Register& y3 = rcx; + const Register& offset = r8; + const Register& input_limit = r9; +#else + const Register& INP = rdi; //1st arg + const Register& CTX = rsi; //2nd arg + const Register& NUM_BLKS = rdx; //3rd arg + const 
Register& c = rcx; + const Register& d = r8; + const Register& e = rdx; + const Register& y3 = rdi; + const Register& offset = rdx; + const Register& input_limit = rcx; +#endif + + const Register& TBL = rbp; + + const Register& a = rax; + const Register& b = rbx; + + const Register& f = r9; + const Register& g = r10; + const Register& h = r11; + + //Local variables as defined in assembly file. + enum + { + _XFER_SIZE = 4 * 8, // resq 4 => reserve 4 quadwords. Hence 4 * 8 + _SRND_SIZE = 8, // resq 1 + _INP_SIZE = 8, + _INP_END_SIZE = 8, + _RSP_SAVE_SIZE = 8, // defined as resq 1 + +#ifdef _WIN64 + _GPR_SAVE_SIZE = 8 * 8, // defined as resq 8 +#else + _GPR_SAVE_SIZE = 6 * 8 // resq 6 +#endif + }; + + enum + { + _XFER = 0, + _SRND = _XFER + _XFER_SIZE, // 32 + _INP = _SRND + _SRND_SIZE, // 40 + _INP_END = _INP + _INP_SIZE, // 48 + _RSP = _INP_END + _INP_END_SIZE, // 56 + _GPR = _RSP + _RSP_SAVE_SIZE, // 64 + _STACK_SIZE = _GPR + _GPR_SAVE_SIZE // 128 for windows and 112 for linux. + }; + +//Saving offset and limit as it will help with blocksize calculation for multiblock SHA512. +#ifdef _WIN64 + push(r8); // win64: this is ofs + push(r9); // win64: this is limit, we need them again at the very end. +#else + push(rdx); // linux : this is ofs, need at the end for multiblock calculation + push(rcx); // linux: This is the limit. 
+#endif + + //Allocate Stack Space + movq(rax, rsp); + subq(rsp, _STACK_SIZE); + andq(rsp, -32); + movq(Address(rsp, _RSP), rax); + + //Save GPRs + movq(Address(rsp, _GPR), rbp); + movq(Address(rsp, (_GPR + 8)), rbx); + movq(Address(rsp, (_GPR + 16)), r12); + movq(Address(rsp, (_GPR + 24)), r13); + movq(Address(rsp, (_GPR + 32)), r14); + movq(Address(rsp, (_GPR + 40)), r15); + +#ifdef _WIN64 + movq(Address(rsp, (_GPR + 48)), rsi); + movq(Address(rsp, (_GPR + 56)), rdi); +#endif + + vpblendd(xmm0, xmm0, xmm1, 0xF0, AVX_128bit); + vpblendd(xmm0, xmm0, xmm1, 0xF0, AVX_256bit); + + if (multi_block) { + xorq(rax, rax); + bind(compute_block_size); + cmpptr(offset, input_limit); // Assuming that offset is less than limit. + jccb(Assembler::aboveEqual, compute_block_size_end); + addq(offset, 128); + addq(rax, 128); + jmpb(compute_block_size); + + bind(compute_block_size_end); + movq(NUM_BLKS, rax); + + cmpq(NUM_BLKS, 0); + jcc(Assembler::equal, done_hash); + } else { + xorq(NUM_BLKS, NUM_BLKS); //If single block. 
+ addq(NUM_BLKS, 128); + } + + addq(NUM_BLKS, INP); //pointer to end of data + movq(Address(rsp, _INP_END), NUM_BLKS); + + //load initial digest + movq(a, Address(CTX, 8 * 0)); + movq(b, Address(CTX, 8 * 1)); + movq(c, Address(CTX, 8 * 2)); + movq(d, Address(CTX, 8 * 3)); + movq(e, Address(CTX, 8 * 4)); + movq(f, Address(CTX, 8 * 5)); + movq(g, Address(CTX, 8 * 6)); + movq(h, Address(CTX, 8 * 7)); + + pshuffle_byte_flip_mask_addr = pshuffle_byte_flip_mask_sha512; + vmovdqu(BYTE_FLIP_MASK, ExternalAddress(pshuffle_byte_flip_mask_addr + 0)); //PSHUFFLE_BYTE_FLIP_MASK wrt rip + + bind(loop0); + lea(TBL, ExternalAddress(K512_W)); + + //byte swap first 16 dwords + vmovdqu(xmm4, Address(INP, 32 * 0)); + vpshufb(xmm4, xmm4, BYTE_FLIP_MASK, AVX_256bit); + vmovdqu(xmm5, Address(INP, 32 * 1)); + vpshufb(xmm5, xmm5, BYTE_FLIP_MASK, AVX_256bit); + vmovdqu(xmm6, Address(INP, 32 * 2)); + vpshufb(xmm6, xmm6, BYTE_FLIP_MASK, AVX_256bit); + vmovdqu(xmm7, Address(INP, 32 * 3)); + vpshufb(xmm7, xmm7, BYTE_FLIP_MASK, AVX_256bit); + + movq(Address(rsp, _INP), INP); + + movslq(Address(rsp, _SRND), 4); + align(16); + + //Schedule 64 input dwords, by calling sha512_AVX2_one_round_and_schedule + bind(loop1); + vpaddq(xmm0, xmm4, Address(TBL, 0 * 32), AVX_256bit); + vmovdqu(Address(rsp, _XFER), xmm0); + //four rounds and schedule + sha512_AVX2_one_round_and_schedule(xmm4, xmm5, xmm6, xmm7, a, b, c, d, e, f, g, h, 0); + sha512_AVX2_one_round_and_schedule(xmm4, xmm5, xmm6, xmm7, h, a, b, c, d, e, f, g, 1); + sha512_AVX2_one_round_and_schedule(xmm4, xmm5, xmm6, xmm7, g, h, a, b, c, d, e, f, 2); + sha512_AVX2_one_round_and_schedule(xmm4, xmm5, xmm6, xmm7, f, g, h, a, b, c, d, e, 3); + + vpaddq(xmm0, xmm5, Address(TBL, 1 * 32), AVX_256bit); + vmovdqu(Address(rsp, _XFER), xmm0); + //four rounds and schedule + sha512_AVX2_one_round_and_schedule(xmm5, xmm6, xmm7, xmm4, e, f, g, h, a, b, c, d, 0); + sha512_AVX2_one_round_and_schedule(xmm5, xmm6, xmm7, xmm4, d, e, f, g, h, a, b, c, 1); + 
sha512_AVX2_one_round_and_schedule(xmm5, xmm6, xmm7, xmm4, c, d, e, f, g, h, a, b, 2); + sha512_AVX2_one_round_and_schedule(xmm5, xmm6, xmm7, xmm4, b, c, d, e, f, g, h, a, 3); + + vpaddq(xmm0, xmm6, Address(TBL, 2 * 32), AVX_256bit); + vmovdqu(Address(rsp, _XFER), xmm0); + //four rounds and schedule + sha512_AVX2_one_round_and_schedule(xmm6, xmm7, xmm4, xmm5, a, b, c, d, e, f, g, h, 0); + sha512_AVX2_one_round_and_schedule(xmm6, xmm7, xmm4, xmm5, h, a, b, c, d, e, f, g, 1); + sha512_AVX2_one_round_and_schedule(xmm6, xmm7, xmm4, xmm5, g, h, a, b, c, d, e, f, 2); + sha512_AVX2_one_round_and_schedule(xmm6, xmm7, xmm4, xmm5, f, g, h, a, b, c, d, e, 3); + + vpaddq(xmm0, xmm7, Address(TBL, 3 * 32), AVX_256bit); + vmovdqu(Address(rsp, _XFER), xmm0); + addq(TBL, 4 * 32); + //four rounds and schedule + sha512_AVX2_one_round_and_schedule(xmm7, xmm4, xmm5, xmm6, e, f, g, h, a, b, c, d, 0); + sha512_AVX2_one_round_and_schedule(xmm7, xmm4, xmm5, xmm6, d, e, f, g, h, a, b, c, 1); + sha512_AVX2_one_round_and_schedule(xmm7, xmm4, xmm5, xmm6, c, d, e, f, g, h, a, b, 2); + sha512_AVX2_one_round_and_schedule(xmm7, xmm4, xmm5, xmm6, b, c, d, e, f, g, h, a, 3); + + subq(Address(rsp, _SRND), 1); + jcc(Assembler::notEqual, loop1); + + movslq(Address(rsp, _SRND), 2); + + bind(loop2); + vpaddq(xmm0, xmm4, Address(TBL, 0 * 32), AVX_256bit); + vmovdqu(Address(rsp, _XFER), xmm0); + //four rounds and compute. + sha512_AVX2_one_round_compute(a, a, b, c, d, e, f, g, h, 0); + sha512_AVX2_one_round_compute(h, h, a, b, c, d, e, f, g, 1); + sha512_AVX2_one_round_compute(g, g, h, a, b, c, d, e, f, 2); + sha512_AVX2_one_round_compute(f, f, g, h, a, b, c, d, e, 3); + + vpaddq(xmm0, xmm5, Address(TBL, 1 * 32), AVX_256bit); + vmovdqu(Address(rsp, _XFER), xmm0); + addq(TBL, 2 * 32); + // four rounds and compute. 
+ sha512_AVX2_one_round_compute(e, e, f, g, h, a, b, c, d, 0); + sha512_AVX2_one_round_compute(d, d, e, f, g, h, a, b, c, 1); + sha512_AVX2_one_round_compute(c, c, d, e, f, g, h, a, b, 2); + sha512_AVX2_one_round_compute(b, b, c, d, e, f, g, h, a, 3); + + vmovdqu(xmm4, xmm6); + vmovdqu(xmm5, xmm7); + + subq(Address(rsp, _SRND), 1); + jcc(Assembler::notEqual, loop2); + + addmq(8 * 0, CTX, a); + addmq(8 * 1, CTX, b); + addmq(8 * 2, CTX, c); + addmq(8 * 3, CTX, d); + addmq(8 * 4, CTX, e); + addmq(8 * 5, CTX, f); + addmq(8 * 6, CTX, g); + addmq(8 * 7, CTX, h); + + movq(INP, Address(rsp, _INP)); + addq(INP, 128); + cmpq(INP, Address(rsp, _INP_END)); + jcc(Assembler::notEqual, loop0); + + bind(done_hash); + + //Restore GPRs + movq(rbp, Address(rsp, (_GPR + 0))); + movq(rbx, Address(rsp, (_GPR + 8))); + movq(r12, Address(rsp, (_GPR + 16))); + movq(r13, Address(rsp, (_GPR + 24))); + movq(r14, Address(rsp, (_GPR + 32))); + movq(r15, Address(rsp, (_GPR + 40))); + +#ifdef _WIN64 + movq(rsi, Address(rsp, (_GPR + 48))); + movq(rdi, Address(rsp, (_GPR + 56))); +#endif + + //Restore Stack Pointer + movq(rsp, Address(rsp, _RSP)); + +#ifdef _WIN64 + pop(r9); + pop(r8); +#else + pop(rcx); + pop(rdx); +#endif + + if (multi_block) { +#ifdef _WIN64 + const Register& limit_end = r9; + const Register& ofs_end = r8; +#else + const Register& limit_end = rcx; + const Register& ofs_end = rdx; +#endif + movq(rax, ofs_end); + bind(compute_size); + cmpptr(rax, limit_end); + jccb(Assembler::aboveEqual, compute_size_end); + addq(rax, 128); + jmpb(compute_size); + bind(compute_size_end); + } +} + #endif //#ifdef _LP64 + diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp index 4906f9dc89f..7ffe08b29fc 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -3718,6 +3718,25 @@ class StubGenerator: public StubCodeGenerator { return start; } + //Mask for byte-swapping a couple of 
qwords in an XMM register using (v)pshufb. + address generate_pshuffle_byte_flip_mask_sha512() { + __ align(32); + StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask_sha512"); + address start = __ pc(); + if (VM_Version::supports_avx2()) { + __ emit_data64(0x0001020304050607, relocInfo::none); // PSHUFFLE_BYTE_FLIP_MASK + __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); + __ emit_data64(0x1011121314151617, relocInfo::none); + __ emit_data64(0x18191a1b1c1d1e1f, relocInfo::none); + __ emit_data64(0x0000000000000000, relocInfo::none); //MASK_YMM_LO + __ emit_data64(0x0000000000000000, relocInfo::none); + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); + } + + return start; + } + // ofs and limit are use for multi-block byte array. // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) address generate_sha256_implCompress(bool multi_block, const char *name) { @@ -3761,6 +3780,39 @@ class StubGenerator: public StubCodeGenerator { return start; } + address generate_sha512_implCompress(bool multi_block, const char *name) { + assert(VM_Version::supports_avx2(), ""); + assert(VM_Version::supports_bmi2(), ""); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register ofs = c_rarg2; + Register limit = c_rarg3; + + const XMMRegister msg = xmm0; + const XMMRegister state0 = xmm1; + const XMMRegister state1 = xmm2; + const XMMRegister msgtmp0 = xmm3; + const XMMRegister msgtmp1 = xmm4; + const XMMRegister msgtmp2 = xmm5; + const XMMRegister msgtmp3 = xmm6; + const XMMRegister msgtmp4 = xmm7; + + const XMMRegister shuf_mask = xmm8; + + __ enter(); + + __ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, + buf, state, ofs, limit, rsp, multi_block, shuf_mask); + + __ leave(); + __ ret(0); + return start; + } + // This is 
a version of CTR/AES crypt which does 6 blocks in a loop at a time // to hide instruction latency // @@ -5081,6 +5133,12 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); } + if (UseSHA512Intrinsics) { + StubRoutines::x86::_k512_W_addr = (address)StubRoutines::x86::_k512_W; + StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = generate_pshuffle_byte_flip_mask_sha512(); + StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); + StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); + } // Generate GHASH intrinsics code if (UseGHASHIntrinsics) { diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp index ea7952e9660..3922d45091c 100644 --- a/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp +++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp @@ -48,6 +48,8 @@ address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL; address StubRoutines::x86::_k256_adr = NULL; #ifdef _LP64 address StubRoutines::x86::_k256_W_adr = NULL; +address StubRoutines::x86::_k512_W_addr = NULL; +address StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = NULL; #endif address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL; @@ -297,4 +299,49 @@ ALIGNED_(64) juint StubRoutines::x86::_k256[] = // used in MacroAssembler::sha256_AVX2 // dynamically built from _k256 ALIGNED_(64) juint StubRoutines::x86::_k256_W[2*sizeof(StubRoutines::x86::_k256)]; + +// used in MacroAssembler::sha512_AVX2 +ALIGNED_(64) julong StubRoutines::x86::_k512_W[] = +{ + 0x428a2f98d728ae22LL, 0x7137449123ef65cdLL, + 0xb5c0fbcfec4d3b2fLL, 0xe9b5dba58189dbbcLL, + 0x3956c25bf348b538LL, 0x59f111f1b605d019LL, + 0x923f82a4af194f9bLL, 0xab1c5ed5da6d8118LL, + 0xd807aa98a3030242LL, 
0x12835b0145706fbeLL, + 0x243185be4ee4b28cLL, 0x550c7dc3d5ffb4e2LL, + 0x72be5d74f27b896fLL, 0x80deb1fe3b1696b1LL, + 0x9bdc06a725c71235LL, 0xc19bf174cf692694LL, + 0xe49b69c19ef14ad2LL, 0xefbe4786384f25e3LL, + 0x0fc19dc68b8cd5b5LL, 0x240ca1cc77ac9c65LL, + 0x2de92c6f592b0275LL, 0x4a7484aa6ea6e483LL, + 0x5cb0a9dcbd41fbd4LL, 0x76f988da831153b5LL, + 0x983e5152ee66dfabLL, 0xa831c66d2db43210LL, + 0xb00327c898fb213fLL, 0xbf597fc7beef0ee4LL, + 0xc6e00bf33da88fc2LL, 0xd5a79147930aa725LL, + 0x06ca6351e003826fLL, 0x142929670a0e6e70LL, + 0x27b70a8546d22ffcLL, 0x2e1b21385c26c926LL, + 0x4d2c6dfc5ac42aedLL, 0x53380d139d95b3dfLL, + 0x650a73548baf63deLL, 0x766a0abb3c77b2a8LL, + 0x81c2c92e47edaee6LL, 0x92722c851482353bLL, + 0xa2bfe8a14cf10364LL, 0xa81a664bbc423001LL, + 0xc24b8b70d0f89791LL, 0xc76c51a30654be30LL, + 0xd192e819d6ef5218LL, 0xd69906245565a910LL, + 0xf40e35855771202aLL, 0x106aa07032bbd1b8LL, + 0x19a4c116b8d2d0c8LL, 0x1e376c085141ab53LL, + 0x2748774cdf8eeb99LL, 0x34b0bcb5e19b48a8LL, + 0x391c0cb3c5c95a63LL, 0x4ed8aa4ae3418acbLL, + 0x5b9cca4f7763e373LL, 0x682e6ff3d6b2b8a3LL, + 0x748f82ee5defb2fcLL, 0x78a5636f43172f60LL, + 0x84c87814a1f0ab72LL, 0x8cc702081a6439ecLL, + 0x90befffa23631e28LL, 0xa4506cebde82bde9LL, + 0xbef9a3f7b2c67915LL, 0xc67178f2e372532bLL, + 0xca273eceea26619cLL, 0xd186b8c721c0c207LL, + 0xeada7dd6cde0eb1eLL, 0xf57d4f7fee6ed178LL, + 0x06f067aa72176fbaLL, 0x0a637dc5a2c898a6LL, + 0x113f9804bef90daeLL, 0x1b710b35131c471bLL, + 0x28db77f523047d84LL, 0x32caab7b40c72493LL, + 0x3c9ebe0a15c9bebcLL, 0x431d67c49c100d4cLL, + 0x4cc5d4becb3e42b6LL, 0x597f299cfc657e2aLL, + 0x5fcb6fab3ad6faecLL, 0x6c44198c4a475817LL, +}; #endif diff --git a/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp b/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp index 3caac41393e..afd9e90d736 100644 --- a/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp +++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp @@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_ enum 
platform_dependent_constants { code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small) - code_size2 = 33800 LP64_ONLY(+1200) // simply increase if too small (assembler will crash if too small) + code_size2 = 33800 LP64_ONLY(+10000) // simply increase if too small (assembler will crash if too small) }; class x86 { @@ -134,6 +134,10 @@ class x86 { #ifdef _LP64 static juint _k256_W[]; static address _k256_W_adr; + static julong _k512_W[]; + static address _k512_W_addr; + // byte flip mask for sha512 + static address _pshuffle_byte_flip_mask_addr_sha512; #endif // byte flip mask for sha256 static address _pshuffle_byte_flip_mask_addr; @@ -192,6 +196,8 @@ class x86 { static address k256_addr() { return _k256_adr; } #ifdef _LP64 static address k256_W_addr() { return _k256_W_adr; } + static address k512_W_addr() { return _k512_W_addr; } + static address pshuffle_byte_flip_mask_addr_sha512() { return _pshuffle_byte_flip_mask_addr_sha512; } #endif static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; } static void generate_CRC32C_table(bool is_pclmulqdq_supported); diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp index aa10ef276f2..93c6e580062 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp @@ -362,6 +362,19 @@ class VM_Version_StubGenerator: public StubCodeGenerator { VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts UseAVX = 3; UseSSE = 2; +#ifdef _WINDOWS + // xmm5-xmm15 are not preserved by caller on windows + // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx + __ subptr(rsp, 64); + __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit); +#ifdef _LP64 + __ subptr(rsp, 64); + __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit); + __ subptr(rsp, 64); + __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit); +#endif // _LP64 +#endif // _WINDOWS 
+ // load value into all 64 bytes of zmm7 register __ movl(rcx, VM_Version::ymm_test_value()); __ movdl(xmm0, rcx); @@ -381,6 +394,17 @@ class VM_Version_StubGenerator: public StubCodeGenerator { VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts UseAVX = 1; UseSSE = 2; +#ifdef _WINDOWS + __ subptr(rsp, 32); + __ vmovdqu(Address(rsp, 0), xmm7); +#ifdef _LP64 + __ subptr(rsp, 32); + __ vmovdqu(Address(rsp, 0), xmm8); + __ subptr(rsp, 32); + __ vmovdqu(Address(rsp, 0), xmm15); +#endif // _LP64 +#endif // _WINDOWS + // load value into all 32 bytes of ymm7 register __ movl(rcx, VM_Version::ymm_test_value()); @@ -428,6 +452,17 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); #endif + +#ifdef _WINDOWS +#ifdef _LP64 + __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); + __ addptr(rsp, 64); + __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit); + __ addptr(rsp, 64); +#endif // _LP64 + __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit); + __ addptr(rsp, 64); +#endif // _WINDOWS VM_Version::clean_cpuFeatures(); UseAVX = saved_useavx; UseSSE = saved_usesse; @@ -445,6 +480,17 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ vmovdqu(Address(rsi, 64), xmm8); __ vmovdqu(Address(rsi, 96), xmm15); #endif + +#ifdef _WINDOWS +#ifdef _LP64 + __ vmovdqu(xmm15, Address(rsp, 0)); + __ addptr(rsp, 32); + __ vmovdqu(xmm8, Address(rsp, 0)); + __ addptr(rsp, 32); +#endif // _LP64 + __ vmovdqu(xmm7, Address(rsp, 0)); + __ addptr(rsp, 32); +#endif // _WINDOWS VM_Version::clean_cpuFeatures(); UseAVX = saved_useavx; UseSSE = saved_usesse; @@ -769,7 +815,11 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); } - if (UseSHA512Intrinsics) { + if (UseSHA) { + if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); + } + } else if 
(UseSHA512Intrinsics) { warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } diff --git a/hotspot/src/cpu/x86/vm/x86.ad b/hotspot/src/cpu/x86/vm/x86.ad index 24eab7f3785..3dd25561c0d 100644 --- a/hotspot/src/cpu/x86/vm/x86.ad +++ b/hotspot/src/cpu/x86/vm/x86.ad @@ -8173,13 +8173,13 @@ instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd cop match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); effect(TEMP dst, USE src1, USE src2); format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" - "vpblendd $dst,$src1,$src2,$dst ! vcmovevd\n\t" + "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" %} ins_encode %{ int vector_len = 1; int cond = (Assembler::Condition)($copnd$$cmpcode); __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len); - __ vpblendd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); + __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len); %} ins_pipe( pipe_slow ); %} diff --git a/hotspot/src/cpu/zero/vm/globals_zero.hpp b/hotspot/src/cpu/zero/vm/globals_zero.hpp index 7ce33a94447..f9a4172ea84 100644 --- a/hotspot/src/cpu/zero/vm/globals_zero.hpp +++ b/hotspot/src/cpu/zero/vm/globals_zero.hpp @@ -39,6 +39,7 @@ define_pd_global(bool, ImplicitNullChecks, true); define_pd_global(bool, TrapBasedNullChecks, false); define_pd_global(bool, UncommonNullCast, true); +define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. 
define_pd_global(intx, CodeEntryAlignment, 32); define_pd_global(intx, OptoLoopAlignment, 16); define_pd_global(intx, InlineFrequencyCount, 100); diff --git a/hotspot/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/hotspot/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h index 8ee83df4557..007ea533f65 100644 --- a/hotspot/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/hotspot/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h @@ -76,6 +76,9 @@ combination of ptrace and /proc calls. #include #define user_regs_struct user_pt_regs #endif +#if defined(s390x) +#include +#endif // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/hotspot/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m b/hotspot/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m index 044340aa511..d2a52c22b3a 100644 --- a/hotspot/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m +++ b/hotspot/src/jdk.hotspot.agent/macosx/native/libsaproc/MacosxDebuggerLocal.m @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #include #import #import +#import #include diff --git a/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/interpreter/BytecodeInvoke.java b/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/interpreter/BytecodeInvoke.java index b6b6460a3fd..393bb83c9b5 100644 --- a/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/interpreter/BytecodeInvoke.java +++ b/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/interpreter/BytecodeInvoke.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -112,21 +112,28 @@ public class BytecodeInvoke extends BytecodeWithCPIndex { buf.append('#'); buf.append(Integer.toString(indexForFieldOrMethod())); if (isInvokedynamic()) { - buf.append('('); - buf.append(Integer.toString(index())); - buf.append(')'); + ConstantPool cp = method.getConstants(); + buf.append('('); + int poolIndex = cp.invokeDynamicNameAndTypeRefIndexAt(indexForFieldOrMethod()); + buf.append(Integer.toString(poolIndex)); + buf.append(')'); + buf.append(" [Name and Type "); + buf.append(name().asString()); + buf.append(":"); + buf.append(signature().asString().replace('/', '.')); + } else { + buf.append(" [Method "); + StringBuffer sigBuf = new StringBuffer(); + new SignatureConverter(signature(), sigBuf).iterateReturntype(); + buf.append(sigBuf.toString().replace('/', '.')); + buf.append(spaces); + buf.append(name().asString()); + buf.append('('); + sigBuf = new StringBuffer(); + new SignatureConverter(signature(), sigBuf).iterateParameters(); + buf.append(sigBuf.toString().replace('/', '.')); + buf.append(')'); } - buf.append(" [Method "); - StringBuffer sigBuf = new 
StringBuffer(); - new SignatureConverter(signature(), sigBuf).iterateReturntype(); - buf.append(sigBuf.toString().replace('/', '.')); - buf.append(spaces); - buf.append(name().asString()); - buf.append('('); - sigBuf = new StringBuffer(); - new SignatureConverter(signature(), sigBuf).iterateParameters(); - buf.append(sigBuf.toString().replace('/', '.')); - buf.append(')'); buf.append(']'); if (code() != javaCode()) { buf.append(spaces); diff --git a/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/ConstantPool.java b/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/ConstantPool.java index 8a443833f8c..fc1f461acd4 100644 --- a/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/ConstantPool.java +++ b/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/ConstantPool.java @@ -307,7 +307,7 @@ public class ConstantPool extends Metadata implements ClassConstants { return member_index; } - int invokeDynamicNameAndTypeRefIndexAt(int which) { + public int invokeDynamicNameAndTypeRefIndexAt(int which) { // assert(tag_at(which).is_invoke_dynamic(), "Corrupted constant pool"); return extractHighShortFromInt(getIntAt(which)); } diff --git a/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java b/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java index 37b730693f0..a5c811b8280 100644 --- a/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java +++ b/hotspot/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -691,17 +691,21 @@ public class HTMLGenerator implements /* imports */ ClassConstants { } else { buf.append(instrStr); } - } else if(instr instanceof BytecodeInvoke) { + } else if (instr instanceof BytecodeInvoke) { BytecodeInvoke invokeBytecode = (BytecodeInvoke) instr; - Method m = invokeBytecode.getInvokedMethod(); - if (m != null) { - buf.link(genMethodHref(m), instrStr); - buf.append(" of "); - InstanceKlass klass = (InstanceKlass) m.getMethodHolder(); - buf.link(genKlassHref(klass), genKlassTitle(klass)); + if (invokeBytecode.isInvokedynamic()) { + buf.append(instrStr); } else { - buf.append(instrStr); - } + Method m = invokeBytecode.getInvokedMethod(); + if (m != null) { + buf.link(genMethodHref(m), instrStr); + buf.append(" of "); + InstanceKlass klass = (InstanceKlass) m.getMethodHolder(); + buf.link(genKlassHref(klass), genKlassTitle(klass)); + } else { + buf.append(instrStr); + } + } } else if (instr instanceof BytecodeGetPut) { BytecodeGetPut getPut = (BytecodeGetPut) instr; sun.jvm.hotspot.oops.Field f = getPut.getField(); diff --git a/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/libproc.h b/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/libproc.h index da918394674..a91f72a7e31 100644 --- a/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/libproc.h +++ b/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/libproc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -420,8 +420,6 @@ extern uintptr_t Ppltdest(struct ps_prochandle *, uintptr_t, int *); /* * Stack frame iteration interface. 
*/ -#ifdef SOLARIS_11_B159_OR_LATER -/* building on Nevada-B159 or later so define the new callback */ typedef int proc_stack_f( void *, /* the cookie given to Pstack_iter() */ const prgregset_t, /* the frame's registers */ @@ -432,10 +430,6 @@ typedef int proc_stack_f( #define PR_SIGNAL_FRAME 1 /* called by a signal handler */ #define PR_FOUND_SIGNAL 2 /* we found the corresponding signal number */ -#else -/* building on Nevada-B158 or earlier so define the old callback */ -typedef int proc_stack_f(void *, const prgregset_t, uint_t, const long *); -#endif extern int Pstack_iter(struct ps_prochandle *, const prgregset_t, proc_stack_f *, void *); diff --git a/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/salibproc.h b/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/salibproc.h index 1e6d9cfbd6b..8bd6b1076ab 100644 --- a/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/salibproc.h +++ b/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/salibproc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,6 +46,17 @@ extern "C" { #endif +/* extended symbol table information */ +typedef struct { + const char *prs_object; /* object name */ + const char *prs_name; /* symbol name */ + Lmid_t prs_lmid; /* link map id */ + uint_t prs_id; /* symbol id */ + uint_t prs_table; /* symbol table id */ +} prsyminfo_t; + +typedef struct ps_prochandle ps_prochandle_t; + /* * 'object_name' is the name of a load object obtained from an * iteration over the process's address space mappings (Pmapping_iter), @@ -53,8 +64,10 @@ extern "C" { * or else it is one of the special PR_OBJ_* values above. 
*/ -extern int Plookup_by_addr(struct ps_prochandle *, - uintptr_t, char *, size_t, GElf_Sym *); +extern int Plookup_by_addr(ps_prochandle_t *, uintptr_t, char *, + size_t, GElf_Sym *, prsyminfo_t *); +extern ps_prochandle_t *proc_arg_grab(const char *, int, int, + int *, const char **); typedef int proc_map_f(void *, const prmap_t *, const char *); extern int Pobject_iter(struct ps_prochandle *, proc_map_f *, void *); @@ -88,7 +101,6 @@ extern int Pobject_iter(struct ps_prochandle *, proc_map_f *, void *); #define G_ELF 13 /* Libelf error, elf_errno() is meaningful */ #define G_NOTE 14 /* Required PT_NOTE Phdr not present in core */ -extern struct ps_prochandle *proc_arg_grab(const char *, int, int, int *); extern const pstatus_t *Pstatus(struct ps_prochandle *); /* Flags accepted by Prelease (partial) */ @@ -101,8 +113,6 @@ extern int Pstop(struct ps_prochandle *, uint_t); /* * Stack frame iteration interface. */ -#ifdef SOLARIS_11_B159_OR_LATER -/* building on Nevada-B159 or later so define the new callback */ typedef int proc_stack_f( void *, /* the cookie given to Pstack_iter() */ const prgregset_t, /* the frame's registers */ @@ -113,10 +123,6 @@ typedef int proc_stack_f( #define PR_SIGNAL_FRAME 1 /* called by a signal handler */ #define PR_FOUND_SIGNAL 2 /* we found the corresponding signal number */ -#else -/* building on Nevada-B158 or earlier so define the old callback */ -typedef int proc_stack_f(void *, const prgregset_t, uint_t, const long *); -#endif extern int Pstack_iter(struct ps_prochandle *, const prgregset_t, proc_stack_f *, void *); diff --git a/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/saproc.cpp b/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/saproc.cpp index d727740ee34..f584856e351 100644 --- a/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/saproc.cpp +++ b/hotspot/src/jdk.hotspot.agent/solaris/native/libsaproc/saproc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,9 +24,6 @@ #include "salibproc.h" #include "sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal.h" -#ifndef SOLARIS_11_B159_OR_LATER -#include -#endif #include #include #include @@ -45,20 +42,6 @@ // debug modes static int _libsaproc_debug = 0; -#ifndef SOLARIS_11_B159_OR_LATER -static bool _Pstack_iter_debug = false; - -static void dprintf_2(const char* format,...) { - if (_Pstack_iter_debug) { - va_list alist; - - va_start(alist, format); - fputs("Pstack_iter DEBUG: ", stderr); - vfprintf(stderr, format, alist); - va_end(alist); - } -} -#endif // !SOLARIS_11_B159_OR_LATER static void print_debug(const char* format,...) { if (_libsaproc_debug) { @@ -757,7 +740,8 @@ static void attach_internal(JNIEnv* env, jobject this_obj, jstring cmdLine, jboo #endif // connect to process/core - struct ps_prochandle* ph = proc_arg_grab(cmdLine_cstr, (isProcess? PR_ARG_PIDS : PR_ARG_CORES), PGRAB_FORCE, &gcode); + ps_prochandle_t* ph = proc_arg_grab(cmdLine_cstr, (isProcess? PR_ARG_PIDS : PR_ARG_CORES), PGRAB_FORCE, &gcode, NULL); + env->ReleaseStringUTFChars(cmdLine, cmdLine_cstr); if (! 
ph) { if (gcode > 0 && gcode < sizeof(proc_arg_grab_errmsgs)/sizeof(const char*)) { @@ -997,11 +981,6 @@ JNIEXPORT void JNICALL Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_fill TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY, TD_SIGNO_MASK, TD_THR_ANY_USER_FLAGS); } -#ifndef SOLARIS_11_B159_OR_LATER -// building on Nevada-B158 or earlier so more hoops to jump through -static bool has_newer_Pstack_iter = false; // older version by default -#endif - /* * Class: sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal * Method: fillCFrameList0 @@ -1030,23 +1009,8 @@ JNIEXPORT jobject JNICALL Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_f env->ReleaseLongArrayElements(regsArray, ptr, JNI_ABORT); CHECK_EXCEPTION_(0); -#ifdef SOLARIS_11_B159_OR_LATER - // building on Nevada-B159 or later so use the new callback Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs, wrapper_fill_cframe_list, &dbgo2); -#else - // building on Nevada-B158 or earlier so figure out which callback to use - - if (has_newer_Pstack_iter) { - // Since we're building on Nevada-B158 or earlier, we have to - // cast wrapper_fill_cframe_list to make the compiler happy. 
- Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs, - (proc_stack_f *)wrapper_fill_cframe_list, &dbgo2); - } else { - Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs, - fill_cframe_list, &dbgo2); - } -#endif // SOLARIS_11_B159_OR_LATER return dbgo2.obj; } @@ -1236,7 +1200,8 @@ JNIEXPORT jobject JNICALL Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_l char nameBuf[SYMBOL_BUF_SIZE + 1]; GElf_Sym sym; int res = Plookup_by_addr((struct ps_prochandle*) p_ps_prochandle, (uintptr_t) address, - nameBuf, sizeof(nameBuf), &sym); + nameBuf, sizeof(nameBuf), &sym, NULL); + if (res != 0) { // failed return 0; } @@ -1268,102 +1233,6 @@ JNIEXPORT jstring JNICALL Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_d return res; } -#ifndef SOLARIS_11_B159_OR_LATER -// Determine if the OS we're running on has the newer version -// of libproc's Pstack_iter. -// -// Set env var PSTACK_ITER_DEBUG=true to debug this logic. -// Set env var PSTACK_ITER_DEBUG_RELEASE to simulate a 'release' value. -// Set env var PSTACK_ITER_DEBUG_VERSION to simulate a 'version' value. 
-// -// frankenputer 'uname -r -v': 5.10 Generic_141445-09 -// jurassic 'uname -r -v': 5.11 snv_164 -// lonepeak 'uname -r -v': 5.11 snv_127 -// -static void set_has_newer_Pstack_iter(JNIEnv *env) { - static bool done_set = false; - - if (done_set) { - // already set has_newer_Pstack_iter - return; - } - - struct utsname name; - if (uname(&name) == -1) { - THROW_NEW_DEBUGGER_EXCEPTION("uname() failed!"); - } - dprintf_2("release='%s' version='%s'\n", name.release, name.version); - - if (_Pstack_iter_debug) { - char *override = getenv("PSTACK_ITER_DEBUG_RELEASE"); - if (override != NULL) { - strncpy(name.release, override, SYS_NMLN - 1); - name.release[SYS_NMLN - 2] = '\0'; - dprintf_2("overriding with release='%s'\n", name.release); - } - override = getenv("PSTACK_ITER_DEBUG_VERSION"); - if (override != NULL) { - strncpy(name.version, override, SYS_NMLN - 1); - name.version[SYS_NMLN - 2] = '\0'; - dprintf_2("overriding with version='%s'\n", name.version); - } - } - - // the major number corresponds to the old SunOS major number - int major = atoi(name.release); - if (major >= 6) { - dprintf_2("release is SunOS 6 or later\n"); - has_newer_Pstack_iter = true; - done_set = true; - return; - } - if (major < 5) { - dprintf_2("release is SunOS 4 or earlier\n"); - done_set = true; - return; - } - - // some SunOS 5.* build so now check for Solaris versions - char *dot = strchr(name.release, '.'); - int minor = 0; - if (dot != NULL) { - // release is major.minor format - *dot = NULL; - minor = atoi(dot + 1); - } - - if (minor <= 10) { - dprintf_2("release is Solaris 10 or earlier\n"); - done_set = true; - return; - } else if (minor >= 12) { - dprintf_2("release is Solaris 12 or later\n"); - has_newer_Pstack_iter = true; - done_set = true; - return; - } - - // some Solaris 11 build so now check for internal build numbers - if (strncmp(name.version, "snv_", 4) != 0) { - dprintf_2("release is Solaris 11 post-GA or later\n"); - has_newer_Pstack_iter = true; - done_set = true; - 
return; - } - - // version begins with "snv_" so a pre-GA build of Solaris 11 - int build = atoi(&name.version[4]); - if (build >= 159) { - dprintf_2("release is Nevada-B159 or later\n"); - has_newer_Pstack_iter = true; - } else { - dprintf_2("release is Nevada-B158 or earlier\n"); - } - - done_set = true; -} -#endif // !SOLARIS_11_B159_OR_LATER - /* * Class: sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal * Method: initIDs @@ -1383,14 +1252,6 @@ JNIEXPORT void JNICALL Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_init if (libproc_handle == 0) THROW_NEW_DEBUGGER_EXCEPTION("can't load libproc.so, if you are using Solaris 5.7 or below, copy libproc.so from 5.8!"); -#ifndef SOLARIS_11_B159_OR_LATER - _Pstack_iter_debug = getenv("PSTACK_ITER_DEBUG") != NULL; - - set_has_newer_Pstack_iter(env); - CHECK_EXCEPTION; - dprintf_2("has_newer_Pstack_iter=%d\n", has_newer_Pstack_iter); -#endif - p_ps_prochandle_ID = env->GetFieldID(clazz, "p_ps_prochandle", "J"); CHECK_EXCEPTION; diff --git a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCIRuntime.java b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCIRuntime.java index 4f3f97de116..95c4b44bb61 100644 --- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCIRuntime.java +++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCIRuntime.java @@ -171,6 +171,9 @@ public final class HotSpotJVMCIRuntime implements HotSpotJVMCIRuntimeProvider { return (String) getValue(); } + private static final int PROPERTY_LINE_WIDTH = 80; + private static final int PROPERTY_HELP_INDENT = 10; + /** * Prints a description of the properties used to configure shared JVMCI code. 
* @@ -178,24 +181,26 @@ public final class HotSpotJVMCIRuntime implements HotSpotJVMCIRuntimeProvider { */ public static void printProperties(PrintStream out) { out.println("[JVMCI properties]"); - int typeWidth = 0; - int nameWidth = 0; Option[] values = values(); - for (Option option : values) { - typeWidth = Math.max(typeWidth, option.type.getSimpleName().length()); - nameWidth = Math.max(nameWidth, option.getPropertyName().length()); - } for (Option option : values) { Object value = option.getValue(); if (value instanceof String) { value = '"' + String.valueOf(value) + '"'; } - String assign = option.isDefault ? " =" : ":="; - String format = "%" + (typeWidth + 1) + "s %-" + (nameWidth + 1) + "s %s %s%n"; - out.printf(format, option.type.getSimpleName(), option.getPropertyName(), assign, value); - String helpFormat = "%" + (typeWidth + 1) + "s %s%n"; + + String name = option.getPropertyName(); + String assign = option.isDefault ? "=" : ":="; + String typeName = option.type.getSimpleName(); + String linePrefix = String.format("%s %s %s ", name, assign, value); + int typeStartPos = PROPERTY_LINE_WIDTH - typeName.length(); + int linePad = typeStartPos - linePrefix.length(); + if (linePad > 0) { + out.printf("%s%-" + linePad + "s[%s]%n", linePrefix, "", typeName); + } else { + out.printf("%s[%s]%n", linePrefix, typeName); + } for (String line : option.helpLines) { - out.printf(helpFormat, "", line); + out.printf("%" + PROPERTY_HELP_INDENT + "s%s%n", "", line); } } } @@ -306,6 +311,7 @@ public final class HotSpotJVMCIRuntime implements HotSpotJVMCIRuntimeProvider { PrintStream out = new PrintStream(getLogStream()); Option.printProperties(out); compilerFactory.printProperties(out); + System.exit(0); } if (Option.PrintConfig.getBoolean()) { diff --git a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotResolvedJavaMethodImpl.java 
b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotResolvedJavaMethodImpl.java index 091fdcc7847..eaace1b2331 100644 --- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotResolvedJavaMethodImpl.java +++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotResolvedJavaMethodImpl.java @@ -33,8 +33,10 @@ import static jdk.vm.ci.hotspot.UnsafeAccess.UNSAFE; import java.lang.annotation.Annotation; import java.lang.reflect.Executable; +import java.lang.reflect.Method; import java.lang.reflect.Modifier; import java.lang.reflect.Type; +import java.util.Arrays; import java.util.HashMap; import java.util.Map; @@ -459,6 +461,22 @@ final class HotSpotResolvedJavaMethodImpl extends HotSpotMethod implements HotSp return constantPool; } + @Override + public Parameter[] getParameters() { + Executable javaMethod = toJava(); + if (javaMethod == null) { + return null; + } + + java.lang.reflect.Parameter[] javaParameters = javaMethod.getParameters(); + Parameter[] res = new Parameter[javaParameters.length]; + for (int i = 0; i < res.length; i++) { + java.lang.reflect.Parameter src = javaParameters[i]; + res[i] = new Parameter(src.getName(), src.getModifiers(), this, i); + } + return res; + } + @Override public Annotation[][] getParameterAnnotations() { Executable javaMethod = toJava(); @@ -529,13 +547,31 @@ final class HotSpotResolvedJavaMethodImpl extends HotSpotMethod implements HotSp return result; } + private static Method searchMethods(Method[] methods, String name, Class returnType, Class[] parameterTypes) { + for (Method m : methods) { + if (m.getName().equals(name) && returnType.equals(m.getReturnType()) && Arrays.equals(m.getParameterTypes(), parameterTypes)) { + return m; + } + } + return null; + } + private Executable toJava() { if (toJavaCache != null) { return toJavaCache; } try { Class[] parameterTypes = signatureToTypes(); - Executable result = isConstructor() ? 
holder.mirror().getDeclaredConstructor(parameterTypes) : holder.mirror().getDeclaredMethod(name, parameterTypes); + Class returnType = ((HotSpotResolvedJavaType) getSignature().getReturnType(holder).resolve(holder)).mirror(); + + Executable result; + if (isConstructor()) { + result = holder.mirror().getDeclaredConstructor(parameterTypes); + } else { + // Do not use Method.getDeclaredMethod() as it can return a bridge method + // when this.isBridge() is false and vice versa. + result = searchMethods(holder.mirror().getDeclaredMethods(), name, returnType, parameterTypes); + } toJavaCache = result; return result; } catch (NoSuchMethodException | NoClassDefFoundError e) { diff --git a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.meta/src/jdk/vm/ci/meta/ResolvedJavaMethod.java b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.meta/src/jdk/vm/ci/meta/ResolvedJavaMethod.java index 5a56981e9e4..545b44bda75 100644 --- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.meta/src/jdk/vm/ci/meta/ResolvedJavaMethod.java +++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.meta/src/jdk/vm/ci/meta/ResolvedJavaMethod.java @@ -26,6 +26,7 @@ import java.lang.annotation.Annotation; import java.lang.reflect.AnnotatedElement; import java.lang.reflect.Array; import java.lang.reflect.Method; +import java.lang.reflect.Modifier; import java.lang.reflect.Type; /** @@ -173,6 +174,133 @@ public interface ResolvedJavaMethod extends JavaMethod, InvokeTarget, ModifiersP */ ConstantPool getConstantPool(); + /** + * A {@code Parameter} provides information about method parameters. + */ + public static class Parameter implements AnnotatedElement { + private final String name; + private final ResolvedJavaMethod method; + private final int modifiers; + private final int index; + + /** + * Constructor for {@code Parameter}. 
+ * + * @param name the name of the parameter + * @param modifiers the modifier flags for the parameter + * @param method the method which defines this parameter + * @param index the index of the parameter + */ + public Parameter(String name, + int modifiers, + ResolvedJavaMethod method, + int index) { + this.name = name; + this.modifiers = modifiers; + this.method = method; + this.index = index; + } + + /** + * Gets the name of the parameter. + */ + public String getName() { + return name; + } + + /** + * Gets the method declaring the parameter. + */ + public ResolvedJavaMethod getDeclaringMethod() { + return method; + } + + /** + * Get the modifier flags for the parameter + */ + public int getModifiers() { + return modifiers; + } + + /** + * Gets the kind of the parameter. + */ + public JavaKind getKind() { + return method.getSignature().getParameterKind(index); + } + + /** + * Gets the formal type of the parameter. + */ + public Type getParameterizedType() { + return method.getGenericParameterTypes()[index]; + } + + /** + * Gets the type of the parameter. + */ + public JavaType getType() { + return method.getSignature().getParameterType(index, method.getDeclaringClass()); + } + + /** + * Determines if the parameter represents a variable argument list. 
+ */ + public boolean isVarArgs() { + return method.isVarArgs() && index == method.getSignature().getParameterCount(false) - 1; + } + + public T getAnnotation(Class annotationClass) { + return method.getParameterAnnotations(annotationClass)[index]; + } + + public Annotation[] getAnnotations() { + return method.getParameterAnnotations()[index]; + } + + public Annotation[] getDeclaredAnnotations() { + return getAnnotations(); + } + + @Override + public String toString() { + Type type = getParameterizedType(); + String typename = type.getTypeName(); + if (isVarArgs()) { + typename = typename.replaceFirst("\\[\\]$", "..."); + } + + final StringBuilder sb = new StringBuilder(Modifier.toString(getModifiers())); + if (sb.length() != 0) { + sb.append(' '); + } + return sb.append(typename).append(' ').append(getName()).toString(); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof Parameter) { + Parameter other = (Parameter) obj; + return (other.method.equals(method) && other.index == index); + } + return false; + } + + @Override + public int hashCode() { + return method.hashCode() ^ index; + } + } + + /** + * Returns an array of {@code Parameter} objects that represent all the parameters to this + * method. Returns an array of length 0 if this method has no parameters. Returns {@code null} + * if the parameter information is unavailable. + */ + default Parameter[] getParameters() { + return null; + } + /** * Returns an array of arrays that represent the annotations on the formal parameters, in * declaration order, of this method. 
diff --git a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.services/src/jdk/vm/ci/services/Services.java b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.services/src/jdk/vm/ci/services/Services.java index 81689cf00be..dbeb7fbf6b0 100644 --- a/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.services/src/jdk/vm/ci/services/Services.java +++ b/hotspot/src/jdk.vm.ci/share/classes/jdk.vm.ci.services/src/jdk/vm/ci/services/Services.java @@ -22,7 +22,8 @@ */ package jdk.vm.ci.services; -import java.lang.reflect.Module; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.Formatter; import java.util.Iterator; import java.util.ServiceConfigurationError; @@ -36,11 +37,62 @@ public final class Services { private Services() { } + private static int getJavaSpecificationVersion() { + String value = System.getProperty("java.specification.version"); + if (value.startsWith("1.")) { + value = value.substring(2); + } + return Integer.parseInt(value); + } + + /** + * The integer value corresponding to the value of the {@code java.specification.version} system + * property after any leading {@code "1."} has been stripped. 
+ */ + public static final int JAVA_SPECIFICATION_VERSION = getJavaSpecificationVersion(); + + // Use reflection so that this compiles on Java 8 + private static final Method getModule; + private static final Method getPackages; + private static final Method addUses; + private static final Method isExported; + private static final Method addExports; + + static { + if (JAVA_SPECIFICATION_VERSION >= 9) { + try { + getModule = Class.class.getMethod("getModule"); + Class moduleClass = getModule.getReturnType(); + getPackages = moduleClass.getMethod("getPackages"); + addUses = moduleClass.getMethod("addUses", Class.class); + isExported = moduleClass.getMethod("isExported", String.class, moduleClass); + addExports = moduleClass.getMethod("addExports", String.class, moduleClass); + } catch (NoSuchMethodException | SecurityException e) { + throw new InternalError(e); + } + } else { + getModule = null; + getPackages = null; + addUses = null; + isExported = null; + addExports = null; + } + } + + @SuppressWarnings("unchecked") + static T invoke(Method method, Object receiver, Object... args) { + try { + return (T) method.invoke(receiver, args); + } catch (IllegalAccessException | IllegalArgumentException | InvocationTargetException e) { + throw new InternalError(e); + } + } + /** * Performs any required security checks and dynamic reconfiguration to allow the module of a * given class to access the classes in the JVMCI module. * - * Note: This API uses {@link Class} instead of {@link Module} to provide backwards + * Note: This API uses {@link Class} instead of {@code Module} to provide backwards * compatibility for JVMCI clients compiled against a JDK release earlier than 9. 
* * @param requestor a class requesting access to the JVMCI module for its module @@ -52,15 +104,19 @@ public final class Services { if (sm != null) { sm.checkPermission(new JVMCIPermission()); } - Module jvmci = Services.class.getModule(); - Module requestorModule = requestor.getModule(); - if (jvmci != requestorModule) { - for (String pkg : jvmci.getPackages()) { - // Export all JVMCI packages dynamically instead - // of requiring a long list of --add-exports - // options on the JVM command line. - if (!jvmci.isExported(pkg, requestorModule)) { - jvmci.addExports(pkg, requestorModule); + if (JAVA_SPECIFICATION_VERSION >= 9) { + Object jvmci = invoke(getModule, Services.class); + Object requestorModule = invoke(getModule, requestor); + if (jvmci != requestorModule) { + String[] packages = invoke(getPackages, jvmci); + for (String pkg : packages) { + // Export all JVMCI packages dynamically instead + // of requiring a long list of --add-exports + // options on the JVM command line. + boolean exported = invoke(isExported, jvmci, pkg, requestorModule); + if (!exported) { + invoke(addExports, jvmci, pkg, requestorModule); + } } } } @@ -77,8 +133,10 @@ public final class Services { if (sm != null) { sm.checkPermission(new JVMCIPermission()); } - Module jvmci = Services.class.getModule(); - jvmci.addUses(service); + if (JAVA_SPECIFICATION_VERSION >= 9) { + Object jvmci = invoke(getModule, Services.class); + invoke(addUses, jvmci, service); + } // Restrict JVMCI clients to be on the class path or module path return ServiceLoader.load(service, ClassLoader.getSystemClassLoader()); @@ -98,8 +156,10 @@ public final class Services { if (sm != null) { sm.checkPermission(new JVMCIPermission()); } - Module jvmci = Services.class.getModule(); - jvmci.addUses(service); + if (JAVA_SPECIFICATION_VERSION >= 9) { + Object jvmci = invoke(getModule, Services.class); + invoke(addUses, jvmci, service); + } // Restrict JVMCI clients to be on the class path or module path Iterable providers 
= ServiceLoader.load(service, ClassLoader.getSystemClassLoader()); S singleProvider = null; diff --git a/hotspot/src/os/aix/vm/globals_aix.hpp b/hotspot/src/os/aix/vm/globals_aix.hpp index 8f42e1305fe..7cf6bb93425 100644 --- a/hotspot/src/os/aix/vm/globals_aix.hpp +++ b/hotspot/src/os/aix/vm/globals_aix.hpp @@ -61,7 +61,7 @@ /* data segment when placing the java heap. If that space is too small, we */ \ /* reduce our chance of getting a low heap address (needed for compressed */ \ /* Oops). */ \ - product(uintx, MaxExpectedDataSegmentSize, (SIZE_4G * 2), \ + product(uintx, MaxExpectedDataSegmentSize, 8*G, \ "Maximum expected Data Segment Size.") \ \ /* Use optimized addresses for the polling page. */ \ diff --git a/hotspot/src/os/aix/vm/os_aix.cpp b/hotspot/src/os/aix/vm/os_aix.cpp index 47788262bdc..a87885e2273 100644 --- a/hotspot/src/os/aix/vm/os_aix.cpp +++ b/hotspot/src/os/aix/vm/os_aix.cpp @@ -321,7 +321,7 @@ size_t os::Aix::query_pagesize(void* addr) { if (os::Aix::on_pase() && os::Aix::os_version_short() < 0x0601) { // AS/400 older than V6R1: no vmgetinfo here, default to 4K - return SIZE_4K; + return 4*K; } vm_page_info pi; @@ -330,7 +330,7 @@ size_t os::Aix::query_pagesize(void* addr) { return pi.pagesize; } else { assert(false, "vmgetinfo failed to retrieve page size"); - return SIZE_4K; + return 4*K; } } @@ -351,10 +351,10 @@ void os::Aix::initialize_system_info() { // Helper function for tracing page sizes. static const char* describe_pagesize(size_t pagesize) { switch (pagesize) { - case SIZE_4K : return "4K"; - case SIZE_64K: return "64K"; - case SIZE_16M: return "16M"; - case SIZE_16G: return "16G"; + case 4*K : return "4K"; + case 64*K: return "64K"; + case 16*M: return "16M"; + case 16*G: return "16G"; default: assert(false, "surprise"); return "??"; @@ -372,14 +372,14 @@ static void query_multipage_support() { g_multipage_support.pagesize = ::sysconf(_SC_PAGESIZE); // This really would surprise me. 
- assert(g_multipage_support.pagesize == SIZE_4K, "surprise!"); + assert(g_multipage_support.pagesize == 4*K, "surprise!"); // Query default data page size (default page size for C-Heap, pthread stacks and .bss). // Default data page size is defined either by linker options (-bdatapsize) // or by environment variable LDR_CNTRL (suboption DATAPSIZE). If none is given, // default should be 4K. { - void* p = ::malloc(SIZE_16M); + void* p = ::malloc(16*M); g_multipage_support.datapsize = os::Aix::query_pagesize(p); ::free(p); } @@ -447,7 +447,7 @@ static void query_multipage_support() { // Can we use 64K, 16M pages? for (int i = 0; i < num_psizes; i ++) { const size_t pagesize = sizes[i]; - if (pagesize != SIZE_64K && pagesize != SIZE_16M) { + if (pagesize != 64*K && pagesize != 16*M) { continue; } bool can_use = false; @@ -477,9 +477,9 @@ static void query_multipage_support() { ::shmdt(p); } trcVerbose("Can use: %s", (can_use ? "yes" : "no")); - if (pagesize == SIZE_64K) { + if (pagesize == 64*K) { g_multipage_support.can_use_64K_pages = can_use; - } else if (pagesize == SIZE_16M) { + } else if (pagesize == 16*M) { g_multipage_support.can_use_16M_pages = can_use; } } @@ -506,11 +506,11 @@ query_multipage_support_end: g_multipage_support.error); // sanity checks - assert0(g_multipage_support.pagesize == SIZE_4K); - assert0(g_multipage_support.datapsize == SIZE_4K || g_multipage_support.datapsize == SIZE_64K); - assert0(g_multipage_support.textpsize == SIZE_4K || g_multipage_support.textpsize == SIZE_64K); + assert0(g_multipage_support.pagesize == 4*K); + assert0(g_multipage_support.datapsize == 4*K || g_multipage_support.datapsize == 64*K); + assert0(g_multipage_support.textpsize == 4*K || g_multipage_support.textpsize == 64*K); assert0(g_multipage_support.pthr_stack_pagesize == g_multipage_support.datapsize); - assert0(g_multipage_support.shmpsize == SIZE_4K || g_multipage_support.shmpsize == SIZE_64K); + assert0(g_multipage_support.shmpsize == 4*K || 
g_multipage_support.shmpsize == 64*K); } @@ -1924,7 +1924,7 @@ static char* reserve_shmated_memory ( } // Align size of shm up to 64K to avoid errors if we later try to change the page size. - const size_t size = align_size_up(bytes, SIZE_64K); + const size_t size = align_size_up(bytes, 64*K); // Reserve the shared segment. int shmid = shmget(IPC_PRIVATE, size, IPC_CREAT | S_IRUSR | S_IWUSR); @@ -1941,10 +1941,10 @@ static char* reserve_shmated_memory ( struct shmid_ds shmbuf; memset(&shmbuf, 0, sizeof(shmbuf)); - shmbuf.shm_pagesize = SIZE_64K; + shmbuf.shm_pagesize = 64*K; if (shmctl(shmid, SHM_PAGESIZE, &shmbuf) != 0) { trcVerbose("Failed to set page size (need " UINTX_FORMAT " 64K pages) - shmctl failed with %d.", - size / SIZE_64K, errno); + size / (64*K), errno); // I want to know if this ever happens. assert(false, "failed to set page size for shmat"); } @@ -2122,7 +2122,7 @@ static char* reserve_mmaped_memory(size_t bytes, char* requested_addr, size_t al } // bookkeeping - vmembk_add(addr, size, SIZE_4K, VMEM_MAPPED); + vmembk_add(addr, size, 4*K, VMEM_MAPPED); // Test alignment, see above. assert0(is_aligned_to(addr, os::vm_page_size())); @@ -2218,7 +2218,7 @@ bool os::pd_commit_memory(char* addr, size_t size, bool exec) { if (UseExplicitCommit) { // AIX commits memory on touch. So, touch all pages to be committed. - for (char* p = addr; p < (addr + size); p += SIZE_4K) { + for (char* p = addr; p < (addr + size); p += 4*K) { *p = '\0'; } } @@ -2330,7 +2330,7 @@ char* os::pd_reserve_memory(size_t bytes, char* requested_addr, size_t alignment // In 4K mode always use mmap. // In 64K mode allocate small sizes with mmap, large ones with 64K shmatted. 
- if (os::vm_page_size() == SIZE_4K) { + if (os::vm_page_size() == 4*K) { return reserve_mmaped_memory(bytes, requested_addr, alignment_hint); } else { if (bytes >= Use64KPagesThreshold) { @@ -2519,7 +2519,7 @@ char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) { // In 4K mode always use mmap. // In 64K mode allocate small sizes with mmap, large ones with 64K shmatted. - if (os::vm_page_size() == SIZE_4K) { + if (os::vm_page_size() == 4*K) { return reserve_mmaped_memory(bytes, requested_addr, 0); } else { if (bytes >= Use64KPagesThreshold) { @@ -3399,7 +3399,7 @@ void os::init(void) { // We explicitly leave no option to change page size, because only upgrading would work, // not downgrading (if stack page size is 64k you cannot pretend its 4k). - if (g_multipage_support.datapsize == SIZE_4K) { + if (g_multipage_support.datapsize == 4*K) { // datapsize = 4K. Data segment, thread stacks are 4K paged. if (g_multipage_support.can_use_64K_pages) { // .. but we are able to use 64K pages dynamically. @@ -3414,16 +3414,16 @@ void os::init(void) { // -XX:-Use64KPages. if (Use64KPages) { trcVerbose("64K page mode (faked for data segment)"); - Aix::_page_size = SIZE_64K; + Aix::_page_size = 64*K; } else { trcVerbose("4K page mode (Use64KPages=off)"); - Aix::_page_size = SIZE_4K; + Aix::_page_size = 4*K; } } else { // .. and not able to allocate 64k pages dynamically. Here, just // fall back to 4K paged mode and use mmap for everything. trcVerbose("4K page mode"); - Aix::_page_size = SIZE_4K; + Aix::_page_size = 4*K; FLAG_SET_ERGO(bool, Use64KPages, false); } } else { @@ -3432,7 +3432,7 @@ void os::init(void) { // (There is one special case where this may be false: EXTSHM=on. // but we decided to not support that mode). 
assert0(g_multipage_support.can_use_64K_pages); - Aix::_page_size = SIZE_64K; + Aix::_page_size = 64*K; trcVerbose("64K page mode"); FLAG_SET_ERGO(bool, Use64KPages, true); } diff --git a/hotspot/src/os/linux/vm/os_linux.cpp b/hotspot/src/os/linux/vm/os_linux.cpp index e03ac29102e..a59f52041e9 100644 --- a/hotspot/src/os/linux/vm/os_linux.cpp +++ b/hotspot/src/os/linux/vm/os_linux.cpp @@ -291,7 +291,7 @@ void os::init_system_properties_values() { // 1: ... // ... // 7: The default directories, normally /lib and /usr/lib. -#if defined(AMD64) || defined(_LP64) && (defined(SPARC) || defined(PPC) || defined(S390)) +#if defined(AMD64) || (defined(_LP64) && defined(SPARC)) || defined(PPC64) || defined(S390) #define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib" #else #define DEFAULT_LIBPATH "/lib:/usr/lib" @@ -1212,8 +1212,8 @@ void os::Linux::clock_init() { } #ifndef SYS_clock_getres - #if defined(IA32) || defined(AMD64) - #define SYS_clock_getres IA32_ONLY(266) AMD64_ONLY(229) + #if defined(X86) || defined(PPC64) || defined(S390) + #define SYS_clock_getres AMD64_ONLY(229) IA32_ONLY(266) PPC64_ONLY(247) S390_ONLY(261) #define sys_clock_getres(x,y) ::syscall(SYS_clock_getres, x, y) #else #warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time" @@ -1766,6 +1766,8 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { static Elf32_Half running_arch_code=EM_PPC64; #elif (defined __powerpc__) static Elf32_Half running_arch_code=EM_PPC; +#elif (defined AARCH64) + static Elf32_Half running_arch_code=EM_AARCH64; #elif (defined ARM) static Elf32_Half running_arch_code=EM_ARM; #elif (defined S390) @@ -1780,11 +1782,9 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { static Elf32_Half running_arch_code=EM_MIPS; #elif (defined M68K) static Elf32_Half running_arch_code=EM_68K; -#elif (defined AARCH64) - static Elf32_Half running_arch_code=EM_AARCH64; #else #error Method os::dll_load requires that one of 
following is defined:\ - IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K, AARCH64 + AARCH64, ALPHA, ARM, AMD64, IA32, IA64, M68K, MIPS, MIPSEL, PARISC, __powerpc__, __powerpc64__, S390, __sparc #endif // Identify compatability class for VM's architecture and library's architecture @@ -2192,10 +2192,12 @@ void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) { #if defined(AMD64) || defined(IA32) || defined(X32) const char* search_string = "model name"; -#elif defined(SPARC) -const char* search_string = "cpu"; #elif defined(PPC64) const char* search_string = "cpu"; +#elif defined(S390) +const char* search_string = "processor"; +#elif defined(SPARC) +const char* search_string = "cpu"; #else const char* search_string = "Processor"; #endif @@ -2233,20 +2235,22 @@ void os::get_summary_cpu_info(char* cpuinfo, size_t length) { } // cpuinfo not found or parsing failed, just print generic string. The entire // /proc/cpuinfo file will be printed later in the file (or enough of it for x86) -#if defined(AMD64) +#if defined(AARCH64) + strncpy(cpuinfo, "AArch64", length); +#elif defined(AMD64) strncpy(cpuinfo, "x86_64", length); +#elif defined(ARM) // Order wrt. AARCH64 is relevant! + strncpy(cpuinfo, "ARM", length); #elif defined(IA32) strncpy(cpuinfo, "x86_32", length); #elif defined(IA64) strncpy(cpuinfo, "IA64", length); -#elif defined(SPARC) - strncpy(cpuinfo, "sparcv9", length); -#elif defined(AARCH64) - strncpy(cpuinfo, "AArch64", length); -#elif defined(ARM) - strncpy(cpuinfo, "ARM", length); #elif defined(PPC) strncpy(cpuinfo, "PPC64", length); +#elif defined(S390) + strncpy(cpuinfo, "S390", length); +#elif defined(SPARC) + strncpy(cpuinfo, "sparcv9", length); #elif defined(ZERO_LIBARCH) strncpy(cpuinfo, ZERO_LIBARCH, length); #else @@ -3242,8 +3246,15 @@ size_t os::Linux::find_large_page_size() { // the processor. 
#ifndef ZERO - large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) - ARM32_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M); + large_page_size = + AARCH64_ONLY(2 * M) + AMD64_ONLY(2 * M) + ARM32_ONLY(2 * M) + IA32_ONLY(4 * M) + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) + SPARC_ONLY(4 * M); #endif // ZERO FILE *fp = fopen("/proc/meminfo", "r"); diff --git a/hotspot/src/os/posix/vm/os_posix.cpp b/hotspot/src/os/posix/vm/os_posix.cpp index 8ff039e8644..bf95c2af497 100644 --- a/hotspot/src/os/posix/vm/os_posix.cpp +++ b/hotspot/src/os/posix/vm/os_posix.cpp @@ -188,6 +188,10 @@ int os::get_fileno(FILE* fp) { return NOT_AIX(::)fileno(fp); } +struct tm* os::gmtime_pd(const time_t* clock, struct tm* res) { + return gmtime_r(clock, res); +} + void os::Posix::print_load_average(outputStream* st) { st->print("load average:"); double loadavg[3]; diff --git a/hotspot/src/os/windows/vm/os_windows.cpp b/hotspot/src/os/windows/vm/os_windows.cpp index d27e502ccc1..5142cd37a25 100644 --- a/hotspot/src/os/windows/vm/os_windows.cpp +++ b/hotspot/src/os/windows/vm/os_windows.cpp @@ -403,6 +403,15 @@ struct tm* os::localtime_pd(const time_t* clock, struct tm* res) { return NULL; } +struct tm* os::gmtime_pd(const time_t* clock, struct tm* res) { + const struct tm* time_struct_ptr = gmtime(clock); + if (time_struct_ptr != NULL) { + *res = *time_struct_ptr; + return res; + } + return NULL; +} + LONG WINAPI topLevelExceptionFilter(struct _EXCEPTION_POINTERS* exceptionInfo); // Thread start routine for all newly created threads diff --git a/hotspot/src/os_cpu/linux_s390/vm/atomic_linux_s390.hpp b/hotspot/src/os_cpu/linux_s390/vm/atomic_linux_s390.hpp new file mode 100644 index 00000000000..0ed4053852b --- /dev/null +++ b/hotspot/src/os_cpu/linux_s390/vm/atomic_linux_s390.hpp @@ -0,0 +1,527 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP +#define OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP + +#include "runtime/atomic.hpp" +#include "runtime/os.hpp" +#include "vm_version_s390.hpp" + +// Note that the compare-and-swap instructions on System z perform +// a serialization function before the storage operand is fetched +// and again after the operation is completed. +// +// Used constraint modifiers: +// = write-only access: Value on entry to inline-assembler code irrelevant. +// + read/write access: Value on entry is used; on exit value is changed. +// read-only access: Value on entry is used and never changed. +// & early-clobber access: Might be modified before all read-only operands +// have been used. +// a address register operand (not GR0). +// d general register operand (including GR0) +// Q memory operand w/o index register. +// 0..9 operand reference (by operand position). +// Used for operands that fill multiple roles. 
One example would be a +// write-only operand receiving its initial value from a read-only operand. +// Refer to cmpxchg(..) operand #0 and variable cmp_val for a real-life example. +// + +// On System z, all store operations are atomic if the address where the data is stored into +// is an integer multiple of the data length. Furthermore, all stores are ordered: +// a store which occurs conceptually before another store becomes visible to other CPUs +// before the other store becomes visible. +inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } + +inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; } + + +//------------ +// Atomic::add +//------------ +// These methods force the value in memory to be augmented by the passed increment. +// Both, memory value and increment, are treated as 32bit signed binary integers. +// No overflow exceptions are recognized, and the condition code does not hold +// information about the value in memory. 
+// +// The value in memory is updated by using a compare-and-swap instruction. The +// instruction is retried as often as required. +// +// The return value of the method is the value that was successfully stored. At the +// time the caller receives back control, the value in memory may have changed already. + +inline jint Atomic::add(jint inc, volatile jint*dest) { + unsigned int old, upd; + + if (VM_Version::has_LoadAndALUAtomicV1()) { + __asm__ __volatile__ ( + " LGFR 0,%[inc] \n\t" // save increment + " LA 3,%[mem] \n\t" // force data address into ARG2 +// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value +// " LAA 2,0,0(3) \n\t" // actually coded instruction + " .byte 0xeb \n\t" // LAA main opcode + " .byte 0x20 \n\t" // R1,R3 + " .byte 0x30 \n\t" // R2,disp1 + " .byte 0x00 \n\t" // disp2,disp3 + " .byte 0x00 \n\t" // disp4,disp5 + " .byte 0xf8 \n\t" // LAA minor opcode + " AR 2,0 \n\t" // calc new value in register + " LR %[upd],2 \n\t" // move to result register + //---< outputs >--- + : [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : [inc] "a" (inc) // read-only. + //---< clobbered >--- + : "cc", "r0", "r2", "r3" + ); + } else { + __asm__ __volatile__ ( + " LLGF %[old],%[mem] \n\t" // get old value + "0: LA %[upd],0(%[inc],%[old]) \n\t" // calc result + " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem + " JNE 0b \n\t" // no success? -> retry + //---< outputs >--- + : [old] "=&a" (old) // write-only, old counter value + , [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : [inc] "a" (inc) // read-only. 
+ //---< clobbered >--- + : "cc" + ); + } + + return (jint)upd; +} + + +inline intptr_t Atomic::add_ptr(intptr_t inc, volatile intptr_t* dest) { + unsigned long old, upd; + + if (VM_Version::has_LoadAndALUAtomicV1()) { + __asm__ __volatile__ ( + " LGR 0,%[inc] \n\t" // save increment + " LA 3,%[mem] \n\t" // force data address into ARG2 +// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value +// " LAAG 2,0,0(3) \n\t" // actually coded instruction + " .byte 0xeb \n\t" // LAAG main opcode + " .byte 0x20 \n\t" // R1,R3 + " .byte 0x30 \n\t" // R2,disp1 + " .byte 0x00 \n\t" // disp2,disp3 + " .byte 0x00 \n\t" // disp4,disp5 + " .byte 0xe8 \n\t" // LAAG minor opcode (64bit form; LAA would be 0xf8) + " AGR 2,0 \n\t" // calc new value in register + " LGR %[upd],2 \n\t" // move to result register + //---< outputs >--- + : [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : [inc] "a" (inc) // read-only. + //---< clobbered >--- + : "cc", "r0", "r2", "r3" + ); + } else { + __asm__ __volatile__ ( + " LG %[old],%[mem] \n\t" // get old value + "0: LA %[upd],0(%[inc],%[old]) \n\t" // calc result + " CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem + " JNE 0b \n\t" // no success? -> retry + //---< outputs >--- + : [old] "=&a" (old) // write-only, old counter value + , [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : [inc] "a" (inc) // read-only. + //---< clobbered >--- + : "cc" + ); + } + + return (intptr_t)upd; +} + +inline void* Atomic::add_ptr(intptr_t add_value, volatile void* dest) { + return (void*)add_ptr(add_value, (volatile intptr_t*)dest); +} + + +//------------ +// Atomic::inc +//------------ +// These methods force the value in memory to be incremented (augmented by 1). +// Both, memory value and increment, are treated as 32bit signed binary integers.
+// No overflow exceptions are recognized, and the condition code does not hold +// information about the value in memory. +// +// The value in memory is updated by using a compare-and-swap instruction. The +// instruction is retried as often as required. + +inline void Atomic::inc(volatile jint* dest) { + unsigned int old, upd; + + if (VM_Version::has_LoadAndALUAtomicV1()) { +// tty->print_cr("Atomic::inc called... dest @%p", dest); + __asm__ __volatile__ ( + " LGHI 2,1 \n\t" // load increment + " LA 3,%[mem] \n\t" // force data address into ARG2 +// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value +// " LAA 2,2,0(3) \n\t" // actually coded instruction + " .byte 0xeb \n\t" // LAA main opcode + " .byte 0x22 \n\t" // R1,R3 + " .byte 0x30 \n\t" // R2,disp1 + " .byte 0x00 \n\t" // disp2,disp3 + " .byte 0x00 \n\t" // disp4,disp5 + " .byte 0xf8 \n\t" // LAA minor opcode + " AGHI 2,1 \n\t" // calc new value in register + " LR %[upd],2 \n\t" // move to result register + //---< outputs >--- + : [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : +// : [inc] "a" (inc) // read-only. + //---< clobbered >--- + : "cc", "r2", "r3" + ); + } else { + __asm__ __volatile__ ( + " LLGF %[old],%[mem] \n\t" // get old value + "0: LA %[upd],1(,%[old]) \n\t" // calc result + " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem + " JNE 0b \n\t" // no success? 
-> retry + //---< outputs >--- + : [old] "=&a" (old) // write-only, old counter value + , [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : + //---< clobbered >--- + : "cc" + ); + } +} + +inline void Atomic::inc_ptr(volatile intptr_t* dest) { + unsigned long old, upd; + + if (VM_Version::has_LoadAndALUAtomicV1()) { + __asm__ __volatile__ ( + " LGHI 2,1 \n\t" // load increment + " LA 3,%[mem] \n\t" // force data address into ARG2 +// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value +// " LAAG 2,2,0(3) \n\t" // actually coded instruction + " .byte 0xeb \n\t" // LAAG main opcode + " .byte 0x22 \n\t" // R1,R3 + " .byte 0x30 \n\t" // R2,disp1 + " .byte 0x00 \n\t" // disp2,disp3 + " .byte 0x00 \n\t" // disp4,disp5 + " .byte 0xe8 \n\t" // LAAG minor opcode (64bit form; LAA would be 0xf8) + " AGHI 2,1 \n\t" // calc new value in register + " LR %[upd],2 \n\t" // move to result register (LR copies 32 bits only; upd is not read afterwards) + //---< outputs >--- + : [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : +// : [inc] "a" (inc) // read-only. + //---< clobbered >--- + : "cc", "r2", "r3" + ); + } else { + __asm__ __volatile__ ( + " LG %[old],%[mem] \n\t" // get old value + "0: LA %[upd],1(,%[old]) \n\t" // calc result + " CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem + " JNE 0b \n\t" // no success? -> retry + //---< outputs >--- + : [old] "=&a" (old) // write-only, old counter value + , [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : + //---< clobbered >--- + : "cc" + ); + } +} + +inline void Atomic::inc_ptr(volatile void* dest) { + inc_ptr((volatile intptr_t*)dest); +} + +//------------ +// Atomic::dec +//------------ +// These methods force the value in memory to be decremented (augmented by -1).
+// Both, memory value and decrement, are treated as 32bit signed binary integers. +// No overflow exceptions are recognized, and the condition code does not hold +// information about the value in memory. +// +// The value in memory is updated by using a compare-and-swap instruction. The +// instruction is retried as often as required. + +inline void Atomic::dec(volatile jint* dest) { + unsigned int old, upd; + + if (VM_Version::has_LoadAndALUAtomicV1()) { + __asm__ __volatile__ ( + " LGHI 2,-1 \n\t" // load increment + " LA 3,%[mem] \n\t" // force data address into ARG2 +// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value +// " LAA 2,2,0(3) \n\t" // actually coded instruction + " .byte 0xeb \n\t" // LAA main opcode + " .byte 0x22 \n\t" // R1,R3 + " .byte 0x30 \n\t" // R2,disp1 + " .byte 0x00 \n\t" // disp2,disp3 + " .byte 0x00 \n\t" // disp4,disp5 + " .byte 0xf8 \n\t" // LAA minor opcode + " AGHI 2,-1 \n\t" // calc new value in register + " LR %[upd],2 \n\t" // move to result register + //---< outputs >--- + : [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : +// : [inc] "a" (inc) // read-only. + //---< clobbered >--- + : "cc", "r2", "r3" + ); + } else { + __asm__ __volatile__ ( + " LLGF %[old],%[mem] \n\t" // get old value + // LAY not supported by inline assembler + // "0: LAY %[upd],-1(,%[old]) \n\t" // calc result + "0: LR %[upd],%[old] \n\t" // calc result + " AHI %[upd],-1 \n\t" + " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem + " JNE 0b \n\t" // no success? 
-> retry + //---< outputs >--- + : [old] "=&a" (old) // write-only, old counter value + , [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : + //---< clobbered >--- + : "cc" + ); + } +} + +inline void Atomic::dec_ptr(volatile intptr_t* dest) { + unsigned long old, upd; + + if (VM_Version::has_LoadAndALUAtomicV1()) { + __asm__ __volatile__ ( + " LGHI 2,-1 \n\t" // load decrement (-1) + " LA 3,%[mem] \n\t" // force data address into ARG2 +// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value +// " LAAG 2,2,0(3) \n\t" // actually coded instruction + " .byte 0xeb \n\t" // LAAG main opcode + " .byte 0x22 \n\t" // R1,R3 + " .byte 0x30 \n\t" // R2,disp1 + " .byte 0x00 \n\t" // disp2,disp3 + " .byte 0x00 \n\t" // disp4,disp5 + " .byte 0xe8 \n\t" // LAAG minor opcode (64bit form; LAA would be 0xf8) + " AGHI 2,-1 \n\t" // calc new value in register + " LR %[upd],2 \n\t" // move to result register (LR copies 32 bits only; upd is not read afterwards) + //---< outputs >--- + : [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : +// : [inc] "a" (inc) // read-only. + //---< clobbered >--- + : "cc", "r2", "r3" + ); + } else { + __asm__ __volatile__ ( + " LG %[old],%[mem] \n\t" // get old value +// LAY not supported by inline assembler +// "0: LAY %[upd],-1(,%[old]) \n\t" // calc result + "0: LGR %[upd],%[old] \n\t" // calc result + " AGHI %[upd],-1 \n\t" + " CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem + " JNE 0b \n\t" // no success?
-> retry + //---< outputs >--- + : [old] "=&a" (old) // write-only, old counter value + , [upd] "=&d" (upd) // write-only, updated counter value + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : + //---< clobbered >--- + : "cc" + ); + } +} + +inline void Atomic::dec_ptr(volatile void* dest) { + dec_ptr((volatile intptr_t*)dest); +} + +//------------- +// Atomic::xchg +//------------- +// These methods force the value in memory to be replaced by the new value passed +// in as argument. +// +// The value in memory is replaced by using a compare-and-swap instruction. The +// instruction is retried as often as required. This makes sure that the new +// value can be seen, at least for a very short period of time, by other CPUs. +// +// If we would use a normal "load(old value) store(new value)" sequence, +// the new value could be lost unnoticed, due to a store(new value) from +// another thread. +// +// The return value is the (unchanged) value from memory as it was when the +// replacement succeeded. +inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) { + unsigned int old; + + __asm__ __volatile__ ( + " LLGF %[old],%[mem] \n\t" // get old value + "0: CS %[old],%[upd],%[mem] \n\t" // try to xchg upd with mem + " JNE 0b \n\t" // no success? -> retry + //---< outputs >--- + : [old] "=&d" (old) // write-only, prev value irrelevant + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : [upd] "d" (xchg_val) // read-only, value to be written to memory + //---< clobbered >--- + : "cc" + ); + + return (jint)old; +} + +inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) { + unsigned long old; + + __asm__ __volatile__ ( + " LG %[old],%[mem] \n\t" // get old value + "0: CSG %[old],%[upd],%[mem] \n\t" // try to xchg upd with mem + " JNE 0b \n\t" // no success? 
-> retry + //---< outputs >--- + : [old] "=&d" (old) // write-only, init from memory + , [mem] "+Q" (*dest) // read/write, memory to be updated atomically + //---< inputs >--- + : [upd] "d" (xchg_val) // read-only, value to be written to memory + //---< clobbered >--- + : "cc" + ); + + return (intptr_t)old; +} + +inline void *Atomic::xchg_ptr(void *exchange_value, volatile void *dest) { + return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); +} + +//---------------- +// Atomic::cmpxchg +//---------------- +// These methods compare the value in memory with a given compare value. +// If both values compare equal, the value in memory is replaced with +// the exchange value. +// +// The value in memory is compared and replaced by using a compare-and-swap +// instruction. The instruction is NOT retried (one shot only). +// +// The return value is the (unchanged) value from memory as it was when the +// compare-and-swap instruction completed. A successful exchange operation +// is indicated by (return value == cmp_val). If unsuccessful, a new +// exchange value can be calculated based on the return value which is the +// latest contents of the memory location. +// +// Inspecting the return value is the only way for the caller to determine +// if the compare-and-swap instruction was successful: +// - If return value and compare value compare equal, the compare-and-swap +// instruction was successful and the value in memory was replaced by the +// exchange value. +// - If return value and compare value compare unequal, the compare-and-swap +// instruction was not successful. The value in memory was left unchanged. +// +// The s390 processors always fence before and after the CS/CSG instructions. +// Thus we ignore the memory ordering argument. The documentation says: "A serialization +// function is performed before the operand is fetched and again after the +// operation is completed."
+ +jint Atomic::cmpxchg(jint xchg_val, volatile jint* dest, jint cmp_val, cmpxchg_memory_order unused) { + unsigned long old; + + __asm__ __volatile__ ( + " CS %[old],%[upd],%[mem] \n\t" // Try to xchg upd with mem. + // outputs + : [old] "=&d" (old) // Write-only, prev value irrelevant. + , [mem] "+Q" (*dest) // Read/write, memory to be updated atomically. + // inputs + : [upd] "d" (xchg_val) + , "0" (cmp_val) // Read-only, initial value for [old] (operand #0). + // clobbered + : "cc" + ); + + return (jint)old; +} + +jlong Atomic::cmpxchg(jlong xchg_val, volatile jlong* dest, jlong cmp_val, cmpxchg_memory_order unused) { + unsigned long old; + + __asm__ __volatile__ ( + " CSG %[old],%[upd],%[mem] \n\t" // Try to xchg upd with mem. + // outputs + : [old] "=&d" (old) // Write-only, prev value irrelevant. + , [mem] "+Q" (*dest) // Read/write, memory to be updated atomically. + // inputs + : [upd] "d" (xchg_val) + , "0" (cmp_val) // Read-only, initial value for [old] (operand #0). + // clobbered + : "cc" + ); + + return (jlong)old; +} + +void* Atomic::cmpxchg_ptr(void *xchg_val, volatile void* dest, void* cmp_val, cmpxchg_memory_order unused) { + return (void*)cmpxchg((jlong)xchg_val, (volatile jlong*)dest, (jlong)cmp_val, unused); +} + +intptr_t Atomic::cmpxchg_ptr(intptr_t xchg_val, volatile intptr_t* dest, intptr_t cmp_val, cmpxchg_memory_order unused) { + return (intptr_t)cmpxchg((jlong)xchg_val, (volatile jlong*)dest, (jlong)cmp_val, unused); +} + +inline jlong Atomic::load(volatile jlong* src) { return *src; } + +#endif // OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_s390/vm/bytes_linux_s390.inline.hpp b/hotspot/src/os_cpu/linux_s390/vm/bytes_linux_s390.inline.hpp new file mode 100644 index 00000000000..3471b647871 --- /dev/null +++ b/hotspot/src/os_cpu/linux_s390/vm/bytes_linux_s390.inline.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_S390_VM_BYTES_LINUX_S390_INLINE_HPP +#define OS_CPU_LINUX_S390_VM_BYTES_LINUX_S390_INLINE_HPP + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. + +#include <byteswap.h> + +inline u2 swap_u2(u2 x) { + return bswap_16(x); +} + +inline u4 swap_u4(u4 x) { + return bswap_32(x); +} + +inline u8 swap_u8(u8 x) { + return bswap_64(x); +} + +#endif // OS_CPU_LINUX_S390_VM_BYTES_LINUX_S390_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_s390/vm/globals_linux_s390.hpp b/hotspot/src/os_cpu/linux_s390/vm/globals_linux_s390.hpp new file mode 100644 index 00000000000..39050ac6fb6 --- /dev/null +++ b/hotspot/src/os_cpu/linux_s390/vm/globals_linux_s390.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Sorted according to linux_x86. + +#ifndef OS_CPU_LINUX_S390_VM_GLOBALS_LINUX_S390_HPP +#define OS_CPU_LINUX_S390_VM_GLOBALS_LINUX_S390_HPP + +// Sets the default values for platform dependent flags used by the +// runtime system (see globals.hpp). + +define_pd_global(bool, DontYieldALot, false); +define_pd_global(intx, ThreadStackSize, 1024); // 0 => Use system default. +define_pd_global(intx, VMThreadStackSize, 1024); +// Some jck tests in lang/fp/fpl038 run out of compile thread stack. +// Observed in pure dbg build, running with -Xcomp -Xbatch on z990. +// We also increase the stack size for opt builds to be on the safe side. +#ifdef ASSERT +define_pd_global(intx, CompilerThreadStackSize, 4096); +#else +define_pd_global(intx, CompilerThreadStackSize, 2048); +#endif + +// Allow extra space in DEBUG builds for asserts. +define_pd_global(size_t, JVMInvokeMethodSlack, 8192); + +// Only used on 64 bit platforms. 
+define_pd_global(size_t, HeapBaseMinAddress, 2*G); + +#endif // OS_CPU_LINUX_S390_VM_GLOBALS_LINUX_S390_HPP + + diff --git a/hotspot/src/os_cpu/linux_s390/vm/orderAccess_linux_s390.inline.hpp b/hotspot/src/os_cpu/linux_s390/vm/orderAccess_linux_s390.inline.hpp new file mode 100644 index 00000000000..f69e0610f18 --- /dev/null +++ b/hotspot/src/os_cpu/linux_s390/vm/orderAccess_linux_s390.inline.hpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP +#define OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP + +#include "runtime/orderAccess.hpp" +#include "vm_version_s390.hpp" + +// Implementation of class OrderAccess. 
+ +// +// machine barrier instructions: +// +// - z_sync two-way memory barrier, aka fence +// +// semantic barrier instructions: +// (as defined in orderAccess.hpp) +// +// - z_release orders Store|Store, (maps to compiler barrier) +// Load|Store +// - z_acquire orders Load|Store, (maps to compiler barrier) +// Load|Load +// - z_fence orders Store|Store, (maps to z_sync) +// Load|Store, +// Load|Load, +// Store|Load +// + + +// Only load-after-store-order is not guaranteed on z/Architecture, i.e. only 'fence' +// is needed. + +// A compiler barrier, forcing the C++ compiler to invalidate all memory assumptions. +#define inlasm_compiler_barrier() __asm__ volatile ("" : : : "memory"); +// "bcr 15, 0" is used as two way memory barrier. +#define inlasm_zarch_sync() __asm__ __volatile__ ("bcr 15, 0" : : : "memory"); + +// Release and acquire are empty on z/Architecture, but potential +// optimizations of gcc must be forbidden by OrderAccess::release and +// OrderAccess::acquire. +#define inlasm_zarch_release() inlasm_compiler_barrier() +#define inlasm_zarch_acquire() inlasm_compiler_barrier() +#define inlasm_zarch_fence() inlasm_zarch_sync() + +inline void OrderAccess::loadload() { inlasm_compiler_barrier(); } +inline void OrderAccess::storestore() { inlasm_compiler_barrier(); } +inline void OrderAccess::loadstore() { inlasm_compiler_barrier(); } +inline void OrderAccess::storeload() { inlasm_zarch_sync(); } + +inline void OrderAccess::acquire() { inlasm_zarch_acquire(); } +inline void OrderAccess::release() { inlasm_zarch_release(); } +inline void OrderAccess::fence() { inlasm_zarch_sync(); } + +template<> inline jbyte OrderAccess::specialized_load_acquire (volatile jbyte* p) { register jbyte t = *p; inlasm_zarch_acquire(); return t; } +template<> inline jshort OrderAccess::specialized_load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_zarch_acquire(); return t; } +template<> inline jint OrderAccess::specialized_load_acquire (volatile jint* p) { 
register jint t = *p; inlasm_zarch_acquire(); return t; } +template<> inline jlong OrderAccess::specialized_load_acquire (volatile jlong* p) { register jlong t = *p; inlasm_zarch_acquire(); return t; } + +#undef inlasm_compiler_barrier +#undef inlasm_zarch_sync +#undef inlasm_zarch_release +#undef inlasm_zarch_acquire +#undef inlasm_zarch_fence + +#define VM_HAS_GENERALIZED_ORDER_ACCESS 1 + +#endif // OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP + + diff --git a/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.cpp b/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.cpp new file mode 100644 index 00000000000..c906109ea9e --- /dev/null +++ b/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.cpp @@ -0,0 +1,640 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// This file is organized as os_linux_x86.cpp. 
+ +// no precompiled headers +#include "asm/assembler.inline.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/nativeInst.hpp" +#include "code/vtableStubs.hpp" +#include "compiler/disassembler.hpp" +#include "interpreter/interpreter.hpp" +#include "jvm_linux.h" +#include "memory/allocation.inline.hpp" +#include "nativeInst_s390.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm.h" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" + +// put OS-includes here +# include <sys/types.h> +# include <sys/mman.h> +# include <pthread.h> +# include <signal.h> +# include <errno.h> +# include <dlfcn.h> +# include <stdlib.h> +# include <stdio.h> +# include <unistd.h> +# include <sys/resource.h> +# include <pthread.h> +# include <sys/stat.h> +# include <sys/time.h> +# include <sys/utsname.h> +# include <sys/socket.h> +# include <sys/wait.h> +# include <pwd.h> +# include <poll.h> +# include <ucontext.h> + +address os::current_stack_pointer() { + intptr_t* csp; + + // Inline assembly for `z_lgr regno(csp), Z_SP' (Z_SP = Z_R15): + __asm__ __volatile__ ("lgr %0, 15":"=r"(csp):); + + assert(((uint64_t)csp & (frame::alignment_in_bytes-1)) == 0, "SP must be aligned"); + return (address) csp; +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + return (char*) -1; +} + +// OS specific thread initialization.
+void os::initialize_thread(Thread* thread) { } + +// Frame information (pc, sp, fp) retrieved via ucontext +// always looks like a C-frame according to the frame +// conventions in frame_s390.hpp. +address os::Linux::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.psw.addr; +} + +void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.psw.addr = (unsigned long)pc; +} + +intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.gregs[15/*REG_SP*/]; +} + +intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { + return NULL; +} + +ExtendedPC os::fetch_frame_from_context(const void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + const ucontext_t* uc = (const ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) { *ret_sp = os::Linux::ucontext_get_sp(uc); } + if (ret_fp) { *ret_fp = os::Linux::ucontext_get_fp(uc); } + } else { + // Construct empty ExtendedPC for return value checking. + epc = ExtendedPC(NULL); + if (ret_sp) { *ret_sp = (intptr_t *)NULL; } + if (ret_fp) { *ret_fp = (intptr_t *)NULL; } + } + + return epc; +} + +frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, epc.pc()); +} + +frame os::get_sender_for_C_frame(frame* fr) { + if (*fr->sp() == 0) { + // fr is the last C frame. + return frame(); + } + + // If its not one of our frames, the return pc is saved at gpr14 + // stack slot. The call_stub stores the return_pc to the stack slot + // of gpr10. 
+ if ((Interpreter::code() != NULL && Interpreter::contains(fr->pc())) || + (CodeCache::contains(fr->pc()) && !StubRoutines::contains(fr->pc()))) { + return frame(fr->sender_sp(), fr->sender_pc()); + } else { + if (StubRoutines::contains(fr->pc())) { + StubCodeDesc* desc = StubCodeDesc::desc_for(fr->pc()); + if (desc && !strcmp(desc->name(),"call_stub")) { + return frame(fr->sender_sp(), fr->callstub_sender_pc()); + } else { + return frame(fr->sender_sp(), fr->sender_pc()); + } + } else { + return frame(fr->sender_sp(), fr->native_sender_pc()); + } + } +} + +frame os::current_frame() { + intptr_t* csp = (intptr_t*) *((intptr_t*) os::current_stack_pointer()); + assert (csp != NULL, "sp should not be NULL"); + // Pass a dummy pc. This way we don't have to load it from the + // stack, since we don't know in which slot we can find it. + frame topframe(csp, (address)0x8); + if (os::is_first_C_frame(&topframe)) { + // Stack is not walkable. + return frame(); + } else { + frame senderFrame = os::get_sender_for_C_frame(&topframe); + assert(senderFrame.pc() != NULL, "Sender pc should not be NULL"); + // Return sender of sender of current topframe which hopefully + // both have pc != NULL. + frame tmp = os::get_sender_for_C_frame(&topframe); + return os::get_sender_for_C_frame(&tmp); + } +} + +// Utility functions + +extern "C" JNIEXPORT int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = Thread::current_or_null_safe(); + + // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away + // (no destructors can be run). + os::WatcherThreadCrashProtection::check_crash_protection(sig, t); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). 
When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. + + if (sig == SIGPIPE) { + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + warning("Ignoring SIGPIPE - see bug 4229104"); + } + return true; + } + } + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL) { + if(t->is_Java_thread()) { + thread = (JavaThread*)t; + } else if(t->is_VM_thread()) { + vmthread = (VMThread *)t; + } + } + } + + // Moved SafeFetch32 handling outside thread!=NULL conditional block to make + // it work if no associated JavaThread object exists. + if (uc) { + address const pc = os::Linux::ucontext_get_pc(uc); + if (pc && StubRoutines::is_safefetch_fault(pc)) { + os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); + return true; + } + } + + // Decide if this trap can be handled by a stub. + address stub = NULL; + address pc = NULL; // Pc as retrieved from PSW. Usually points past failing instruction. + address trap_pc = NULL; // Pc of the instruction causing the trap. + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + if (TraceTraps) { + tty->print_cr(" pc at " INTPTR_FORMAT, p2i(pc)); + } + if ((unsigned long)(pc - (address)info->si_addr) <= (unsigned long)Assembler::instr_maxlen() ) { + trap_pc = (address)info->si_addr; + if (TraceTraps) { + tty->print_cr("trap_pc at " INTPTR_FORMAT, p2i(trap_pc)); + } + } + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + address addr = (address)info->si_addr; // Address causing SIGSEGV, usually mem ref target. + + // Check if fault address is within thread stack. 
+ if (thread->on_local_stack(addr)) { + // stack overflow + if (thread->in_stack_yellow_reserved_zone(addr)) { + thread->disable_stack_yellow_reserved_zone(); + if (thread->thread_state() == _thread_in_Java) { + // Throw a stack overflow exception. + // Guard pages will be reenabled while unwinding the stack. + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. expanding stack."); + } + } + } + } + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub + + // Handle signal from NativeJump::patch_verified_entry(). 
+ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { + if (TraceTraps) { + tty->print_cr("trap: zombie_not_entrant (SIGILL)"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); + } + + else if (sig == SIGSEGV && + os::is_poll_address((address)info->si_addr)) { + if (TraceTraps) { + tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", p2i(pc)); + } + stub = SharedRuntime::get_poll_stub(pc); + + // Info->si_addr only points to the page base address, so we + // must extract the real si_addr from the instruction and the + // ucontext. + assert(((NativeInstruction*)pc)->is_safepoint_poll(), "must be safepoint poll"); + const address real_si_addr = ((NativeInstruction*)pc)->get_poll_address(uc); + } + + // SIGTRAP-based implicit null check in compiled code. + else if ((sig == SIGFPE) && + TrapBasedNullChecks && + (trap_pc != NULL) && + Assembler::is_sigtrap_zero_check(trap_pc)) { + if (TraceTraps) { + tty->print_cr("trap: NULL_CHECK at " INTPTR_FORMAT " (SIGFPE)", p2i(trap_pc)); + } + stub = SharedRuntime::continuation_for_implicit_exception(thread, trap_pc, SharedRuntime::IMPLICIT_NULL); + } + + else if (sig == SIGSEGV && ImplicitNullChecks && + CodeCache::contains((void*) pc) && + !MacroAssembler::needs_explicit_null_check((intptr_t) info->si_addr)) { + if (TraceTraps) { + tty->print_cr("trap: null_check at " INTPTR_FORMAT " (SIGSEGV)", p2i(pc)); + } + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } + + // SIGTRAP-based implicit range check in compiled code. 
+ else if (sig == SIGFPE && TrapBasedRangeChecks && + (trap_pc != NULL) && + Assembler::is_sigtrap_range_check(trap_pc)) { + if (TraceTraps) { + tty->print_cr("trap: RANGE_CHECK at " INTPTR_FORMAT " (SIGFPE)", p2i(trap_pc)); + } + stub = SharedRuntime::continuation_for_implicit_exception(thread, trap_pc, SharedRuntime::IMPLICIT_NULL); + } + + else if (sig == SIGFPE && info->si_code == FPE_INTDIV) { + stub = SharedRuntime::continuation_for_implicit_exception(thread, trap_pc, SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); + } + + else if (sig == SIGBUS) { + // BugId 4454115: A read from a MappedByteBuffer can fault here if the + // underlying file has been truncated. Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + nmethod* nm = (cb != NULL && cb->is_nmethod()) ? (nmethod*)cb : NULL; + if (nm != NULL && nm->has_unsafe_access()) { + // We don't really need a stub here! Just set the pending exeption and + // continue at the next instruction after the faulting read. Returning + // garbage from this read is ok. + thread->set_pending_unsafe_access_error(); + uc->uc_mcontext.psw.addr = ((unsigned long)pc) + Assembler::instr_len(pc); + return true; + } + } + } + + else { // thread->thread_state() != _thread_in_Java + if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { + // SIGILL must be caused by VM_Version::determine_features(). + //*(int *) (pc-6)=0; // Patch instruction to 0 to indicate that it causes a SIGILL. + // Flushing of icache is not necessary. + stub = pc; // Continue with next instruction. + } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && thread->doing_unsafe_access()) { + // We don't really need a stub here! Just set the pending exeption and + // continue at the next instruction after the faulting read. Returning + // garbage from this read is ok. 
+ thread->set_pending_unsafe_access_error(); + os::Linux::ucontext_set_pc(uc, pc + Assembler::instr_len(pc)); + return true; + } + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. + // Info->si_addr need not be the exact address, it is only + // guaranteed to be on the same page as the address that caused + // the SIGSEGV. + if ((sig == SIGSEGV) && + (os::get_memory_serialize_page() == + (address)((uintptr_t)info->si_addr & ~(os::vm_page_size()-1)))) { + return true; + } + } + + if (stub != NULL) { + // Save all thread context in case we need to restore it. + if (thread != NULL) thread->set_saved_exception_pc(pc); + os::Linux::ucontext_set_pc(uc, stub); + return true; + } + + // signal-chaining + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } + + if (!abort_if_unrecognized) { + // caller wants another chance, so give it to him + return false; + } + + if (pc == NULL && uc != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + } + + // unmask current signal + sigset_t newset; + sigemptyset(&newset); + sigaddset(&newset, sig); + sigprocmask(SIG_UNBLOCK, &newset, NULL); + + VMError::report_and_die(t, sig, pc, info, ucVoid); + + ShouldNotReachHere(); + return false; +} + +void os::Linux::init_thread_fpu_state(void) { + // Nothing to do on z/Architecture. +} + +int os::Linux::get_fpu_control_word(void) { + // Nothing to do on z/Architecture. + return 0; +} + +void os::Linux::set_fpu_control_word(int fpu_control) { + // Nothing to do on z/Architecture. 
+} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +size_t os::Posix::_compiler_thread_min_stack_allowed = 128 * K; +size_t os::Posix::_java_thread_min_stack_allowed = 128 * K; +size_t os::Posix::_vm_internal_thread_min_stack_allowed = 128 * K; + +// return default stack size for thr_type +size_t os::Posix::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 4 * M : 1024 * K); + return s; +} + +size_t os::Linux::default_guard_size(os::ThreadType thr_type) { + // z/Architecture: put 2 guard pages right in the middle of thread stack. This value + // should be consistent with the value used by register stack handling code. + return 2 * page_size(); +} + +// Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ +// | glibc guard page | - Right in the middle of stack, 2 pages +// | |/ +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | HotSpot Guard Pages | - red and yellow pages +// | |/ +// +------------------------+ JavaThread::stack_yellow_zone_base() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// Non-Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ +// | glibc guard page | - Right in the middle of stack, 2 pages +// | |/ +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// ** P2 is the address returned from pthread_attr_getstackaddr(), P2 - P1 +// is the stack size returned by pthread_attr_getstacksize(). + + +static void current_stack_region(address * bottom, size_t * size) { + if (os::Linux::is_initial_thread()) { + // Initial thread needs special handling because pthread_getattr_np() + // may return bogus value. 
+ *bottom = os::Linux::initial_thread_stack_bottom(); + *size = os::Linux::initial_thread_stack_size(); + } else { + pthread_attr_t attr; + + int rslt = pthread_getattr_np(pthread_self(), &attr); + + // JVM needs to know exact stack location, abort if it fails + if (rslt != 0) { + if (rslt == ENOMEM) { + vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); + } else { + fatal("pthread_getattr_np failed with errno = %d", rslt); + } + } + + if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { + fatal("Can not locate current stack attributes!"); + } + + pthread_attr_destroy(&attr); + + } + assert(os::current_stack_pointer() >= *bottom && + os::current_stack_pointer() < *bottom + *size, "just checking"); +} + +address os::current_stack_base() { + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return (bottom + size); +} + +size_t os::current_stack_size() { + // stack size includes normal stack and HotSpot guard pages + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return size; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler + +void os::print_context(outputStream *st, const void *context) { + if (context == NULL) return; + + const ucontext_t* uc = (const ucontext_t*)context; + + st->print_cr("Processor state:"); + st->print_cr("----------------"); + st->print_cr(" ip = " INTPTR_FORMAT " ", uc->uc_mcontext.psw.addr); + st->print_cr(" proc mask = " INTPTR_FORMAT " ", uc->uc_mcontext.psw.mask); + st->print_cr(" fpc reg = 0x%8.8x " , uc->uc_mcontext.fpregs.fpc); + st->cr(); + + st->print_cr("General Purpose Registers:"); + st->print_cr("--------------------------"); + for( int i = 0; i < 16; i+=2 ) { + st->print(" r%-2d = " INTPTR_FORMAT " " , i, uc->uc_mcontext.gregs[i]); + st->print(" r%-2d = " INTPTR_FORMAT " |", i+1, uc->uc_mcontext.gregs[i+1]); + st->print(" r%-2d = %23.1ld " , i, uc->uc_mcontext.gregs[i]); + 
st->print(" r%-2d = %23.1ld " , i+1, uc->uc_mcontext.gregs[i+1]); + st->cr(); + } + st->cr(); + + st->print_cr("Access Registers:"); + st->print_cr("-----------------"); + for( int i = 0; i < 16; i+=2 ) { + st->print(" ar%-2d = 0x%8.8x ", i, uc->uc_mcontext.aregs[i]); + st->print(" ar%-2d = 0x%8.8x ", i+1, uc->uc_mcontext.aregs[i+1]); + st->cr(); + } + st->cr(); + + st->print_cr("Float Registers:"); + st->print_cr("----------------"); + for (int i = 0; i < 16; i += 2) { + st->print(" fr%-2d = " INTPTR_FORMAT " " , i, (int64_t)(uc->uc_mcontext.fpregs.fprs[i].d)); + st->print(" fr%-2d = " INTPTR_FORMAT " |", i+1, (int64_t)(uc->uc_mcontext.fpregs.fprs[i+1].d)); + st->print(" fr%-2d = %23.15e " , i, (uc->uc_mcontext.fpregs.fprs[i].d)); + st->print(" fr%-2d = %23.15e " , i+1, (uc->uc_mcontext.fpregs.fprs[i+1].d)); + st->cr(); + } + st->cr(); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + print_hex_dump(st, (address)sp, (address)(sp + 128), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. + address pc = os::Linux::ucontext_get_pc(uc); + if (Verbose) { st->print_cr("pc at " PTR_FORMAT, p2i(pc)); } + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc-64, pc+64, /*intrsize=*/4); + st->cr(); +} + +void os::print_register_info(outputStream *st, const void *context) { + st->print("Not ported\n"); +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { +} +#endif + +int os::extra_bang_size_in_bytes() { + // z/Architecture does not require the additional stack bang. 
+ return 0; +} diff --git a/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.hpp b/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.hpp new file mode 100644 index 00000000000..5dbb610dd16 --- /dev/null +++ b/hotspot/src/os_cpu/linux_s390/vm/os_linux_s390.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_S390_VM_OS_LINUX_S390_HPP +#define OS_CPU_LINUX_S390_VM_OS_LINUX_S390_HPP + + static void setup_fpu() {} + + // Used to register dynamic code cache area with the OS. 
+  static bool register_code_area(char *low, char *high) { return true; }
+
+#endif // OS_CPU_LINUX_S390_VM_OS_LINUX_S390_HPP
+
diff --git a/hotspot/src/os_cpu/linux_s390/vm/prefetch_linux_s390.inline.hpp b/hotspot/src/os_cpu/linux_s390/vm/prefetch_linux_s390.inline.hpp
new file mode 100644
index 00000000000..0009e059c40
--- /dev/null
+++ b/hotspot/src/os_cpu/linux_s390/vm/prefetch_linux_s390.inline.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_PREFETCH_LINUX_S390_INLINE_HPP
+#define OS_CPU_LINUX_S390_VM_PREFETCH_LINUX_S390_INLINE_HPP
+
+#include "runtime/prefetch.hpp"
+
+inline void Prefetch::read(void* loc, intx interval) {
+  // No prefetch instructions on z/Architecture -> implemented as a no-op; loc and interval are ignored.
+}
+
+inline void Prefetch::write(void* loc, intx interval) {
+  // No prefetch instructions on z/Architecture -> implemented as a no-op; loc and interval are ignored.
+}
+
+#endif // OS_CPU_LINUX_S390_VM_PREFETCH_LINUX_S390_INLINE_HPP
diff --git a/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.cpp b/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.cpp
new file mode 100644
index 00000000000..8d66d94eef7
--- /dev/null
+++ b/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/thread.hpp"
+
+// Forte Analyzer AsyncGetCallTrace profiling support is not implemented on Linux/S390x.
+bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava) {
+  Unimplemented();
+  return false;
+}
+
+void JavaThread::cache_global_variables() { }
diff --git a/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.hpp b/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.hpp
new file mode 100644
index 00000000000..6268700c277
--- /dev/null
+++ b/hotspot/src/os_cpu/linux_s390/vm/thread_linux_s390.hpp
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_THREAD_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_THREAD_LINUX_S390_HPP
+
+ private:
+
+  void pd_initialize() {
+    _anchor.clear();
+    _last_interpreter_fp = NULL;
+  }
+
+  // The `last' frame is the youngest Java frame on the thread's stack.
+  frame pd_last_frame() {
+    assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
+
+    intptr_t* sp = last_Java_sp();
+    address pc = _anchor.last_Java_pc();
+
+    // Last_Java_pc is not set if we come here from compiled code.
+    if (pc == NULL) {
+      pc = (address) *(sp + 14); // NOTE(review): presumably the gpr14 (return pc) save slot of the frame - confirm against frame_s390.hpp
+    }
+
+    return frame(sp, pc);
+  }
+
+ public:
+  void set_base_of_stack_pointer(intptr_t* base_sp) {}
+  intptr_t* base_of_stack_pointer() { return NULL; }
+  void record_base_of_stack_pointer() {}
+
+  // These routines are only used on cpu architectures that
+  // have separate register stacks (Itanium).
+  static bool register_stack_overflow() { return false; }
+  static void enable_register_stack_guard() {}
+  static void disable_register_stack_guard() {}
+
+  bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, bool isInJava);
+
+ protected:
+
+  // -Xprof support
+  //
+  // In order to find the last Java fp from an async profile
+  // tick, we store the current interpreter fp in the thread.
+  // This value is only valid while we are in the C++ interpreter
+  // and profiling.
+  intptr_t *_last_interpreter_fp;
+
+ public:
+
+  static ByteSize last_interpreter_fp_offset() {
+    return byte_offset_of(JavaThread, _last_interpreter_fp);
+  }
+
+  intptr_t* last_interpreter_fp() { return _last_interpreter_fp; }
+
+#endif // OS_CPU_LINUX_S390_VM_THREAD_LINUX_S390_HPP
diff --git a/hotspot/src/os_cpu/linux_s390/vm/vmStructs_linux_s390.hpp b/hotspot/src/os_cpu/linux_s390/vm/vmStructs_linux_s390.hpp
new file mode 100644
index 00000000000..5439fc5a8e4
--- /dev/null
+++ b/hotspot/src/os_cpu/linux_s390/vm/vmStructs_linux_s390.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_S390_VM_VMSTRUCTS_LINUX_S390_HPP
+#define OS_CPU_LINUX_S390_VM_VMSTRUCTS_LINUX_S390_HPP
+
+// These are the OS and CPU-specific fields, types and integer
+// constants required by the Serviceability Agent. This file is
+// referenced by vmStructs.cpp.
+ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pid_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(pid_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_S390_VM_VMSTRUCTS_LINUX_S390_HPP + diff --git a/hotspot/src/share/tools/hsdis/hsdis.c b/hotspot/src/share/tools/hsdis/hsdis.c index 8706163981c..ca2d5cc818b 100644 --- a/hotspot/src/share/tools/hsdis/hsdis.c +++ b/hotspot/src/share/tools/hsdis/hsdis.c @@ -467,6 +467,9 @@ static const char* native_arch_name() { #endif #ifdef LIBARCH_aarch64 res = "aarch64"; +#endif +#ifdef LIBARCH_s390x + res = "s390:64-bit"; #endif if (res == NULL) res = "architecture not set in Makefile!"; diff --git a/hotspot/src/share/vm/adlc/formssel.cpp b/hotspot/src/share/vm/adlc/formssel.cpp index 5f748379655..d9e8726bb58 100644 --- a/hotspot/src/share/vm/adlc/formssel.cpp +++ b/hotspot/src/share/vm/adlc/formssel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -722,6 +722,11 @@ int InstructForm::memory_operand(FormDict &globals) const { // // unique def, some uses // // must return bottom unless all uses match def // unique = NULL; +#ifdef S390 + // This case is important for move instructions on s390x. + // On other platforms (e.g. x86), all uses always match the def. + unique = NULL; +#endif } } else if( DEF_of_memory > 0 ) { // multiple defs, don't care about uses @@ -771,19 +776,21 @@ int InstructForm::memory_operand(FormDict &globals) const { // This instruction captures the machine-independent bottom_type // Expected use is for pointer vs oop determination for LoadP bool InstructForm::captures_bottom_type(FormDict &globals) const { - if( _matrule && _matrule->_rChild && - (!strcmp(_matrule->_rChild->_opType,"CastPP") || // new result type - !strcmp(_matrule->_rChild->_opType,"CastX2P") || // new result type - !strcmp(_matrule->_rChild->_opType,"DecodeN") || - !strcmp(_matrule->_rChild->_opType,"EncodeP") || - !strcmp(_matrule->_rChild->_opType,"DecodeNKlass") || - !strcmp(_matrule->_rChild->_opType,"EncodePKlass") || - !strcmp(_matrule->_rChild->_opType,"LoadN") || - !strcmp(_matrule->_rChild->_opType,"LoadNKlass") || - !strcmp(_matrule->_rChild->_opType,"CreateEx") || // type of exception - !strcmp(_matrule->_rChild->_opType,"CheckCastPP") || - !strcmp(_matrule->_rChild->_opType,"GetAndSetP") || - !strcmp(_matrule->_rChild->_opType,"GetAndSetN")) ) return true; + if (_matrule && _matrule->_rChild && + (!strcmp(_matrule->_rChild->_opType,"CastPP") || // new result type + !strcmp(_matrule->_rChild->_opType,"CastX2P") || // new result type + !strcmp(_matrule->_rChild->_opType,"DecodeN") || + !strcmp(_matrule->_rChild->_opType,"EncodeP") || + !strcmp(_matrule->_rChild->_opType,"DecodeNKlass") || + 
!strcmp(_matrule->_rChild->_opType,"EncodePKlass") || + !strcmp(_matrule->_rChild->_opType,"LoadN") || + !strcmp(_matrule->_rChild->_opType,"LoadNKlass") || + !strcmp(_matrule->_rChild->_opType,"CreateEx") || // type of exception + !strcmp(_matrule->_rChild->_opType,"CheckCastPP") || + !strcmp(_matrule->_rChild->_opType,"GetAndSetP") || + !strcmp(_matrule->_rChild->_opType,"GetAndSetN") || + !strcmp(_matrule->_rChild->_opType,"CompareAndExchangeP") || + !strcmp(_matrule->_rChild->_opType,"CompareAndExchangeN"))) return true; else if ( is_ideal_load() == Form::idealP ) return true; else if ( is_ideal_store() != Form::none ) return true; diff --git a/hotspot/src/share/vm/adlc/main.cpp b/hotspot/src/share/vm/adlc/main.cpp index 7453d38d61b..9c5bf021837 100644 --- a/hotspot/src/share/vm/adlc/main.cpp +++ b/hotspot/src/share/vm/adlc/main.cpp @@ -259,6 +259,7 @@ int main(int argc, char *argv[]) AD.addInclude(AD._DFA_file, "opto/cfgnode.hpp"); // Use PROB_MAX in predicate. AD.addInclude(AD._DFA_file, "opto/intrinsicnode.hpp"); AD.addInclude(AD._DFA_file, "opto/matcher.hpp"); + AD.addInclude(AD._DFA_file, "opto/narrowptrnode.hpp"); AD.addInclude(AD._DFA_file, "opto/opcodes.hpp"); AD.addInclude(AD._DFA_file, "opto/convertnode.hpp"); // Make sure each .cpp file starts with include lines: diff --git a/hotspot/src/share/vm/adlc/output_c.cpp b/hotspot/src/share/vm/adlc/output_c.cpp index cc98ef59af4..b476e84822a 100644 --- a/hotspot/src/share/vm/adlc/output_c.cpp +++ b/hotspot/src/share/vm/adlc/output_c.cpp @@ -1644,6 +1644,15 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) { } } // done iterating over a new instruction's operands + // Fix number of operands, as we do not generate redundant ones. + // The matcher generates some redundant operands, which are removed + // in the expand function (of the node we generate here). We don't + // generate the redundant operands here, so set the correct _num_opnds. 
+ if (expand_instruction->num_opnds() != expand_instruction->num_unique_opnds()) { + fprintf(fp, " n%d->_num_opnds = %d; // Only unique opnds generated.\n", + cnt, expand_instruction->num_unique_opnds()); + } + // Invoke Expand() for the newly created instruction. fprintf(fp," result = n%d->Expand( state, proj_list, mem );\n", cnt); assert( !new_inst->expands(), "Do not have complete support for recursive expansion"); @@ -1722,27 +1731,30 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) { if( !node->expands() && node->_matrule != NULL ) { // Remove duplicated operands and inputs which use the same name. - // Seach through match operands for the same name usage. + // Search through match operands for the same name usage. + // The matcher generates these non-unique operands. If the node + // was constructed by an expand rule, there are no unique operands. uint cur_num_opnds = node->num_opnds(); - if( cur_num_opnds > 1 && cur_num_opnds != node->num_unique_opnds() ) { + if (cur_num_opnds > 1 && cur_num_opnds != node->num_unique_opnds()) { Component *comp = NULL; + fprintf(fp, " // Remove duplicated operands and inputs which use the same name.\n"); + fprintf(fp, " if (num_opnds() == %d) {\n", cur_num_opnds); // Build mapping from num_edges to local variables - fprintf(fp," unsigned num0 = 0;\n"); - for( i = 1; i < cur_num_opnds; i++ ) { - fprintf(fp," unsigned num%d = opnd_array(%d)->num_edges();",i,i); + fprintf(fp," unsigned num0 = 0;\n"); + for (i = 1; i < cur_num_opnds; i++) { + fprintf(fp," unsigned num%d = opnd_array(%d)->num_edges();", i, i); fprintf(fp, " \t// %s\n", node->opnd_ident(i)); } // Build a mapping from operand index to input edges - fprintf(fp," unsigned idx0 = oper_input_base();\n"); - for( i = 0; i < cur_num_opnds; i++ ) { - fprintf(fp," unsigned idx%d = idx%d + num%d;\n", - i+1,i,i); + fprintf(fp," unsigned idx0 = oper_input_base();\n"); + for (i = 0; i < cur_num_opnds; i++) { + fprintf(fp," unsigned idx%d = idx%d + num%d;\n", i+1, i, 
i); } uint new_num_opnds = 1; node->_components.reset(); // Skip first unique operands. - for( i = 1; i < cur_num_opnds; i++ ) { + for (i = 1; i < cur_num_opnds; i++) { comp = node->_components.iter(); if (i != node->unique_opnds_idx(i)) { break; @@ -1750,28 +1762,32 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) { new_num_opnds++; } // Replace not unique operands with next unique operands. - for( ; i < cur_num_opnds; i++ ) { + for ( ; i < cur_num_opnds; i++) { comp = node->_components.iter(); uint j = node->unique_opnds_idx(i); // unique_opnds_idx(i) is unique if unique_opnds_idx(j) is not unique. - if( j != node->unique_opnds_idx(j) ) { - fprintf(fp," set_opnd_array(%d, opnd_array(%d)->clone()); // %s\n", + if (j != node->unique_opnds_idx(j)) { + fprintf(fp," set_opnd_array(%d, opnd_array(%d)->clone()); // %s\n", new_num_opnds, i, comp->_name); - // delete not unique edges here - fprintf(fp," for(unsigned i = 0; i < num%d; i++) {\n", i); - fprintf(fp," set_req(i + idx%d, _in[i + idx%d]);\n", new_num_opnds, i); - fprintf(fp," }\n"); - fprintf(fp," num%d = num%d;\n", new_num_opnds, i); - fprintf(fp," idx%d = idx%d + num%d;\n", new_num_opnds+1, new_num_opnds, new_num_opnds); + // Delete not unique edges here. + fprintf(fp," for (unsigned i = 0; i < num%d; i++) {\n", i); + fprintf(fp," set_req(i + idx%d, _in[i + idx%d]);\n", new_num_opnds, i); + fprintf(fp," }\n"); + fprintf(fp," num%d = num%d;\n", new_num_opnds, i); + fprintf(fp," idx%d = idx%d + num%d;\n", new_num_opnds+1, new_num_opnds, new_num_opnds); new_num_opnds++; } } - // delete the rest of edges - fprintf(fp," for(int i = idx%d - 1; i >= (int)idx%d; i--) {\n", cur_num_opnds, new_num_opnds); - fprintf(fp," del_req(i);\n"); - fprintf(fp," }\n"); - fprintf(fp," _num_opnds = %d;\n", new_num_opnds); + // Delete the rest of edges. 
+ fprintf(fp," for (int i = idx%d - 1; i >= (int)idx%d; i--) {\n", cur_num_opnds, new_num_opnds); + fprintf(fp," del_req(i);\n"); + fprintf(fp," }\n"); + fprintf(fp," _num_opnds = %d;\n", new_num_opnds); assert(new_num_opnds == node->num_unique_opnds(), "what?"); + fprintf(fp, " } else {\n"); + fprintf(fp, " assert(_num_opnds == %d, \"There should be either %d or %d operands.\");\n", + new_num_opnds, new_num_opnds, cur_num_opnds); + fprintf(fp, " }\n"); } } diff --git a/hotspot/src/share/vm/asm/codeBuffer.cpp b/hotspot/src/share/vm/asm/codeBuffer.cpp index bd9d8dae97a..a7bc17a647e 100644 --- a/hotspot/src/share/vm/asm/codeBuffer.cpp +++ b/hotspot/src/share/vm/asm/codeBuffer.cpp @@ -747,6 +747,10 @@ void CodeBuffer::copy_code_to(CodeBlob* dest_blob) { CodeBuffer dest(dest_blob); assert(dest_blob->content_size() >= total_content_size(), "good sizing"); this->compute_final_layout(&dest); + + // Set beginning of constant table before relocating. + dest_blob->set_ctable_begin(dest.consts()->start()); + relocate_code_to(&dest); // transfer strings and comments from buffer to blob @@ -940,6 +944,9 @@ void CodeBuffer::expand(CodeSection* which_cs, csize_t amount) { } } + // Needs to be initialized when calling fix_relocation_after_move. 
+ cb.blob()->set_ctable_begin(cb.consts()->start()); + // Move all the code and relocations to the new blob: relocate_code_to(&cb); diff --git a/hotspot/src/share/vm/c1/c1_Compiler.cpp b/hotspot/src/share/vm/c1/c1_Compiler.cpp index b25255bd7db..731b9efc96c 100644 --- a/hotspot/src/share/vm/c1/c1_Compiler.cpp +++ b/hotspot/src/share/vm/c1/c1_Compiler.cpp @@ -42,7 +42,7 @@ #include "runtime/sharedRuntime.hpp" -Compiler::Compiler() : AbstractCompiler(c1) { +Compiler::Compiler() : AbstractCompiler(compiler_c1) { } void Compiler::init_c1_runtime() { @@ -223,6 +223,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { case vmIntrinsics::_putCharStringU: #ifdef TRACE_HAVE_INTRINSICS case vmIntrinsics::_counterTime: + case vmIntrinsics::_getBufferWriter: #if defined(_LP64) || !defined(TRACE_ID_CLASS_SHIFT) case vmIntrinsics::_getClassId: #endif diff --git a/hotspot/src/share/vm/c1/c1_LIR.cpp b/hotspot/src/share/vm/c1/c1_LIR.cpp index 8c577be0918..a8bbf933102 100644 --- a/hotspot/src/share/vm/c1/c1_LIR.cpp +++ b/hotspot/src/share/vm/c1/c1_LIR.cpp @@ -209,6 +209,17 @@ void LIR_Op2::verify() const { } if (TwoOperandLIRForm) { + +#ifdef ASSERT + bool threeOperandForm = false; +#ifdef S390 + // There are 3 operand shifts on S390 (see LIR_Assembler::shift_op()). 
+ threeOperandForm = + code() == lir_shl || + ((code() == lir_shr || code() == lir_ushr) && (result_opr()->is_double_cpu() || in_opr1()->type() == T_OBJECT)); +#endif +#endif + switch (code()) { case lir_add: case lir_sub: @@ -222,13 +233,13 @@ void LIR_Op2::verify() const { case lir_logic_xor: case lir_shl: case lir_shr: - assert(in_opr1() == result_opr(), "opr1 and result must match"); + assert(in_opr1() == result_opr() || threeOperandForm, "opr1 and result must match"); assert(in_opr1()->is_valid() && in_opr2()->is_valid(), "must be valid"); break; // special handling for lir_ushr because of write barriers case lir_ushr: - assert(in_opr1() == result_opr() || in_opr2()->is_constant(), "opr1 and result must match or shift count is constant"); + assert(in_opr1() == result_opr() || in_opr2()->is_constant() || threeOperandForm, "opr1 and result must match or shift count is constant"); assert(in_opr1()->is_valid() && in_opr2()->is_valid(), "must be valid"); break; diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp index 65c13d5912f..dd21475d30b 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp @@ -606,7 +606,10 @@ void LIRGenerator::arithmetic_op_fpu(Bytecodes::Code code, LIR_Opr result, LIR_O void LIRGenerator::shift_op(Bytecodes::Code code, LIR_Opr result_op, LIR_Opr value, LIR_Opr count, LIR_Opr tmp) { - if (TwoOperandLIRForm && value != result_op) { + + if (TwoOperandLIRForm && value != result_op + // Only 32bit right shifts require two operand form on S390. 
+ S390_ONLY(&& (code == Bytecodes::_ishr || code == Bytecodes::_iushr))) { assert(count != result_op, "malformed"); __ move(value, result_op); value = result_op; @@ -3120,6 +3123,22 @@ void LIRGenerator::do_ClassIDIntrinsic(Intrinsic* x) { __ move(id, rlock_result(x)); } + +void LIRGenerator::do_getBufferWriter(Intrinsic* x) { + LabelObj* L_end = new LabelObj(); + + LIR_Address* jobj_addr = new LIR_Address(getThreadPointer(), + in_bytes(TRACE_THREAD_DATA_WRITER_OFFSET), + T_OBJECT); + LIR_Opr result = rlock_result(x); + __ move_wide(jobj_addr, result); + __ cmp(lir_cond_equal, result, LIR_OprFact::oopConst(NULL)); + __ branch(lir_cond_equal, T_OBJECT, L_end->label()); + __ move_wide(new LIR_Address(result, T_OBJECT), result); + + __ branch_destination(L_end->label()); +} + #endif @@ -3151,6 +3170,9 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) { case vmIntrinsics::_getClassId: do_ClassIDIntrinsic(x); break; + case vmIntrinsics::_getBufferWriter: + do_getBufferWriter(x); + break; case vmIntrinsics::_counterTime: do_RuntimeCall(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), x); break; diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp index 04dae206422..3a7e26a8562 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp @@ -28,6 +28,7 @@ #include "c1/c1_Instruction.hpp" #include "c1/c1_LIR.hpp" #include "ci/ciMethodData.hpp" +#include "utilities/macros.hpp" #include "utilities/sizes.hpp" // The classes responsible for code emission and register allocation @@ -360,7 +361,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void add_large_constant(LIR_Opr src, int c, LIR_Opr dest); // machine preferences and characteristics - bool can_inline_as_constant(Value i) const; + bool can_inline_as_constant(Value i S390_ONLY(COMMA int bits = 20)) const; bool can_inline_as_constant(LIR_Const* c) const; bool can_store_as_constant(Value i, BasicType type) 
const; @@ -441,6 +442,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { #ifdef TRACE_HAVE_INTRINSICS void do_ClassIDIntrinsic(Intrinsic* x); + void do_getBufferWriter(Intrinsic* x); #endif void do_RuntimeCall(address routine, Intrinsic* x); @@ -496,6 +498,12 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { static LIR_Opr divInOpr(); static LIR_Opr divOutOpr(); static LIR_Opr remOutOpr(); +#ifdef S390 + // On S390 we can do ldiv, lrem without RT call. + static LIR_Opr ldivInOpr(); + static LIR_Opr ldivOutOpr(); + static LIR_Opr lremOutOpr(); +#endif static LIR_Opr shiftCountOpr(); LIR_Opr syncLockOpr(); LIR_Opr syncTempOpr(); @@ -621,7 +629,7 @@ class LIRItem: public CompilationResourceObj { void load_item(); void load_byte_item(); - void load_nonconstant(); + void load_nonconstant(S390_ONLY(int bits = 20)); // load any values which can't be expressed as part of a single store instruction void load_for_store(BasicType store_type); void load_item_force(LIR_Opr reg); diff --git a/hotspot/src/share/vm/c1/c1_LinearScan.cpp b/hotspot/src/share/vm/c1/c1_LinearScan.cpp index 18397d6118f..79446fc30cc 100644 --- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp +++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp @@ -1077,7 +1077,7 @@ IntervalUseKind LinearScan::use_kind_of_input_operand(LIR_Op* op, LIR_Opr opr) { } -#ifdef X86 +#if defined(X86) || defined(S390) if (op->code() == lir_cmove) { // conditional moves can handle stack operands assert(op->result_opr()->is_register(), "result must always be in a register"); @@ -1088,7 +1088,7 @@ IntervalUseKind LinearScan::use_kind_of_input_operand(LIR_Op* op, LIR_Opr opr) { // this operand is allowed to be on the stack in some cases BasicType opr_type = opr->type_register(); if (opr_type == T_FLOAT || opr_type == T_DOUBLE) { - if ((UseSSE == 1 && opr_type == T_FLOAT) || UseSSE >= 2) { + if ((UseSSE == 1 && opr_type == T_FLOAT) || UseSSE >= 2 S390_ONLY(|| true)) { // SSE float instruction 
(T_DOUBLE only supported with SSE2) switch (op->code()) { case lir_cmp: @@ -1144,7 +1144,7 @@ IntervalUseKind LinearScan::use_kind_of_input_operand(LIR_Op* op, LIR_Opr opr) { } } } -#endif // X86 +#endif // X86 S390 // all other operands require a register return mustHaveRegister; @@ -2653,6 +2653,11 @@ int LinearScan::append_scope_value_for_operand(LIR_Opr opr, GrowableArrayfpu_regname(opr->fpu_regnr()); #ifndef __SOFTFP__ #ifndef VM_LITTLE_ENDIAN + // On S390 a (single precision) float value occupies only the high + // word of the full double register. So when the double register is + // stored to memory (e.g. by the RegisterSaver), then the float value + // is found at offset 0. I.e. the code below is not needed on S390. +#ifndef S390 if (! float_saved_as_double) { // On big endian system, we may have an issue if float registers use only // the low half of the (same) double registers. @@ -2667,6 +2672,7 @@ int LinearScan::append_scope_value_for_operand(LIR_Opr opr, GrowableArrayprint("%s%s", name->as_C_string(), signature->as_C_string()); } static void print_method(outputStream* str, Method* mo, bool with_class=true) { - ResourceMark rm; if (with_class) { str->print("%s.", mo->klass_name()->as_C_string()); } diff --git a/hotspot/src/share/vm/classfile/dictionary.cpp b/hotspot/src/share/vm/classfile/dictionary.cpp index 56d59d23afc..5bb666d730c 100644 --- a/hotspot/src/share/vm/classfile/dictionary.cpp +++ b/hotspot/src/share/vm/classfile/dictionary.cpp @@ -597,7 +597,7 @@ void ProtectionDomainCacheTable::verify() { } guarantee(number_of_entries() == element_count, "Verify of protection domain cache table failed"); - DEBUG_ONLY(verify_lookup_length((double)number_of_entries() / table_size())); + DEBUG_ONLY(verify_lookup_length((double)number_of_entries() / table_size(), "Domain Cache Table")); } void ProtectionDomainCacheEntry::verify() { @@ -738,50 +738,9 @@ void Dictionary::print(bool details) { table_size(), number_of_entries()); tty->print_cr("^ indicates 
that initiating loader is different from " "defining loader"); - tty->print_cr("1st number: th bucket index"); - tty->print_cr("2nd number: the entry's index within this bucket"); -#ifdef ASSERT - tty->print_cr("3rd number: the hit percentage of this entry"); - tty->print_cr("4th number: the hash index of this entry"); -#endif } -#ifdef ASSERT - // find top buckets with highest lookup count - #define TOP_COUNT 16 - int topItemsIndicies[TOP_COUNT]; - for (int i = 0; i < TOP_COUNT; i++) { - topItemsIndicies[i] = i; - } - double total = 0.0; - for (int i = 0; i < table_size(); i++) { - // find the total count number, so later on we can - // express bucket lookup count as a percentage of all lookups - unsigned value = bucket_hits(i); - total += value; - - // find the entry with min value - int index = 0; - unsigned min = bucket_hits(topItemsIndicies[index]); - for (int j = 1; j < TOP_COUNT; j++) { - if (bucket_hits(topItemsIndicies[j]) < min) { - min = bucket_hits(topItemsIndicies[j]); - index = j; - } - } - // if the bucket loookup value is bigger than the current min - // move that bucket index into the top list - if (value > min) { - topItemsIndicies[index] = i; - } - } -#endif - for (int index = 0; index < table_size(); index++) { -#ifdef ASSERT - double percentage = 100.0 * (double)bucket_hits(index)/total; -#endif - int chain = 0; for (DictionaryEntry* probe = bucket(index); probe != NULL; probe = probe->next()) { @@ -790,10 +749,7 @@ void Dictionary::print(bool details) { bool is_defining_class = (loader_data == e->class_loader_data()); if (details) { - tty->print("%4d: %3d: ", index, chain); -#ifdef ASSERT - tty->print("%5.2f%%: %10u:", percentage, probe->hash()); -#endif + tty->print("%4d: ", index); } tty->print("%s%s", ((!details) || is_defining_class) ? 
" " : "^", e->external_name()); @@ -807,30 +763,9 @@ void Dictionary::print(bool details) { } } tty->cr(); - - chain++; - } - if (details && (chain == 0)) { - tty->print("%4d:", index); - tty->cr(); } } -#ifdef ASSERT - // print out the TOP_COUNT of buckets with highest lookup count (unsorted) - if (details) { - tty->cr(); - tty->print("Top %d buckets:", TOP_COUNT); - tty->cr(); - for (int i = 0; i < TOP_COUNT; i++) { - tty->print("%4d: hits %5.2f%%", - topItemsIndicies[i], - 100.0*(double)bucket_hits(topItemsIndicies[i])/total); - tty->cr(); - } - } -#endif - if (details) { tty->cr(); _pd_cache_table->print(); @@ -838,6 +773,84 @@ void Dictionary::print(bool details) { tty->cr(); } +#ifdef ASSERT +void Dictionary::printPerformanceInfoDetails() { + if (log_is_enabled(Info, hashtables)) { + ResourceMark rm; + HandleMark hm; + + log_info(hashtables)(" "); + log_info(hashtables)("Java system dictionary (table_size=%d, classes=%d)", + table_size(), number_of_entries()); + log_info(hashtables)("1st number: the bucket index"); + log_info(hashtables)("2nd number: the hit percentage for this bucket"); + log_info(hashtables)("3rd number: the entry's index within this bucket"); + log_info(hashtables)("4th number: the hash index of this entry"); + log_info(hashtables)(" "); + + // find top buckets with highest lookup count +#define TOP_COUNT 16 + int topItemsIndicies[TOP_COUNT]; + for (int i = 0; i < TOP_COUNT; i++) { + topItemsIndicies[i] = i; + } + double total = 0.0; + for (int i = 0; i < table_size(); i++) { + // find the total count number, so later on we can + // express bucket lookup count as a percentage of all lookups + unsigned value = bucket_hits(i); + total += value; + + // find the top entry with min value + int min_index = 0; + unsigned min_value = bucket_hits(topItemsIndicies[min_index]); + for (int j = 1; j < TOP_COUNT; j++) { + unsigned top_value = bucket_hits(topItemsIndicies[j]); + if (top_value < min_value) { + min_value = top_value; + min_index = j; + } + 
+ } + // if the bucket lookup value is bigger than the top buckets min + // move that bucket index into the top list + if (value > min_value) { + topItemsIndicies[min_index] = i; + } + } + + for (int index = 0; index < table_size(); index++) { + double percentage = 100.0 * (double)bucket_hits(index)/total; + int chain = 0; + for (DictionaryEntry* probe = bucket(index); + probe != NULL; + probe = probe->next()) { + Klass* e = probe->klass(); + ClassLoaderData* loader_data = probe->loader_data(); + bool is_defining_class = + (loader_data == e->class_loader_data()); + log_info(hashtables)("%4d: %5.2f%%: %3d: %10u: %s, loader %s", + index, percentage, chain, probe->hash(), e->external_name(), + (loader_data != NULL) ? loader_data->loader_name() : "NULL"); + + chain++; + } + if (chain == 0) { + log_info(hashtables)("%4d:", index+1); + } + } + log_info(hashtables)(" "); + + // print out the TOP_COUNT of buckets with highest lookup count (unsorted) + log_info(hashtables)("Top %d buckets:", TOP_COUNT); + for (int i = 0; i < TOP_COUNT; i++) { + log_info(hashtables)("%4d: hits %5.2f%%", + topItemsIndicies[i], + 100.0*(double)bucket_hits(topItemsIndicies[i])/total); + } + } +} +#endif // ASSERT + void Dictionary::verify() { guarantee(number_of_entries() >= 0, "Verify of system dictionary failed"); @@ -863,7 +876,11 @@ void Dictionary::verify() { } guarantee(number_of_entries() == element_count, "Verify of system dictionary failed"); - DEBUG_ONLY(if (!verify_lookup_length((double)number_of_entries() / table_size())) this->print(true)); +#ifdef ASSERT + if (!verify_lookup_length((double)number_of_entries() / table_size(), "System Dictionary")) { + this->printPerformanceInfoDetails(); + } +#endif // ASSERT _pd_cache_table->verify(); } diff --git a/hotspot/src/share/vm/classfile/dictionary.hpp b/hotspot/src/share/vm/classfile/dictionary.hpp index a873fdd3e47..e0280441bd3 100644 --- a/hotspot/src/share/vm/classfile/dictionary.hpp +++ b/hotspot/src/share/vm/classfile/dictionary.hpp 
@@ -131,6 +131,9 @@ public: ProtectionDomainCacheEntry* cache_get(oop protection_domain); void print(bool details = true); +#ifdef ASSERT + void printPerformanceInfoDetails(); +#endif // ASSERT void verify(); }; diff --git a/hotspot/src/share/vm/classfile/moduleEntry.cpp b/hotspot/src/share/vm/classfile/moduleEntry.cpp index f400f9c9b8a..e4bbb08b2f7 100644 --- a/hotspot/src/share/vm/classfile/moduleEntry.cpp +++ b/hotspot/src/share/vm/classfile/moduleEntry.cpp @@ -440,7 +440,7 @@ void ModuleEntryTable::verify() { } guarantee(number_of_entries() == element_count, "Verify of Module Entry Table failed"); - debug_only(verify_lookup_length((double)number_of_entries() / table_size())); + DEBUG_ONLY(verify_lookup_length((double)number_of_entries() / table_size(), "Module Entry Table")); } void ModuleEntry::verify() { diff --git a/hotspot/src/share/vm/classfile/packageEntry.cpp b/hotspot/src/share/vm/classfile/packageEntry.cpp index 2389cb25559..f358c51bd59 100644 --- a/hotspot/src/share/vm/classfile/packageEntry.cpp +++ b/hotspot/src/share/vm/classfile/packageEntry.cpp @@ -365,7 +365,7 @@ void PackageEntryTable::verify() { } guarantee(number_of_entries() == element_count, "Verify of Package Entry Table failed"); - debug_only(verify_lookup_length((double)number_of_entries() / table_size())); + DEBUG_ONLY(verify_lookup_length((double)number_of_entries() / table_size(), "Package Entry Table")); } void PackageEntry::verify() { diff --git a/hotspot/src/share/vm/classfile/stringTable.cpp b/hotspot/src/share/vm/classfile/stringTable.cpp index 7d3fe1636f2..24c72b7ccf1 100644 --- a/hotspot/src/share/vm/classfile/stringTable.cpp +++ b/hotspot/src/share/vm/classfile/stringTable.cpp @@ -437,17 +437,15 @@ void StringTable::dump(outputStream* st, bool verbose) { st->print("%d: ", length); } else { ResourceMark rm(THREAD); - int utf8_length; + int utf8_length = length; char* utf8_string; if (!is_latin1) { jchar* chars = value->char_at_addr(0); - utf8_length = UNICODE::utf8_length(chars, 
length); - utf8_string = UNICODE::as_utf8(chars, length); + utf8_string = UNICODE::as_utf8(chars, utf8_length); } else { jbyte* bytes = value->byte_at_addr(0); - utf8_length = UNICODE::utf8_length(bytes, length); - utf8_string = UNICODE::as_utf8(bytes, length); + utf8_string = UNICODE::as_utf8(bytes, utf8_length); } st->print("%d: ", utf8_length); diff --git a/hotspot/src/share/vm/classfile/systemDictionary.cpp b/hotspot/src/share/vm/classfile/systemDictionary.cpp index f9a5835ad9d..c6ae8e1e36a 100644 --- a/hotspot/src/share/vm/classfile/systemDictionary.cpp +++ b/hotspot/src/share/vm/classfile/systemDictionary.cpp @@ -2698,7 +2698,7 @@ Handle SystemDictionary::find_method_handle_type(Symbol* signature, } assert(arg == npts, ""); - // call java.lang.invoke.MethodHandleNatives::findMethodType(Class rt, Class[] pts) -> MethodType + // call java.lang.invoke.MethodHandleNatives::findMethodHandleType(Class rt, Class[] pts) -> MethodType JavaCallArguments args(Handle(THREAD, rt())); args.push_oop(pts()); JavaValue result(T_OBJECT); diff --git a/hotspot/src/share/vm/code/codeBlob.cpp b/hotspot/src/share/vm/code/codeBlob.cpp index 91d231e6a5f..66125a6aba0 100644 --- a/hotspot/src/share/vm/code/codeBlob.cpp +++ b/hotspot/src/share/vm/code/codeBlob.cpp @@ -45,6 +45,10 @@ #include "c1/c1_Runtime1.hpp" #endif +const char* CodeBlob::compiler_name() const { + return compilertype2name(_type); +} + unsigned int CodeBlob::align_code_offset(int offset) { // align the size to CodeEntryAlignment return @@ -65,7 +69,7 @@ unsigned int CodeBlob::allocation_size(CodeBuffer* cb, int header_size) { return size; } -CodeBlob::CodeBlob(const char* name, const CodeBlobLayout& layout, int frame_complete_offset, int frame_size, ImmutableOopMapSet* oop_maps, bool caller_must_gc_arguments) : +CodeBlob::CodeBlob(const char* name, CompilerType type, const CodeBlobLayout& layout, int frame_complete_offset, int frame_size, ImmutableOopMapSet* oop_maps, bool caller_must_gc_arguments) : _name(name), 
_size(layout.size()), _header_size(layout.header_size()), @@ -80,7 +84,8 @@ CodeBlob::CodeBlob(const char* name, const CodeBlobLayout& layout, int frame_com _data_end(layout.data_end()), _relocation_begin(layout.relocation_begin()), _relocation_end(layout.relocation_end()), - _content_begin(layout.content_begin()) + _content_begin(layout.content_begin()), + _type(type) { assert(layout.size() == round_to(layout.size(), oopSize), "unaligned size"); assert(layout.header_size() == round_to(layout.header_size(), oopSize), "unaligned size"); @@ -92,7 +97,7 @@ CodeBlob::CodeBlob(const char* name, const CodeBlobLayout& layout, int frame_com #endif // COMPILER1 } -CodeBlob::CodeBlob(const char* name, const CodeBlobLayout& layout, CodeBuffer* cb, int frame_complete_offset, int frame_size, OopMapSet* oop_maps, bool caller_must_gc_arguments) : +CodeBlob::CodeBlob(const char* name, CompilerType type, const CodeBlobLayout& layout, CodeBuffer* cb, int frame_complete_offset, int frame_size, OopMapSet* oop_maps, bool caller_must_gc_arguments) : _name(name), _size(layout.size()), _header_size(layout.header_size()), @@ -106,7 +111,8 @@ CodeBlob::CodeBlob(const char* name, const CodeBlobLayout& layout, CodeBuffer* c _data_end(layout.data_end()), _relocation_begin(layout.relocation_begin()), _relocation_end(layout.relocation_end()), - _content_begin(layout.content_begin()) + _content_begin(layout.content_begin()), + _type(type) { assert(_size == round_to(_size, oopSize), "unaligned size"); assert(_header_size == round_to(_header_size, oopSize), "unaligned size"); @@ -123,7 +129,7 @@ CodeBlob::CodeBlob(const char* name, const CodeBlobLayout& layout, CodeBuffer* c // Creates a simple CodeBlob. Sets up the size of the different regions. 
RuntimeBlob::RuntimeBlob(const char* name, int header_size, int size, int frame_complete, int locs_size) - : CodeBlob(name, CodeBlobLayout((address) this, size, header_size, locs_size, size), frame_complete, 0, NULL, false /* caller_must_gc_arguments */) + : CodeBlob(name, compiler_none, CodeBlobLayout((address) this, size, header_size, locs_size, size), frame_complete, 0, NULL, false /* caller_must_gc_arguments */) { assert(locs_size == round_to(locs_size, oopSize), "unaligned size"); assert(!UseRelocIndex, "no space allocated for reloc index yet"); @@ -148,7 +154,7 @@ RuntimeBlob::RuntimeBlob( int frame_size, OopMapSet* oop_maps, bool caller_must_gc_arguments -) : CodeBlob(name, CodeBlobLayout((address) this, size, header_size, cb), cb, frame_complete, frame_size, oop_maps, caller_must_gc_arguments) { +) : CodeBlob(name, compiler_none, CodeBlobLayout((address) this, size, header_size, cb), cb, frame_complete, frame_size, oop_maps, caller_must_gc_arguments) { cb->copy_code_and_locs_to(this); } diff --git a/hotspot/src/share/vm/code/codeBlob.hpp b/hotspot/src/share/vm/code/codeBlob.hpp index 30105e334bf..ff0f9cf8975 100644 --- a/hotspot/src/share/vm/code/codeBlob.hpp +++ b/hotspot/src/share/vm/code/codeBlob.hpp @@ -26,9 +26,11 @@ #define SHARE_VM_CODE_CODEBLOB_HPP #include "asm/codeBuffer.hpp" +#include "compiler/compilerDefinitions.hpp" #include "compiler/oopMap.hpp" #include "runtime/frame.hpp" #include "runtime/handles.hpp" +#include "utilities/macros.hpp" // CodeBlob Types // Used in the CodeCache to assign CodeBlobs to different CodeHeaps @@ -71,7 +73,8 @@ class CodeBlob VALUE_OBJ_CLASS_SPEC { friend class CodeCacheDumper; protected: - const char* _name; + + const CompilerType _type; // CompilerType int _size; // total size of CodeBlob in bytes int _header_size; // size of header (depends on subclass) int _frame_complete_offset; // instruction offsets in [0.._frame_complete_offset) have @@ -92,9 +95,11 @@ protected: ImmutableOopMapSet* _oop_maps; // OopMap for 
this CodeBlob bool _caller_must_gc_arguments; CodeStrings _strings; + const char* _name; + S390_ONLY(int _ctable_offset;) - CodeBlob(const char* name, const CodeBlobLayout& layout, int frame_complete_offset, int frame_size, ImmutableOopMapSet* oop_maps, bool caller_must_gc_arguments); - CodeBlob(const char* name, const CodeBlobLayout& layout, CodeBuffer* cb, int frame_complete_offset, int frame_size, OopMapSet* oop_maps, bool caller_must_gc_arguments); + CodeBlob(const char* name, CompilerType type, const CodeBlobLayout& layout, int frame_complete_offset, int frame_size, ImmutableOopMapSet* oop_maps, bool caller_must_gc_arguments); + CodeBlob(const char* name, CompilerType type, const CodeBlobLayout& layout, CodeBuffer* cb, int frame_complete_offset, int frame_size, OopMapSet* oop_maps, bool caller_must_gc_arguments); public: // Returns the space needed for CodeBlob static unsigned int allocation_size(CodeBuffer* cb, int header_size); @@ -115,9 +120,11 @@ public: virtual bool is_method_handles_adapter_blob() const { return false; } virtual bool is_compiled() const { return false; } - virtual bool is_compiled_by_c2() const { return false; } - virtual bool is_compiled_by_c1() const { return false; } - virtual bool is_compiled_by_jvmci() const { return false; } + inline bool is_compiled_by_c1() const { return _type == compiler_c1; }; + inline bool is_compiled_by_c2() const { return _type == compiler_c2; }; + inline bool is_compiled_by_jvmci() const { return _type == compiler_jvmci; }; + inline bool is_compiled_by_shark() const { return _type == compiler_shark; }; + const char* compiler_name() const; // Casting nmethod* as_nmethod_or_null() { return is_nmethod() ? (nmethod*) this : NULL; } @@ -135,6 +142,12 @@ public: address code_end() const { return _code_end; } address data_end() const { return _data_end; } + // This field holds the beginning of the const section in the old code buffer. 
+ // It is needed to fix relocations of pc-relative loads when resizing the + // constant pool or moving it. + S390_ONLY(address ctable_begin() const { return header_begin() + _ctable_offset; }) + void set_ctable_begin(address ctable) { S390_ONLY(_ctable_offset = ctable - header_begin();) } + // Sizes int size() const { return _size; } int header_size() const { return _header_size; } diff --git a/hotspot/src/share/vm/code/codeCache.cpp b/hotspot/src/share/vm/code/codeCache.cpp index 0e858c03996..5d81805db44 100644 --- a/hotspot/src/share/vm/code/codeCache.cpp +++ b/hotspot/src/share/vm/code/codeCache.cpp @@ -547,7 +547,10 @@ void CodeCache::commit(CodeBlob* cb) { } bool CodeCache::contains(void *p) { - // It should be ok to call contains without holding a lock + // S390 uses contains() in current_frame(), which is used before + // code cache initialization if NativeMemoryTracking=detail is set. + S390_ONLY(if (_heaps == NULL) return false;) + // It should be ok to call contains without holding a lock. 
FOR_ALL_HEAPS(heap) { if ((*heap)->contains(p)) { return true; diff --git a/hotspot/src/share/vm/code/compiledMethod.cpp b/hotspot/src/share/vm/code/compiledMethod.cpp index 5031b364d18..c376a1a50a2 100644 --- a/hotspot/src/share/vm/code/compiledMethod.cpp +++ b/hotspot/src/share/vm/code/compiledMethod.cpp @@ -31,14 +31,14 @@ #include "memory/resourceArea.hpp" #include "runtime/mutexLocker.hpp" -CompiledMethod::CompiledMethod(Method* method, const char* name, const CodeBlobLayout& layout, int frame_complete_offset, int frame_size, ImmutableOopMapSet* oop_maps, bool caller_must_gc_arguments) - : CodeBlob(name, layout, frame_complete_offset, frame_size, oop_maps, caller_must_gc_arguments), +CompiledMethod::CompiledMethod(Method* method, const char* name, CompilerType type, const CodeBlobLayout& layout, int frame_complete_offset, int frame_size, ImmutableOopMapSet* oop_maps, bool caller_must_gc_arguments) + : CodeBlob(name, type, layout, frame_complete_offset, frame_size, oop_maps, caller_must_gc_arguments), _method(method), _mark_for_deoptimization_status(not_marked) { init_defaults(); } -CompiledMethod::CompiledMethod(Method* method, const char* name, int size, int header_size, CodeBuffer* cb, int frame_complete_offset, int frame_size, OopMapSet* oop_maps, bool caller_must_gc_arguments) - : CodeBlob(name, CodeBlobLayout((address) this, size, header_size, cb), cb, frame_complete_offset, frame_size, oop_maps, caller_must_gc_arguments), +CompiledMethod::CompiledMethod(Method* method, const char* name, CompilerType type, int size, int header_size, CodeBuffer* cb, int frame_complete_offset, int frame_size, OopMapSet* oop_maps, bool caller_must_gc_arguments) + : CodeBlob(name, type, CodeBlobLayout((address) this, size, header_size, cb), cb, frame_complete_offset, frame_size, oop_maps, caller_must_gc_arguments), _method(method), _mark_for_deoptimization_status(not_marked) { init_defaults(); } diff --git a/hotspot/src/share/vm/code/compiledMethod.hpp 
b/hotspot/src/share/vm/code/compiledMethod.hpp index 01485e74a28..77c7febc5dd 100644 --- a/hotspot/src/share/vm/code/compiledMethod.hpp +++ b/hotspot/src/share/vm/code/compiledMethod.hpp @@ -164,8 +164,8 @@ protected: virtual void flush() = 0; protected: - CompiledMethod(Method* method, const char* name, const CodeBlobLayout& layout, int frame_complete_offset, int frame_size, ImmutableOopMapSet* oop_maps, bool caller_must_gc_arguments); - CompiledMethod(Method* method, const char* name, int size, int header_size, CodeBuffer* cb, int frame_complete_offset, int frame_size, OopMapSet* oop_maps, bool caller_must_gc_arguments); + CompiledMethod(Method* method, const char* name, CompilerType type, const CodeBlobLayout& layout, int frame_complete_offset, int frame_size, ImmutableOopMapSet* oop_maps, bool caller_must_gc_arguments); + CompiledMethod(Method* method, const char* name, CompilerType type, int size, int header_size, CodeBuffer* cb, int frame_complete_offset, int frame_size, OopMapSet* oop_maps, bool caller_must_gc_arguments); public: virtual bool is_compiled() const { return true; } @@ -191,12 +191,10 @@ public: // will be transformed to zombie immediately }; - virtual AbstractCompiler* compiler() const = 0; virtual bool is_in_use() const = 0; virtual int comp_level() const = 0; virtual int compile_id() const = 0; - virtual address verified_entry_point() const = 0; virtual void log_identity(xmlStream* log) const = 0; virtual void log_state_change() const = 0; diff --git a/hotspot/src/share/vm/code/dependencyContext.cpp b/hotspot/src/share/vm/code/dependencyContext.cpp index 435fb0cdfb2..a3fce896fce 100644 --- a/hotspot/src/share/vm/code/dependencyContext.cpp +++ b/hotspot/src/share/vm/code/dependencyContext.cpp @@ -272,79 +272,3 @@ bool DependencyContext::find_stale_entries() { int nmethodBucket::decrement() { return Atomic::add(-1, (volatile int *)&_count); } - -/////////////// Unit tests /////////////// - -#ifndef PRODUCT - -class TestDependencyContext { - 
public: - nmethod* _nmethods[3]; - - intptr_t _dependency_context; - - DependencyContext dependencies() { - DependencyContext depContext(&_dependency_context); - return depContext; - } - - TestDependencyContext() : _dependency_context(DependencyContext::EMPTY) { - CodeCache_lock->lock_without_safepoint_check(); - - _nmethods[0] = reinterpret_cast(0x8 * 0); - _nmethods[1] = reinterpret_cast(0x8 * 1); - _nmethods[2] = reinterpret_cast(0x8 * 2); - - dependencies().add_dependent_nmethod(_nmethods[2]); - dependencies().add_dependent_nmethod(_nmethods[1]); - dependencies().add_dependent_nmethod(_nmethods[0]); - } - - ~TestDependencyContext() { - dependencies().wipe(); - CodeCache_lock->unlock(); - } - - static void testRemoveDependentNmethod(int id, bool delete_immediately) { - TestDependencyContext c; - DependencyContext depContext = c.dependencies(); - assert(!has_stale_entries(depContext), "check"); - - nmethod* nm = c._nmethods[id]; - depContext.remove_dependent_nmethod(nm, delete_immediately); - - if (!delete_immediately) { - assert(has_stale_entries(depContext), "check"); - assert(depContext.is_dependent_nmethod(nm), "check"); - depContext.expunge_stale_entries(); - } - - assert(!has_stale_entries(depContext), "check"); - assert(!depContext.is_dependent_nmethod(nm), "check"); - } - - static void testRemoveDependentNmethod() { - testRemoveDependentNmethod(0, false); - testRemoveDependentNmethod(1, false); - testRemoveDependentNmethod(2, false); - - testRemoveDependentNmethod(0, true); - testRemoveDependentNmethod(1, true); - testRemoveDependentNmethod(2, true); - } - - static void test() { - testRemoveDependentNmethod(); - } - - static bool has_stale_entries(DependencyContext ctx) { - assert(ctx.has_stale_entries() == ctx.find_stale_entries(), "check"); - return ctx.has_stale_entries(); - } -}; - -void TestDependencyContext_test() { - TestDependencyContext::test(); -} - -#endif // PRODUCT diff --git a/hotspot/src/share/vm/code/dependencyContext.hpp 
b/hotspot/src/share/vm/code/dependencyContext.hpp index 014de0e237a..e8312ad8e9c 100644 --- a/hotspot/src/share/vm/code/dependencyContext.hpp +++ b/hotspot/src/share/vm/code/dependencyContext.hpp @@ -29,6 +29,7 @@ #include "oops/oop.hpp" #include "runtime/handles.hpp" #include "runtime/perfData.hpp" +#include "runtime/safepoint.hpp" class nmethod; class DepChange; diff --git a/hotspot/src/share/vm/code/nmethod.cpp b/hotspot/src/share/vm/code/nmethod.cpp index 70ee634e51e..a21ff709e15 100644 --- a/hotspot/src/share/vm/code/nmethod.cpp +++ b/hotspot/src/share/vm/code/nmethod.cpp @@ -82,32 +82,6 @@ #endif -bool nmethod::is_compiled_by_c1() const { - if (compiler() == NULL) { - return false; - } - return compiler()->is_c1(); -} -bool nmethod::is_compiled_by_jvmci() const { - if (compiler() == NULL || method() == NULL) return false; // can happen during debug printing - if (is_native_method()) return false; - return compiler()->is_jvmci(); -} -bool nmethod::is_compiled_by_c2() const { - if (compiler() == NULL) { - return false; - } - return compiler()->is_c2(); -} -bool nmethod::is_compiled_by_shark() const { - if (compiler() == NULL) { - return false; - } - return compiler()->is_shark(); -} - - - //--------------------------------------------------------------------------------- // NMethod statistics // They are printed under various flags, including: @@ -440,7 +414,6 @@ void nmethod::init_defaults() { _scavenge_root_link = NULL; } _scavenge_root_state = 0; - _compiler = NULL; #if INCLUDE_RTM_OPT _rtm_state = NoRTM; #endif @@ -468,7 +441,7 @@ nmethod* nmethod::new_native_nmethod(const methodHandle& method, CodeOffsets offsets; offsets.set_value(CodeOffsets::Verified_Entry, vep_offset); offsets.set_value(CodeOffsets::Frame_Complete, frame_complete); - nm = new (native_nmethod_size, CompLevel_none) nmethod(method(), native_nmethod_size, + nm = new (native_nmethod_size, CompLevel_none) nmethod(method(), compiler_none, native_nmethod_size, compile_id, &offsets, 
code_buffer, frame_size, basic_lock_owner_sp_offset, @@ -518,7 +491,7 @@ nmethod* nmethod::new_nmethod(const methodHandle& method, + round_to(debug_info->data_size() , oopSize); nm = new (nmethod_size, comp_level) - nmethod(method(), nmethod_size, compile_id, entry_bci, offsets, + nmethod(method(), compiler->type(), nmethod_size, compile_id, entry_bci, offsets, orig_pc_offset, debug_info, dependencies, code_buffer, frame_size, oop_maps, handler_table, @@ -569,6 +542,7 @@ nmethod* nmethod::new_nmethod(const methodHandle& method, // For native wrappers nmethod::nmethod( Method* method, + CompilerType type, int nmethod_size, int compile_id, CodeOffsets* offsets, @@ -577,7 +551,7 @@ nmethod::nmethod( ByteSize basic_lock_owner_sp_offset, ByteSize basic_lock_sp_offset, OopMapSet* oop_maps ) - : CompiledMethod(method, "native nmethod", nmethod_size, sizeof(nmethod), code_buffer, offsets->value(CodeOffsets::Frame_Complete), frame_size, oop_maps, false), + : CompiledMethod(method, "native nmethod", type, nmethod_size, sizeof(nmethod), code_buffer, offsets->value(CodeOffsets::Frame_Complete), frame_size, oop_maps, false), _native_receiver_sp_offset(basic_lock_owner_sp_offset), _native_basic_lock_sp_offset(basic_lock_sp_offset) { @@ -666,6 +640,7 @@ void* nmethod::operator new(size_t size, int nmethod_size, int comp_level) throw nmethod::nmethod( Method* method, + CompilerType type, int nmethod_size, int compile_id, int entry_bci, @@ -685,7 +660,7 @@ nmethod::nmethod( Handle speculation_log #endif ) - : CompiledMethod(method, "nmethod", nmethod_size, sizeof(nmethod), code_buffer, offsets->value(CodeOffsets::Frame_Complete), frame_size, oop_maps, false), + : CompiledMethod(method, "nmethod", type, nmethod_size, sizeof(nmethod), code_buffer, offsets->value(CodeOffsets::Frame_Complete), frame_size, oop_maps, false), _native_receiver_sp_offset(in_ByteSize(-1)), _native_basic_lock_sp_offset(in_ByteSize(-1)) { @@ -701,13 +676,13 @@ nmethod::nmethod( _entry_bci = entry_bci; 
_compile_id = compile_id; _comp_level = comp_level; - _compiler = compiler; _orig_pc_offset = orig_pc_offset; _hotness_counter = NMethodSweeper::hotness_counter_reset_val(); // Section offsets _consts_offset = content_offset() + code_buffer->total_offset_of(code_buffer->consts()); _stub_offset = content_offset() + code_buffer->total_offset_of(code_buffer->stubs()); + set_ctable_begin(header_begin() + _consts_offset); #if INCLUDE_JVMCI _jvmci_installed_code = installed_code(); @@ -803,9 +778,7 @@ void nmethod::log_identity(xmlStream* log) const { log->print(" compile_id='%d'", compile_id()); const char* nm_kind = compile_kind(); if (nm_kind != NULL) log->print(" compile_kind='%s'", nm_kind); - if (compiler() != NULL) { - log->print(" compiler='%s'", compiler()->name()); - } + log->print(" compiler='%s'", compiler_name()); if (TieredCompilation) { log->print(" level='%d'", comp_level()); } @@ -2205,6 +2178,7 @@ void nmethod::verify_scopes() { //verify_interrupt_point(iter.addr()); break; case relocInfo::runtime_call_type: + case relocInfo::runtime_call_w_cp_type: address destination = iter.reloc()->value(); // Right now there is no way to find out which entries support // an interrupt point. 
It would be nice if we had this @@ -2463,10 +2437,11 @@ const char* nmethod::reloc_string_for(u_char* begin, u_char* end) { st.print(")"); return st.as_string(); } - case relocInfo::runtime_call_type: { + case relocInfo::runtime_call_type: + case relocInfo::runtime_call_w_cp_type: { stringStream st; st.print("runtime_call"); - runtime_call_Relocation* r = iter.runtime_call_reloc(); + CallRelocation* r = (CallRelocation*)iter.reloc(); address dest = r->destination(); CodeBlob* cb = CodeCache::find_blob(dest); if (cb != NULL) { diff --git a/hotspot/src/share/vm/code/nmethod.hpp b/hotspot/src/share/vm/code/nmethod.hpp index 16ad01a38a5..2e00f75c64d 100644 --- a/hotspot/src/share/vm/code/nmethod.hpp +++ b/hotspot/src/share/vm/code/nmethod.hpp @@ -74,8 +74,6 @@ class nmethod : public CompiledMethod { static nmethod* volatile _oops_do_mark_nmethods; nmethod* volatile _oops_do_mark_link; - AbstractCompiler* _compiler; // The compiler which compiled this nmethod - // offsets for entry points address _entry_point; // entry point with class check address _verified_entry_point; // entry point without class check @@ -166,6 +164,7 @@ class nmethod : public CompiledMethod { // For native wrappers nmethod(Method* method, + CompilerType type, int nmethod_size, int compile_id, CodeOffsets* offsets, @@ -177,6 +176,7 @@ class nmethod : public CompiledMethod { // Creation support nmethod(Method* method, + CompilerType type, int nmethod_size, int compile_id, int entry_bci, @@ -251,18 +251,10 @@ class nmethod : public CompiledMethod { ByteSize basic_lock_sp_offset, OopMapSet* oop_maps); - // accessors - AbstractCompiler* compiler() const { return _compiler; } - // type info bool is_nmethod() const { return true; } bool is_osr_method() const { return _entry_bci != InvocationEntryBci; } - bool is_compiled_by_c1() const; - bool is_compiled_by_jvmci() const; - bool is_compiled_by_c2() const; - bool is_compiled_by_shark() const; - // boundaries for different parts address consts_begin () 
const { return header_begin() + _consts_offset ; } address consts_end () const { return code_begin() ; } diff --git a/hotspot/src/share/vm/code/relocInfo.cpp b/hotspot/src/share/vm/code/relocInfo.cpp index 45a39db8360..d42646f92c1 100644 --- a/hotspot/src/share/vm/code/relocInfo.cpp +++ b/hotspot/src/share/vm/code/relocInfo.cpp @@ -552,6 +552,14 @@ void virtual_call_Relocation::unpack_data() { _cached_value = x0==0? NULL: address_from_scaled_offset(x0, point); } +void runtime_call_w_cp_Relocation::pack_data_to(CodeSection * dest) { + short* p = pack_1_int_to((short *)dest->locs_end(), (jint)(_offset >> 2)); + dest->set_locs_end((relocInfo*) p); +} + +void runtime_call_w_cp_Relocation::unpack_data() { + _offset = unpack_1_int() << 2; +} void static_stub_Relocation::pack_data_to(CodeSection* dest) { short* p = (short*) dest->locs_end(); @@ -743,7 +751,9 @@ address virtual_call_Relocation::cached_value() { } Method* virtual_call_Relocation::method_value() { - Metadata* m = code()->metadata_at(_method_index); + CompiledMethod* cm = code(); + if (cm == NULL) return (Method*)NULL; + Metadata* m = cm->metadata_at(_method_index); assert(m != NULL || _method_index == 0, "should be non-null for non-zero index"); assert(m == NULL || m->is_method(), "not a method"); return (Method*)m; @@ -769,7 +779,9 @@ void opt_virtual_call_Relocation::unpack_data() { } Method* opt_virtual_call_Relocation::method_value() { - Metadata* m = code()->metadata_at(_method_index); + CompiledMethod* cm = code(); + if (cm == NULL) return (Method*)NULL; + Metadata* m = cm->metadata_at(_method_index); assert(m != NULL || _method_index == 0, "should be non-null for non-zero index"); assert(m == NULL || m->is_method(), "not a method"); return (Method*)m; @@ -800,7 +812,9 @@ address opt_virtual_call_Relocation::static_stub() { } Method* static_call_Relocation::method_value() { - Metadata* m = code()->metadata_at(_method_index); + CompiledMethod* cm = code(); + if (cm == NULL) return (Method*)NULL; + 
Metadata* m = cm->metadata_at(_method_index); assert(m != NULL || _method_index == 0, "should be non-null for non-zero index"); assert(m == NULL || m->is_method(), "not a method"); return (Method*)m; @@ -970,7 +984,9 @@ void RelocIterator::print_current() { // work even during GC or other inconvenient times. if (WizardMode && oop_value != NULL) { tty->print("oop_value=" INTPTR_FORMAT ": ", p2i(oop_value)); - oop_value->print_value_on(tty); + if (oop_value->is_oop()) { + oop_value->print_value_on(tty); + } } break; } @@ -1009,6 +1025,7 @@ void RelocIterator::print_current() { break; } case relocInfo::runtime_call_type: + case relocInfo::runtime_call_w_cp_type: { CallRelocation* r = (CallRelocation*) reloc(); tty->print(" | [destination=" INTPTR_FORMAT "]", p2i(r->destination())); diff --git a/hotspot/src/share/vm/code/relocInfo.hpp b/hotspot/src/share/vm/code/relocInfo.hpp index de22f48574a..87fb97ff869 100644 --- a/hotspot/src/share/vm/code/relocInfo.hpp +++ b/hotspot/src/share/vm/code/relocInfo.hpp @@ -270,7 +270,7 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { poll_return_type = 11, // polling instruction for safepoints at return metadata_type = 12, // metadata that used to be oops trampoline_stub_type = 13, // stub-entry for trampoline - yet_unused_type_1 = 14, // Still unused + runtime_call_w_cp_type = 14, // Runtime call which may load its target from the constant pool data_prefix_tag = 15, // tag for a prefix (carries data arguments) type_mask = 15 // A mask which selects only the above values }; @@ -305,6 +305,7 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { visitor(static_call) \ visitor(static_stub) \ visitor(runtime_call) \ + visitor(runtime_call_w_cp) \ visitor(external_word) \ visitor(internal_word) \ visitor(poll) \ @@ -827,8 +828,6 @@ class Relocation VALUE_OBJ_CLASS_SPEC { // ic_call_type is not always posisition dependent (depending on the state of the cache)). 
However, this is // probably a reasonable assumption, since empty caches simplifies code reloacation. virtual void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { } - - void print(); }; @@ -1175,6 +1174,36 @@ class runtime_call_Relocation : public CallRelocation { public: }; + +class runtime_call_w_cp_Relocation : public CallRelocation { + relocInfo::relocType type() { return relocInfo::runtime_call_w_cp_type; } + + public: + static RelocationHolder spec() { + RelocationHolder rh = newHolder(); + new(rh) runtime_call_w_cp_Relocation(); + return rh; + } + + private: + friend class RelocIterator; + runtime_call_w_cp_Relocation() { _offset = -4; /* <0 = invalid */ } + // On z/Architecture, runtime calls are either a sequence + // of two instructions (load destination of call from constant pool + do call) + // or a pc-relative call. The pc-relative call is faster, but it can only + // be used if the destination of the call is not too far away. + // In order to be able to patch a pc-relative call back into one using + // the constant pool, we have to remember the location of the call's destination + // in the constant pool. + int _offset; + + public: + void set_constant_pool_offset(int offset) { _offset = offset; } + int get_constant_pool_offset() { return _offset; } + void pack_data_to(CodeSection * dest); + void unpack_data(); +}; + // Trampoline Relocations. // A trampoline allows to encode a small branch in the code, even if there // is the chance that this branch can not reach all possible code locations. 
diff --git a/hotspot/src/share/vm/compiler/abstractCompiler.hpp b/hotspot/src/share/vm/compiler/abstractCompiler.hpp index b02d85eaf57..958102bb694 100644 --- a/hotspot/src/share/vm/compiler/abstractCompiler.hpp +++ b/hotspot/src/share/vm/compiler/abstractCompiler.hpp @@ -26,6 +26,7 @@ #define SHARE_VM_COMPILER_ABSTRACTCOMPILER_HPP #include "ci/compilerInterface.hpp" +#include "compiler/compilerDefinitions.hpp" #include "compiler/compilerDirectives.hpp" typedef void (*initializer)(void); @@ -82,24 +83,15 @@ class AbstractCompiler : public CHeapObj { // This thread will initialize the compiler runtime. bool should_perform_init(); - // The (closed set) of concrete compiler classes. - enum Type { - none, - c1, - c2, - jvmci, - shark - }; - private: - Type _type; + const CompilerType _type; #if INCLUDE_JVMCI CompilerStatistics _stats; #endif public: - AbstractCompiler(Type type) : _type(type), _compiler_state(uninitialized), _num_compiler_threads(0) {} + AbstractCompiler(CompilerType type) : _type(type), _compiler_state(uninitialized), _num_compiler_threads(0) {} // This function determines the compiler thread that will perform the // shutdown of the corresponding compiler runtime. @@ -157,10 +149,11 @@ class AbstractCompiler : public CHeapObj { } // Compiler type queries. - bool is_c1() { return _type == c1; } - bool is_c2() { return _type == c2; } - bool is_jvmci() { return _type == jvmci; } - bool is_shark() { return _type == shark; } + const bool is_c1() { return _type == compiler_c1; } + const bool is_c2() { return _type == compiler_c2; } + const bool is_jvmci() { return _type == compiler_jvmci; } + const bool is_shark() { return _type == compiler_shark; } + const CompilerType type() { return _type; } // Extra tests to identify trivial methods for the tiered compilation policy. 
virtual bool is_trivial(Method* method) { return false; } diff --git a/hotspot/src/share/vm/compiler/compilerDefinitions.cpp b/hotspot/src/share/vm/compiler/compilerDefinitions.cpp new file mode 100644 index 00000000000..a0d379773eb --- /dev/null +++ b/hotspot/src/share/vm/compiler/compilerDefinitions.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/compilerDefinitions.hpp" + +const char* compilertype2name_tab[compiler_number_of_types] = { + "", + "c1", + "c2", + "jvmci", + "shark" +}; diff --git a/hotspot/src/share/vm/compiler/compilerDefinitions.hpp b/hotspot/src/share/vm/compiler/compilerDefinitions.hpp new file mode 100644 index 00000000000..6d8c2c07105 --- /dev/null +++ b/hotspot/src/share/vm/compiler/compilerDefinitions.hpp @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_COMPILER_COMPILERDEFINITIONS_HPP +#define SHARE_VM_COMPILER_COMPILERDEFINITIONS_HPP + +#include "utilities/globalDefinitions.hpp" + +// The (closed set) of concrete compiler classes. +enum CompilerType { + compiler_none, + compiler_c1, + compiler_c2, + compiler_jvmci, + compiler_shark, + compiler_number_of_types +}; + +extern const char* compilertype2name_tab[compiler_number_of_types]; // Map CompilerType to its name +inline const char* compilertype2name(CompilerType t) { return (uint)t < compiler_number_of_types ? compilertype2name_tab[t] : NULL; } + +// Handy constants for deciding which compiler mode to use. 
+enum MethodCompilation { + InvocationEntryBci = -1 // i.e., not an on-stack replacement compilation +}; + +// Enumeration to distinguish tiers of compilation +enum CompLevel { + CompLevel_any = -1, + CompLevel_all = -1, + CompLevel_none = 0, // Interpreter + CompLevel_simple = 1, // C1 + CompLevel_limited_profile = 2, // C1, invocation & backedge counters + CompLevel_full_profile = 3, // C1, invocation & backedge counters + mdo + CompLevel_full_optimization = 4, // C2, Shark or JVMCI + +#if defined(COMPILER2) || defined(SHARK) + CompLevel_highest_tier = CompLevel_full_optimization, // pure C2 and tiered or JVMCI and tiered +#elif defined(COMPILER1) + CompLevel_highest_tier = CompLevel_simple, // pure C1 or JVMCI +#else + CompLevel_highest_tier = CompLevel_none, +#endif + +#if defined(TIERED) + CompLevel_initial_compile = CompLevel_full_profile // tiered +#elif defined(COMPILER1) || INCLUDE_JVMCI + CompLevel_initial_compile = CompLevel_simple // pure C1 or JVMCI +#elif defined(COMPILER2) || defined(SHARK) + CompLevel_initial_compile = CompLevel_full_optimization // pure C2 +#else + CompLevel_initial_compile = CompLevel_none +#endif +}; + +inline bool is_c1_compile(int comp_level) { + return comp_level > CompLevel_none && comp_level < CompLevel_full_optimization; +} + +inline bool is_c2_compile(int comp_level) { + return comp_level == CompLevel_full_optimization; +} + +inline bool is_highest_tier_compile(int comp_level) { + return comp_level == CompLevel_highest_tier; +} + +inline bool is_compile(int comp_level) { + return is_c1_compile(comp_level) || is_c2_compile(comp_level); +} + +// States of Restricted Transactional Memory usage. 
+enum RTMState { + NoRTM = 0x2, // Don't use RTM + UseRTM = 0x1, // Use RTM + ProfileRTM = 0x0 // Use RTM with abort ratio calculation +}; + +#ifndef INCLUDE_RTM_OPT +#define INCLUDE_RTM_OPT 0 +#endif +#if INCLUDE_RTM_OPT +#define RTM_OPT_ONLY(code) code +#else +#define RTM_OPT_ONLY(code) +#endif + +#endif // SHARE_VM_COMPILER_COMPILERDEFINITIONS_HPP diff --git a/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp b/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp index ad651723aef..e6773e304d0 100644 --- a/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp +++ b/hotspot/src/share/vm/gc/cms/compactibleFreeListSpace.cpp @@ -922,18 +922,13 @@ size_t CompactibleFreeListSpace::block_size(const HeapWord* p) const { return res; } } else { - // must read from what 'p' points to in each loop. - Klass* k = ((volatile oopDesc*)p)->klass_or_null(); + // Ensure klass read before size. + Klass* k = oop(p)->klass_or_null_acquire(); if (k != NULL) { assert(k->is_klass(), "Should really be klass oop."); oop o = (oop)p; assert(o->is_oop(true /* ignore mark word */), "Should be an oop."); - // Bugfix for systems with weak memory model (PPC64/IA64). - // The object o may be an array. Acquire to make sure that the array - // size (third word) is consistent. - OrderAccess::acquire(); - size_t res = o->size_given_klass(k); res = adjustObjectSize(res); assert(res != 0, "Block size should not be 0"); @@ -977,21 +972,13 @@ const { return res; } } else { - // must read from what 'p' points to in each loop. - Klass* k = ((volatile oopDesc*)p)->klass_or_null(); - // We trust the size of any object that has a non-NULL - // klass and (for those in the perm gen) is parsable - // -- irrespective of its conc_safe-ty. + // Ensure klass read before size. 
+ Klass* k = oop(p)->klass_or_null_acquire(); if (k != NULL) { assert(k->is_klass(), "Should really be klass oop."); oop o = (oop)p; assert(o->is_oop(), "Should be an oop"); - // Bugfix for systems with weak memory model (PPC64/IA64). - // The object o may be an array. Acquire to make sure that the array - // size (third word) is consistent. - OrderAccess::acquire(); - size_t res = o->size_given_klass(k); res = adjustObjectSize(res); assert(res != 0, "Block size should not be 0"); @@ -1028,7 +1015,7 @@ bool CompactibleFreeListSpace::block_is_obj(const HeapWord* p) const { FreeChunk* fc = (FreeChunk*)p; assert(is_in_reserved(p), "Should be in space"); if (FreeChunk::indicatesFreeChunk(p)) return false; - Klass* k = oop(p)->klass_or_null(); + Klass* k = oop(p)->klass_or_null_acquire(); if (k != NULL) { // Ignore mark word because it may have been used to // chain together promoted objects (the last one diff --git a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp index cb558729b94..1cc6a5ea1e9 100644 --- a/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp +++ b/hotspot/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp @@ -5630,7 +5630,7 @@ size_t CMSCollector::block_size_if_printezis_bits(HeapWord* addr) const { HeapWord* CMSCollector::next_card_start_after_block(HeapWord* addr) const { size_t sz = 0; oop p = (oop)addr; - if (p->klass_or_null() != NULL) { + if (p->klass_or_null_acquire() != NULL) { sz = CompactibleFreeListSpace::adjustObjectSize(p->size()); } else { sz = block_size_using_printezis_bits(addr); @@ -6076,7 +6076,7 @@ size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m( } if (_bitMap->isMarked(addr)) { // it's marked; is it potentially uninitialized? 
- if (p->klass_or_null() != NULL) { + if (p->klass_or_null_acquire() != NULL) { // an initialized object; ignore mark word in verification below // since we are running concurrent with mutators assert(p->is_oop(true), "should be an oop"); @@ -6121,7 +6121,7 @@ size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m( } } else { // Either a not yet marked object or an uninitialized object - if (p->klass_or_null() == NULL) { + if (p->klass_or_null_acquire() == NULL) { // An uninitialized object, skip to the next card, since // we may not be able to read its P-bits yet. assert(size == 0, "Initial value"); @@ -6320,7 +6320,7 @@ bool MarkFromRootsClosure::do_bit(size_t offset) { assert(_skipBits == 0, "tautology"); _skipBits = 2; // skip next two marked bits ("Printezis-marks") oop p = oop(addr); - if (p->klass_or_null() == NULL) { + if (p->klass_or_null_acquire() == NULL) { DEBUG_ONLY(if (!_verifying) {) // We re-dirty the cards on which this object lies and increase // the _threshold so that we'll come back to scan this object @@ -6340,7 +6340,7 @@ bool MarkFromRootsClosure::do_bit(size_t offset) { if (_threshold < end_card_addr) { _threshold = end_card_addr; } - if (p->klass_or_null() != NULL) { + if (p->klass_or_null_acquire() != NULL) { // Redirty the range of cards... _mut->mark_range(redirty_range); } // ...else the setting of klass will dirty the card anyway. @@ -6483,7 +6483,7 @@ bool ParMarkFromRootsClosure::do_bit(size_t offset) { assert(_skip_bits == 0, "tautology"); _skip_bits = 2; // skip next two marked bits ("Printezis-marks") oop p = oop(addr); - if (p->klass_or_null() == NULL) { + if (p->klass_or_null_acquire() == NULL) { // in the case of Clean-on-Enter optimization, redirty card // and avoid clearing card by increasing the threshold. 
return true; @@ -7354,7 +7354,7 @@ size_t SweepClosure::do_live_chunk(FreeChunk* fc) { "alignment problem"); #ifdef ASSERT - if (oop(addr)->klass_or_null() != NULL) { + if (oop(addr)->klass_or_null_acquire() != NULL) { // Ignore mark word because we are running concurrent with mutators assert(oop(addr)->is_oop(true), "live block should be an oop"); assert(size == @@ -7365,7 +7365,7 @@ size_t SweepClosure::do_live_chunk(FreeChunk* fc) { } else { // This should be an initialized object that's alive. - assert(oop(addr)->klass_or_null() != NULL, + assert(oop(addr)->klass_or_null_acquire() != NULL, "Should be an initialized object"); // Ignore mark word because we are running concurrent with mutators assert(oop(addr)->is_oop(true), "live block should be an oop"); diff --git a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp index 8179b6e3062..1ba5cb7b51b 100644 --- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp @@ -4420,6 +4420,19 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info, G NOT_PRODUCT(set_evacuation_failure_alot_for_current_gc();) assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty"); + + G1GCPhaseTimes* phase_times = g1_policy()->phase_times(); + + // InitialMark needs claim bits to keep track of the marked-through CLDs. 
+ if (collector_state()->during_initial_mark_pause()) { + double start_clear_claimed_marks = os::elapsedTime(); + + ClassLoaderDataGraph::clear_claimed_marks(); + + double recorded_clear_claimed_marks_time_ms = (os::elapsedTime() - start_clear_claimed_marks) * 1000.0; + phase_times->record_clear_claimed_marks_time_ms(recorded_clear_claimed_marks_time_ms); + } + double start_par_time_sec = os::elapsedTime(); double end_par_time_sec; @@ -4427,10 +4440,6 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info, G const uint n_workers = workers()->active_workers(); G1RootProcessor root_processor(this, n_workers); G1ParTask g1_par_task(this, per_thread_states, _task_queues, &root_processor, n_workers); - // InitialMark needs claim bits to keep track of the marked-through CLDs. - if (collector_state()->during_initial_mark_pause()) { - ClassLoaderDataGraph::clear_claimed_marks(); - } print_termination_stats_hdr(); @@ -4444,8 +4453,6 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info, G // reported parallel time. 
} - G1GCPhaseTimes* phase_times = g1_policy()->phase_times(); - double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0; phase_times->record_par_time(par_time_ms); diff --git a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp index a5dac2a150f..194cd8f5f7c 100644 --- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp +++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp @@ -101,6 +101,7 @@ void G1GCPhaseTimes::note_gc_start() { _gc_start_counter = os::elapsed_counter(); _cur_expand_heap_time_ms = 0.0; _external_accounted_time_ms = 0.0; + _recorded_clear_claimed_marks_time_ms = 0.0; for (int i = 0; i < GCParPhasesSentinel; i++) { if (_gc_par_phases[i] != NULL) { @@ -306,6 +307,10 @@ void G1GCPhaseTimes::print() { debug_line("Reference Processing", _cur_ref_proc_time_ms); debug_line("Reference Enqueuing", _cur_ref_enq_time_ms); debug_line("Redirty Cards", _recorded_redirty_logged_cards_time_ms); + if (_recorded_clear_claimed_marks_time_ms > 0.0) { + debug_line("Clear Claimed Marks", _recorded_clear_claimed_marks_time_ms); + } + trace_phase(_gc_par_phases[RedirtyCards]); if (G1EagerReclaimHumongousObjects) { debug_line("Humongous Register", _cur_fast_reclaim_humongous_register_time_ms); diff --git a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp index 87fed3ffef8..841756e3f66 100644 --- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp +++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp @@ -103,6 +103,8 @@ class G1GCPhaseTimes : public CHeapObj { double _external_accounted_time_ms; + double _recorded_clear_claimed_marks_time_ms; + double _recorded_young_cset_choice_time_ms; double _recorded_non_young_cset_choice_time_ms; @@ -257,6 +259,10 @@ class G1GCPhaseTimes : public CHeapObj { _external_accounted_time_ms += time_ms; } + void record_clear_claimed_marks_time_ms(double recorded_clear_claimed_marks_time_ms) { + _recorded_clear_claimed_marks_time_ms = 
recorded_clear_claimed_marks_time_ms; + } + double cur_collection_start_sec() { return _cur_collection_start_sec; } diff --git a/hotspot/src/share/vm/gc/g1/heapRegionSet.cpp b/hotspot/src/share/vm/gc/g1/heapRegionSet.cpp index 83bbc71bc4c..5bda8b98ae1 100644 --- a/hotspot/src/share/vm/gc/g1/heapRegionSet.cpp +++ b/hotspot/src/share/vm/gc/g1/heapRegionSet.cpp @@ -370,50 +370,3 @@ void HumongousRegionSetMtSafeChecker::check() { "master humongous set MT safety protocol outside a safepoint"); } } - -void FreeRegionList_test() { - FreeRegionList l("test"); - - const uint num_regions_in_test = 5; - // Create a fake heap. It does not need to be valid, as the HeapRegion constructor - // does not access it. - MemRegion heap(NULL, num_regions_in_test * HeapRegion::GrainWords); - // Allocate a fake BOT because the HeapRegion constructor initializes - // the BOT. - size_t bot_size = G1BlockOffsetTable::compute_size(heap.word_size()); - HeapWord* bot_data = NEW_C_HEAP_ARRAY(HeapWord, bot_size, mtGC); - ReservedSpace bot_rs(G1BlockOffsetTable::compute_size(heap.word_size())); - G1RegionToSpaceMapper* bot_storage = - G1RegionToSpaceMapper::create_mapper(bot_rs, - bot_rs.size(), - os::vm_page_size(), - HeapRegion::GrainBytes, - BOTConstants::N_bytes, - mtGC); - G1BlockOffsetTable bot(heap, bot_storage); - bot_storage->commit_regions(0, num_regions_in_test); - - // Set up memory regions for the heap regions. 
- MemRegion mr0(heap.start(), HeapRegion::GrainWords); - MemRegion mr1(mr0.end(), HeapRegion::GrainWords); - MemRegion mr2(mr1.end(), HeapRegion::GrainWords); - MemRegion mr3(mr2.end(), HeapRegion::GrainWords); - MemRegion mr4(mr3.end(), HeapRegion::GrainWords); - - HeapRegion hr0(0, &bot, mr0); - HeapRegion hr1(1, &bot, mr1); - HeapRegion hr2(2, &bot, mr2); - HeapRegion hr3(3, &bot, mr3); - HeapRegion hr4(4, &bot, mr4); - l.add_ordered(&hr1); - l.add_ordered(&hr0); - l.add_ordered(&hr3); - l.add_ordered(&hr4); - l.add_ordered(&hr2); - assert(l.length() == num_regions_in_test, "wrong length"); - l.verify_list(); - - bot_storage->uncommit_regions(0, num_regions_in_test); - delete bot_storage; - FREE_C_HEAP_ARRAY(HeapWord, bot_data); -} diff --git a/hotspot/src/share/vm/gc/g1/workerDataArray.cpp b/hotspot/src/share/vm/gc/g1/workerDataArray.cpp index 6f6201d45cf..361ba0975da 100644 --- a/hotspot/src/share/vm/gc/g1/workerDataArray.cpp +++ b/hotspot/src/share/vm/gc/g1/workerDataArray.cpp @@ -79,126 +79,3 @@ void WorkerDataArray::WDAPrinter::details(const WorkerDataArray* } out->cr(); } - -#ifndef PRODUCT - -#include "memory/resourceArea.hpp" - -void WorkerDataArray_test_verify_string(const char* expected_string, const char* actual_string) { - const size_t expected_len = strlen(expected_string); - - assert(expected_len == strlen(actual_string), - "Wrong string length, expected " SIZE_FORMAT " but got " SIZE_FORMAT "(Expected '%s' but got: '%s')", - expected_len, strlen(actual_string), expected_string, actual_string); - - // Can't use strncmp here because floating point values use different decimal points for different locales. - // Allow strings to differ in "." vs. "," only. This should still catch most errors. - for (size_t i = 0; i < expected_len; i++) { - char e = expected_string[i]; - char a = actual_string[i]; - if (e != a) { - if ((e == '.' || e == ',') && (a == '.' 
|| a == ',')) { - // Most likely just a difference in locale - } else { - assert(false, "Expected '%s' but got: '%s'", expected_string, actual_string); - } - } - } -} - -void WorkerDataArray_test_verify_array(WorkerDataArray& array, size_t expected_sum, double expected_avg, const char* expected_summary, const char* exected_details) { - const double epsilon = 0.0001; - assert(array.sum() == expected_sum, "Wrong sum, expected: " SIZE_FORMAT " but got: " SIZE_FORMAT, expected_sum, array.sum()); - assert(fabs(array.average() - expected_avg) < epsilon, "Wrong average, expected: %f but got: %f", expected_avg, array.average()); - - ResourceMark rm; - stringStream out; - array.print_summary_on(&out); - WorkerDataArray_test_verify_string(expected_summary, out.as_string()); - out.reset(); - array.print_details_on(&out); - WorkerDataArray_test_verify_string(exected_details, out.as_string()); -} - -void WorkerDataArray_test_verify_array(WorkerDataArray& array, double expected_sum, double expected_avg, const char* expected_summary, const char* exected_details) { - const double epsilon = 0.0001; - assert(fabs(array.sum() - expected_sum) < epsilon, "Wrong sum, expected: %f but got: %f", expected_sum, array.sum()); - assert(fabs(array.average() - expected_avg) < epsilon, "Wrong average, expected: %f but got: %f", expected_avg, array.average()); - - ResourceMark rm; - stringStream out; - array.print_summary_on(&out); - WorkerDataArray_test_verify_string(expected_summary, out.as_string()); - out.reset(); - array.print_details_on(&out); - WorkerDataArray_test_verify_string(exected_details, out.as_string()); -} - -void WorkerDataArray_test_basic() { - WorkerDataArray array(3, "Test array"); - array.set(0, 5); - array.set(1, 3); - array.set(2, 7); - - WorkerDataArray_test_verify_array(array, 15, 5.0, - "Test array Min: 3, Avg: 5.0, Max: 7, Diff: 4, Sum: 15, Workers: 3\n", - " 5 3 7\n" ); -} - -void WorkerDataArray_test_add() { - WorkerDataArray array(3, "Test array"); - array.set(0, 
5); - array.set(1, 3); - array.set(2, 7); - - for (uint i = 0; i < 3; i++) { - array.add(i, 1); - } - - WorkerDataArray_test_verify_array(array, 18, 6.0, - "Test array Min: 4, Avg: 6.0, Max: 8, Diff: 4, Sum: 18, Workers: 3\n", - " 6 4 8\n" ); -} - -void WorkerDataArray_test_with_uninitialized() { - WorkerDataArray array(3, "Test array"); - array.set(0, 5); - array.set(1, WorkerDataArray::uninitialized()); - array.set(2, 7); - - WorkerDataArray_test_verify_array(array, 12, 6, - "Test array Min: 5, Avg: 6.0, Max: 7, Diff: 2, Sum: 12, Workers: 2\n", - " 5 - 7\n" ); -} - -void WorkerDataArray_test_uninitialized() { - WorkerDataArray array(3, "Test array"); - array.set(0, WorkerDataArray::uninitialized()); - array.set(1, WorkerDataArray::uninitialized()); - array.set(2, WorkerDataArray::uninitialized()); - - WorkerDataArray_test_verify_array(array, 0, 0.0, - "Test array skipped\n", - " - - -\n" ); -} - -void WorkerDataArray_test_double_with_uninitialized() { - WorkerDataArray array(3, "Test array"); - array.set(0, 5.1 / MILLIUNITS); - array.set(1, WorkerDataArray::uninitialized()); - array.set(2, 7.2 / MILLIUNITS); - - WorkerDataArray_test_verify_array(array, 12.3 / MILLIUNITS, 6.15 / MILLIUNITS, - "Test array Min: 5.1, Avg: 6.1, Max: 7.2, Diff: 2.1, Sum: 12.3, Workers: 2\n", - " 5.1 - 7.2\n" ); -} - -void WorkerDataArray_test() { - WorkerDataArray_test_basic(); - WorkerDataArray_test_add(); - WorkerDataArray_test_with_uninitialized(); - WorkerDataArray_test_uninitialized(); - WorkerDataArray_test_double_with_uninitialized(); -} - -#endif diff --git a/hotspot/src/share/vm/gc/shared/collectorPolicy.cpp b/hotspot/src/share/vm/gc/shared/collectorPolicy.cpp index fd449241dba..873e4e06be1 100644 --- a/hotspot/src/share/vm/gc/shared/collectorPolicy.cpp +++ b/hotspot/src/share/vm/gc/shared/collectorPolicy.cpp @@ -909,184 +909,3 @@ void MarkSweepPolicy::initialize_gc_policy_counters() { _gc_policy_counters = new GCPolicyCounters("Copy:MSC", 2, 3); } -/////////////// Unit tests 
/////////////// - -#ifndef PRODUCT -// Testing that the NewSize flag is handled correct is hard because it -// depends on so many other configurable variables. This test only tries to -// verify that there are some basic rules for NewSize honored by the policies. -class TestGenCollectorPolicy { -public: - static void test_new_size() { - size_t flag_value; - - save_flags(); - - // If NewSize has been ergonomically set, the collector policy - // should use it for min but calculate the initial young size - // using NewRatio. - flag_value = 20 * M; - set_basic_flag_values(); - FLAG_SET_ERGO(size_t, NewSize, flag_value); - verify_young_min(flag_value); - - set_basic_flag_values(); - FLAG_SET_ERGO(size_t, NewSize, flag_value); - verify_scaled_young_initial(InitialHeapSize); - - // If NewSize is set on the command line, it should be used - // for both min and initial young size if less than min heap. - // Note that once a flag has been set with FLAG_SET_CMDLINE it - // will be treated as it have been set on the command line for - // the rest of the VM lifetime. This is an irreversible change. - flag_value = 20 * M; - set_basic_flag_values(); - FLAG_SET_CMDLINE(size_t, NewSize, flag_value); - verify_young_min(flag_value); - - set_basic_flag_values(); - FLAG_SET_CMDLINE(size_t, NewSize, flag_value); - verify_young_initial(flag_value); - - // If NewSize is set on command line, but is larger than the min - // heap size, it should only be used for initial young size. - flag_value = 80 * M; - set_basic_flag_values(); - FLAG_SET_CMDLINE(size_t, NewSize, flag_value); - verify_young_initial(flag_value); - - restore_flags(); - } - - static void test_old_size() { - size_t flag_value; - size_t heap_alignment = CollectorPolicy::compute_heap_alignment(); - - save_flags(); - - // If OldSize is set on the command line, it should be used - // for both min and initial old size if less than min heap. 
- flag_value = 20 * M; - set_basic_flag_values(); - FLAG_SET_CMDLINE(size_t, OldSize, flag_value); - verify_old_min(flag_value); - - set_basic_flag_values(); - FLAG_SET_CMDLINE(size_t, OldSize, flag_value); - // Calculate what we expect the flag to be. - size_t expected_old_initial = align_size_up(InitialHeapSize, heap_alignment) - MaxNewSize; - verify_old_initial(expected_old_initial); - - // If MaxNewSize is large, the maximum OldSize will be less than - // what's requested on the command line and it should be reset - // ergonomically. - // We intentionally set MaxNewSize + OldSize > MaxHeapSize (see over_size). - flag_value = 30 * M; - set_basic_flag_values(); - FLAG_SET_CMDLINE(size_t, OldSize, flag_value); - size_t over_size = 20*M; - size_t new_size_value = align_size_up(MaxHeapSize, heap_alignment) - flag_value + over_size; - FLAG_SET_CMDLINE(size_t, MaxNewSize, new_size_value); - // Calculate what we expect the flag to be. - expected_old_initial = align_size_up(MaxHeapSize, heap_alignment) - MaxNewSize; - verify_old_initial(expected_old_initial); - restore_flags(); - } - - static void verify_young_min(size_t expected) { - MarkSweepPolicy msp; - msp.initialize_all(); - - assert(msp.min_young_size() <= expected, "%zu > %zu", msp.min_young_size(), expected); - } - - static void verify_young_initial(size_t expected) { - MarkSweepPolicy msp; - msp.initialize_all(); - - assert(msp.initial_young_size() == expected, "%zu != %zu", msp.initial_young_size(), expected); - } - - static void verify_scaled_young_initial(size_t initial_heap_size) { - MarkSweepPolicy msp; - msp.initialize_all(); - - if (InitialHeapSize > initial_heap_size) { - // InitialHeapSize was adapted by msp.initialize_all, e.g. due to alignment - // caused by 64K page size. 
- initial_heap_size = InitialHeapSize; - } - - size_t expected = msp.scale_by_NewRatio_aligned(initial_heap_size); - assert(msp.initial_young_size() == expected, "%zu != %zu", msp.initial_young_size(), expected); - assert(FLAG_IS_ERGO(NewSize) && NewSize == expected, - "NewSize should have been set ergonomically to %zu, but was %zu", expected, NewSize); - } - - static void verify_old_min(size_t expected) { - MarkSweepPolicy msp; - msp.initialize_all(); - - assert(msp.min_old_size() <= expected, "%zu > %zu", msp.min_old_size(), expected); - } - - static void verify_old_initial(size_t expected) { - MarkSweepPolicy msp; - msp.initialize_all(); - - assert(msp.initial_old_size() == expected, "%zu != %zu", msp.initial_old_size(), expected); - } - - -private: - static size_t original_InitialHeapSize; - static size_t original_MaxHeapSize; - static size_t original_MaxNewSize; - static size_t original_MinHeapDeltaBytes; - static size_t original_NewSize; - static size_t original_OldSize; - - static void set_basic_flag_values() { - FLAG_SET_ERGO(size_t, MaxHeapSize, 180 * M); - FLAG_SET_ERGO(size_t, InitialHeapSize, 100 * M); - FLAG_SET_ERGO(size_t, OldSize, 4 * M); - FLAG_SET_ERGO(size_t, NewSize, 1 * M); - FLAG_SET_ERGO(size_t, MaxNewSize, 80 * M); - Arguments::set_min_heap_size(40 * M); - } - - static void save_flags() { - original_InitialHeapSize = InitialHeapSize; - original_MaxHeapSize = MaxHeapSize; - original_MaxNewSize = MaxNewSize; - original_MinHeapDeltaBytes = MinHeapDeltaBytes; - original_NewSize = NewSize; - original_OldSize = OldSize; - } - - static void restore_flags() { - InitialHeapSize = original_InitialHeapSize; - MaxHeapSize = original_MaxHeapSize; - MaxNewSize = original_MaxNewSize; - MinHeapDeltaBytes = original_MinHeapDeltaBytes; - NewSize = original_NewSize; - OldSize = original_OldSize; - } -}; - -size_t TestGenCollectorPolicy::original_InitialHeapSize = 0; -size_t TestGenCollectorPolicy::original_MaxHeapSize = 0; -size_t 
TestGenCollectorPolicy::original_MaxNewSize = 0; -size_t TestGenCollectorPolicy::original_MinHeapDeltaBytes = 0; -size_t TestGenCollectorPolicy::original_NewSize = 0; -size_t TestGenCollectorPolicy::original_OldSize = 0; - -void TestNewSize_test() { - TestGenCollectorPolicy::test_new_size(); -} - -void TestOldSize_test() { - TestGenCollectorPolicy::test_old_size(); -} - -#endif diff --git a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp index f1b8f79050b..fc763d750d2 100644 --- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp +++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp @@ -221,7 +221,7 @@ class AbstractInterpreter: AllStatic { } static int expr_offset_in_bytes(int i) { -#if !defined(ZERO) && (defined(PPC) || defined(SPARC)) +#if !defined(ZERO) && (defined(PPC) || defined(S390) || defined(SPARC)) return stackElementSize * i + wordSize; // both point to one word past TOS #else return stackElementSize * i; diff --git a/hotspot/src/share/vm/interpreter/linkResolver.cpp b/hotspot/src/share/vm/interpreter/linkResolver.cpp index 9fc214ed028..b062f2cc4bd 100644 --- a/hotspot/src/share/vm/interpreter/linkResolver.cpp +++ b/hotspot/src/share/vm/interpreter/linkResolver.cpp @@ -1627,19 +1627,23 @@ void LinkResolver::resolve_handle_call(CallInfo& result, static void wrap_invokedynamic_exception(TRAPS) { if (HAS_PENDING_EXCEPTION) { + // See the "Linking Exceptions" section for the invokedynamic instruction + // in JVMS 6.5. 
+ if (PENDING_EXCEPTION->is_a(SystemDictionary::Error_klass())) { + // Pass through an Error, including BootstrapMethodError, any other form + // of linkage error, or say ThreadDeath/OutOfMemoryError + if (TraceMethodHandles) { + tty->print_cr("invokedynamic passes through an Error for " INTPTR_FORMAT, p2i((void *)PENDING_EXCEPTION)); + PENDING_EXCEPTION->print(); + } + return; + } + + // Otherwise wrap the exception in a BootstrapMethodError if (TraceMethodHandles) { tty->print_cr("invokedynamic throws BSME for " INTPTR_FORMAT, p2i((void *)PENDING_EXCEPTION)); PENDING_EXCEPTION->print(); } - if (PENDING_EXCEPTION->is_a(SystemDictionary::BootstrapMethodError_klass())) { - // throw these guys, since they are already wrapped - return; - } - if (!PENDING_EXCEPTION->is_a(SystemDictionary::LinkageError_klass())) { - // intercept only LinkageErrors which might have failed to wrap - return; - } - // See the "Linking Exceptions" section for the invokedynamic instruction in the JVMS. Handle nested_exception(THREAD, PENDING_EXCEPTION); CLEAR_PENDING_EXCEPTION; THROW_CAUSE(vmSymbols::java_lang_BootstrapMethodError(), nested_exception) diff --git a/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp b/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp index c66df6611d8..5e35a478d41 100644 --- a/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp +++ b/hotspot/src/share/vm/jvmci/jvmciCompiler.cpp @@ -37,7 +37,7 @@ JVMCICompiler* JVMCICompiler::_instance = NULL; elapsedTimer JVMCICompiler::_codeInstallTimer; -JVMCICompiler::JVMCICompiler() : AbstractCompiler(jvmci) { +JVMCICompiler::JVMCICompiler() : AbstractCompiler(compiler_jvmci) { _bootstrapping = false; _bootstrap_compilation_request_handled = false; _methods_compiled = 0; diff --git a/hotspot/src/share/vm/jvmci/jvmci_globals.hpp b/hotspot/src/share/vm/jvmci/jvmci_globals.hpp index 0292b9936ae..abec3e9816b 100644 --- a/hotspot/src/share/vm/jvmci/jvmci_globals.hpp +++ b/hotspot/src/share/vm/jvmci/jvmci_globals.hpp @@ -50,7 +50,7 @@ "Use JVMCI 
as the default compiler") \ \ experimental(bool, JVMCIPrintProperties, false, \ - "Prints properties used by the JVMCI compiler") \ + "Prints properties used by the JVMCI compiler and exits") \ \ experimental(bool, BootstrapJVMCI, false, \ "Bootstrap JVMCI before running Java main method") \ diff --git a/hotspot/src/share/vm/logging/logConfiguration.cpp b/hotspot/src/share/vm/logging/logConfiguration.cpp index 7842cbb34e2..b8a5932ba0e 100644 --- a/hotspot/src/share/vm/logging/logConfiguration.cpp +++ b/hotspot/src/share/vm/logging/logConfiguration.cpp @@ -98,14 +98,14 @@ void LogConfiguration::initialize(jlong vm_start_time) { LogDecorations::initialize(vm_start_time); assert(_outputs == NULL, "Should not initialize _outputs before this function, initialize called twice?"); _outputs = NEW_C_HEAP_ARRAY(LogOutput*, 2, mtLogging); - _outputs[0] = LogOutput::Stdout; - _outputs[1] = LogOutput::Stderr; + _outputs[0] = &StdoutLog; + _outputs[1] = &StderrLog; _n_outputs = 2; } void LogConfiguration::finalize() { - for (size_t i = 2; i < _n_outputs; i++) { - delete _outputs[i]; + for (size_t i = _n_outputs; i > 0; i--) { + disable_output(i - 1); } FREE_C_HEAP_ARRAY(LogOutput*, _outputs); } @@ -279,8 +279,8 @@ void LogConfiguration::disable_output(size_t idx) { ts->update_decorators(); } - // Delete the output unless stdout/stderr - if (out != LogOutput::Stderr && out != LogOutput::Stdout) { + // Delete the output unless stdout or stderr (idx 0 or 1) + if (idx > 1) { delete_output(idx); } else { out->set_config_string("all=off"); @@ -322,7 +322,7 @@ void LogConfiguration::configure_stdout(LogLevelType level, bool exact_match, .. // Apply configuration to stdout (output #0), with the same decorators as before. 
ConfigurationLock cl; - configure_output(0, expr, LogOutput::Stdout->decorators()); + configure_output(0, expr, _outputs[0]->decorators()); notify_update_listeners(); } diff --git a/hotspot/src/share/vm/logging/logDecorations.cpp b/hotspot/src/share/vm/logging/logDecorations.cpp index 79c221567a8..8fc6b07de1a 100644 --- a/hotspot/src/share/vm/logging/logDecorations.cpp +++ b/hotspot/src/share/vm/logging/logDecorations.cpp @@ -72,6 +72,12 @@ char* LogDecorations::create_time_decoration(char* pos) { ASSERT_AND_RETURN(written, pos) } +char* LogDecorations::create_utctime_decoration(char* pos) { + char* buf = os::iso8601_time(pos, 29, true); + int written = buf == NULL ? -1 : 29; + ASSERT_AND_RETURN(written, pos) +} + char * LogDecorations::create_uptime_decoration(char* pos) { int written = jio_snprintf(pos, DecorationsBufferSize - (pos - _decorations_buffer), "%.3fs", os::elapsedTime()); ASSERT_AND_RETURN(written, pos) diff --git a/hotspot/src/share/vm/logging/logDecorations.hpp b/hotspot/src/share/vm/logging/logDecorations.hpp index 38dcb40e99c..c89af7a9d9a 100644 --- a/hotspot/src/share/vm/logging/logDecorations.hpp +++ b/hotspot/src/share/vm/logging/logDecorations.hpp @@ -36,7 +36,7 @@ class LogDecorations VALUE_OBJ_CLASS_SPEC { char _decorations_buffer[DecorationsBufferSize]; char* _decoration_offset[LogDecorators::Count]; LogLevelType _level; - LogTagSet _tagset; + const LogTagSet& _tagset; jlong _millis; static jlong _vm_start_time_millis; static const char* _host_name; diff --git a/hotspot/src/share/vm/logging/logDecorators.hpp b/hotspot/src/share/vm/logging/logDecorators.hpp index 2d30700b614..edeef165601 100644 --- a/hotspot/src/share/vm/logging/logDecorators.hpp +++ b/hotspot/src/share/vm/logging/logDecorators.hpp @@ -41,6 +41,7 @@ // tags - The tag-set associated with the log message #define DECORATOR_LIST \ DECORATOR(time, t) \ + DECORATOR(utctime, utc) \ DECORATOR(uptime, u) \ DECORATOR(timemillis, tm) \ DECORATOR(uptimemillis, um) \ diff --git 
a/hotspot/src/share/vm/logging/logFileStreamOutput.cpp b/hotspot/src/share/vm/logging/logFileStreamOutput.cpp index 20dfd2db6db..01ac7d10c29 100644 --- a/hotspot/src/share/vm/logging/logFileStreamOutput.cpp +++ b/hotspot/src/share/vm/logging/logFileStreamOutput.cpp @@ -28,8 +28,20 @@ #include "logging/logMessageBuffer.hpp" #include "memory/allocation.inline.hpp" -LogStdoutOutput LogStdoutOutput::_instance; -LogStderrOutput LogStderrOutput::_instance; +static bool initialized; +static char stdoutmem[sizeof(LogStdoutOutput)]; +static char stderrmem[sizeof(LogStderrOutput)]; + +LogStdoutOutput &StdoutLog = reinterpret_cast(stdoutmem); +LogStderrOutput &StderrLog = reinterpret_cast(stderrmem); + +LogFileStreamInitializer::LogFileStreamInitializer() { + if (!initialized) { + ::new (&StdoutLog) LogStdoutOutput(); + ::new (&StderrLog) LogStderrOutput(); + initialized = true; + } +} int LogFileStreamOutput::write_decorations(const LogDecorations& decorations) { int total_written = 0; diff --git a/hotspot/src/share/vm/logging/logFileStreamOutput.hpp b/hotspot/src/share/vm/logging/logFileStreamOutput.hpp index 3b460dab0f7..7b535822fa0 100644 --- a/hotspot/src/share/vm/logging/logFileStreamOutput.hpp +++ b/hotspot/src/share/vm/logging/logFileStreamOutput.hpp @@ -30,6 +30,14 @@ class LogDecorations; +class LogFileStreamInitializer { + public: + LogFileStreamInitializer(); +}; + +// Ensure the default log streams have been initialized (stdout, stderr) using the static initializer below +static LogFileStreamInitializer log_stream_initializer; + // Base class for all FileStream-based log outputs. 
class LogFileStreamOutput : public LogOutput { protected: @@ -50,9 +58,8 @@ class LogFileStreamOutput : public LogOutput { }; class LogStdoutOutput : public LogFileStreamOutput { - friend class LogOutput; + friend class LogFileStreamInitializer; private: - static LogStdoutOutput _instance; LogStdoutOutput() : LogFileStreamOutput(stdout) { set_config_string("all=warning"); } @@ -66,9 +73,8 @@ class LogStdoutOutput : public LogFileStreamOutput { }; class LogStderrOutput : public LogFileStreamOutput { - friend class LogOutput; + friend class LogFileStreamInitializer; private: - static LogStderrOutput _instance; LogStderrOutput() : LogFileStreamOutput(stderr) { set_config_string("all=off"); } @@ -81,4 +87,7 @@ class LogStderrOutput : public LogFileStreamOutput { } }; +extern LogStderrOutput &StderrLog; +extern LogStdoutOutput &StdoutLog; + #endif // SHARE_VM_LOGGING_LOGFILESTREAMOUTPUT_HPP diff --git a/hotspot/src/share/vm/logging/logOutput.cpp b/hotspot/src/share/vm/logging/logOutput.cpp index d17414a6534..088b411dff3 100644 --- a/hotspot/src/share/vm/logging/logOutput.cpp +++ b/hotspot/src/share/vm/logging/logOutput.cpp @@ -29,9 +29,6 @@ #include "runtime/mutexLocker.hpp" #include "runtime/os.inline.hpp" -LogOutput* const LogOutput::Stdout = &LogStdoutOutput::_instance; -LogOutput* const LogOutput::Stderr = &LogStderrOutput::_instance; - LogOutput::~LogOutput() { os::free(_config_string); } diff --git a/hotspot/src/share/vm/logging/logOutput.hpp b/hotspot/src/share/vm/logging/logOutput.hpp index 24c66156d1b..bdfb77b0f5b 100644 --- a/hotspot/src/share/vm/logging/logOutput.hpp +++ b/hotspot/src/share/vm/logging/logOutput.hpp @@ -57,9 +57,6 @@ class LogOutput : public CHeapObj { void set_config_string(const char* string); public: - static LogOutput* const Stdout; - static LogOutput* const Stderr; - void set_decorators(const LogDecorators &decorators) { _decorators = decorators; } diff --git a/hotspot/src/share/vm/logging/logTag.hpp 
b/hotspot/src/share/vm/logging/logTag.hpp index a729b8d5150..90638b41b38 100644 --- a/hotspot/src/share/vm/logging/logTag.hpp +++ b/hotspot/src/share/vm/logging/logTag.hpp @@ -61,6 +61,7 @@ LOG_TAG(exit) \ LOG_TAG(freelist) \ LOG_TAG(gc) \ + LOG_TAG(hashtables) \ LOG_TAG(heap) \ LOG_TAG(humongous) \ LOG_TAG(ihop) \ diff --git a/hotspot/src/share/vm/logging/logTagSet.cpp b/hotspot/src/share/vm/logging/logTagSet.cpp index 71a57d9bb2f..b3e06833207 100644 --- a/hotspot/src/share/vm/logging/logTagSet.cpp +++ b/hotspot/src/share/vm/logging/logTagSet.cpp @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" #include "logging/logDecorations.hpp" +#include "logging/logFileStreamOutput.hpp" #include "logging/logLevel.hpp" #include "logging/logMessageBuffer.hpp" #include "logging/logOutput.hpp" @@ -50,7 +51,7 @@ LogTagSet::LogTagSet(PrefixWriter prefix_writer, LogTagType t0, LogTagType t1, L _ntagsets++; // Set the default output to warning and error level for all new tagsets. - _output_list.set_output_level(LogOutput::Stdout, LogLevel::Default); + _output_list.set_output_level(&StdoutLog, LogLevel::Default); } void LogTagSet::update_decorators(const LogDecorators& decorator) { diff --git a/hotspot/src/share/vm/memory/guardedMemory.cpp b/hotspot/src/share/vm/memory/guardedMemory.cpp index d40f3b84319..d978b0bea65 100644 --- a/hotspot/src/share/vm/memory/guardedMemory.cpp +++ b/hotspot/src/share/vm/memory/guardedMemory.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -79,87 +79,3 @@ void GuardedMemory::print_on(outputStream* st) const { break; } } - -// test code... 
- -#ifndef PRODUCT - -static void guarded_memory_test_check(void* p, size_t sz, void* tag) { - assert(p != NULL, "NULL pointer given to check"); - u_char* c = (u_char*) p; - GuardedMemory guarded(c); - assert(guarded.get_tag() == tag, "Tag is not the same as supplied"); - assert(guarded.get_user_ptr() == c, "User pointer is not the same as supplied"); - assert(guarded.get_user_size() == sz, "User size is not the same as supplied"); - assert(guarded.verify_guards(), "Guard broken"); -} - -void GuardedMemory::test_guarded_memory() { - // Test the basic characteristics... - size_t total_sz = GuardedMemory::get_total_size(1); - assert(total_sz > 1 && total_sz >= (sizeof(GuardHeader) + 1 + sizeof(Guard)), "Unexpected size"); - u_char* basep = (u_char*) os::malloc(total_sz, mtInternal); - - GuardedMemory guarded(basep, 1, (void*)0xf000f000); - - assert(*basep == badResourceValue, "Expected guard in the form of badResourceValue"); - u_char* userp = guarded.get_user_ptr(); - assert(*userp == uninitBlockPad, "Expected uninitialized data in the form of uninitBlockPad"); - guarded_memory_test_check(userp, 1, (void*)0xf000f000); - - void* freep = guarded.release_for_freeing(); - assert((u_char*)freep == basep, "Expected the same pointer guard was "); - assert(*userp == freeBlockPad, "Expected user data to be free block padded"); - assert(!guarded.verify_guards(), "Expected failed"); - os::free(freep); - - // Test a number of odd sizes... - size_t sz = 0; - do { - void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal); - void* up = guarded.wrap_with_guards(p, sz, (void*)1); - memset(up, 0, sz); - guarded_memory_test_check(up, sz, (void*)1); - os::free(guarded.release_for_freeing()); - sz = (sz << 4) + 1; - } while (sz < (256 * 1024)); - - // Test buffer overrun into head... 
- basep = (u_char*) os::malloc(GuardedMemory::get_total_size(1), mtInternal); - guarded.wrap_with_guards(basep, 1); - *basep = 0; - assert(!guarded.verify_guards(), "Expected failure"); - os::free(basep); - - // Test buffer overrun into tail with a number of odd sizes... - sz = 1; - do { - void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal); - void* up = guarded.wrap_with_guards(p, sz, (void*)1); - memset(up, 0, sz + 1); // Buffer-overwrite (within guard) - assert(!guarded.verify_guards(), "Guard was not broken as expected"); - os::free(guarded.release_for_freeing()); - sz = (sz << 4) + 1; - } while (sz < (256 * 1024)); - - // Test wrap_copy/wrap_free... - assert(GuardedMemory::free_copy(NULL), "Expected free NULL to be OK"); - - const char* str = "Check my bounds out"; - size_t str_sz = strlen(str) + 1; - char* str_copy = (char*) GuardedMemory::wrap_copy(str, str_sz); - guarded_memory_test_check(str_copy, str_sz, NULL); - assert(strcmp(str, str_copy) == 0, "Not identical copy"); - assert(GuardedMemory::free_copy(str_copy), "Free copy failed to verify"); - - void* no_data = NULL; - void* no_data_copy = GuardedMemory::wrap_copy(no_data, 0); - assert(GuardedMemory::free_copy(no_data_copy), "Expected valid guards even for no data copy"); -} - -void GuardedMemory_test() { - GuardedMemory::test_guarded_memory(); -} - -#endif // !PRODUCT - diff --git a/hotspot/src/share/vm/memory/guardedMemory.hpp b/hotspot/src/share/vm/memory/guardedMemory.hpp index e3b1ed8e57e..2d32c77c938 100644 --- a/hotspot/src/share/vm/memory/guardedMemory.hpp +++ b/hotspot/src/share/vm/memory/guardedMemory.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -82,6 +82,7 @@ */ class GuardedMemory : StackObj { // Wrapper on stack + friend class GuardedMemoryTest; // Private inner classes for memory layout... protected: @@ -317,10 +318,6 @@ protected: */ static bool free_copy(void* p); - // Testing... -#ifndef PRODUCT - static void test_guarded_memory(void); -#endif }; // GuardedMemory #endif // SHARE_VM_MEMORY_GUARDEDMEMORY_HPP diff --git a/hotspot/src/share/vm/memory/metachunk.cpp b/hotspot/src/share/vm/memory/metachunk.cpp index 8ecbaacc821..1378e7f6ac5 100644 --- a/hotspot/src/share/vm/memory/metachunk.cpp +++ b/hotspot/src/share/vm/memory/metachunk.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -111,61 +111,3 @@ void Metachunk::verify() { return; } -/////////////// Unit tests /////////////// - -#ifndef PRODUCT - -class TestMetachunk { - public: - static void test() { - size_t size = 2 * 1024 * 1024; - void* memory = malloc(size); - assert(memory != NULL, "Failed to malloc 2MB"); - - Metachunk* metachunk = ::new (memory) Metachunk(size / BytesPerWord, NULL); - - assert(metachunk->bottom() == (MetaWord*)metachunk, "assert"); - assert(metachunk->end() == (uintptr_t*)metachunk + metachunk->size(), "assert"); - - // Check sizes - assert(metachunk->size() == metachunk->word_size(), "assert"); - assert(metachunk->word_size() == pointer_delta(metachunk->end(), metachunk->bottom(), - sizeof(MetaWord*)), "assert"); - - // Check usage - assert(metachunk->used_word_size() == metachunk->overhead(), "assert"); - assert(metachunk->free_word_size() == metachunk->word_size() - metachunk->used_word_size(), "assert"); - assert(metachunk->top() == metachunk->initial_top(), "assert"); - 
assert(metachunk->is_empty(), "assert"); - - // Allocate - size_t alloc_size = 64; // Words - assert(is_size_aligned(alloc_size, Metachunk::object_alignment()), "assert"); - - MetaWord* mem = metachunk->allocate(alloc_size); - - // Check post alloc - assert(mem == metachunk->initial_top(), "assert"); - assert(mem + alloc_size == metachunk->top(), "assert"); - assert(metachunk->used_word_size() == metachunk->overhead() + alloc_size, "assert"); - assert(metachunk->free_word_size() == metachunk->word_size() - metachunk->used_word_size(), "assert"); - assert(!metachunk->is_empty(), "assert"); - - // Clear chunk - metachunk->reset_empty(); - - // Check post clear - assert(metachunk->used_word_size() == metachunk->overhead(), "assert"); - assert(metachunk->free_word_size() == metachunk->word_size() - metachunk->used_word_size(), "assert"); - assert(metachunk->top() == metachunk->initial_top(), "assert"); - assert(metachunk->is_empty(), "assert"); - - free(memory); - } -}; - -void TestMetachunk_test() { - TestMetachunk::test(); -} - -#endif diff --git a/hotspot/src/share/vm/memory/metachunk.hpp b/hotspot/src/share/vm/memory/metachunk.hpp index b116d27ead8..5b92827586f 100644 --- a/hotspot/src/share/vm/memory/metachunk.hpp +++ b/hotspot/src/share/vm/memory/metachunk.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -95,7 +95,7 @@ class Metabase VALUE_OBJ_CLASS_SPEC { // +--------------+ <- bottom --+ --+ class Metachunk : public Metabase { - friend class TestMetachunk; + friend class MetachunkTest; // The VirtualSpaceNode containing this chunk. 
VirtualSpaceNode* _container; diff --git a/hotspot/src/share/vm/memory/virtualspace.cpp b/hotspot/src/share/vm/memory/virtualspace.cpp index 63c1b926255..1c4bec9758f 100644 --- a/hotspot/src/share/vm/memory/virtualspace.cpp +++ b/hotspot/src/share/vm/memory/virtualspace.cpp @@ -276,7 +276,7 @@ void ReservedHeapSpace::establish_noaccess_prefix() { if (base() && base() + _size > (char *)OopEncodingHeapMax) { if (true WIN64_ONLY(&& !UseLargePages) - AIX_ONLY(&& os::vm_page_size() != SIZE_64K)) { + AIX_ONLY(&& os::vm_page_size() != 64*K)) { // Protect memory at the base of the allocated region. // If special, the page was committed (only matters on windows) if (!os::protect_memory(_base, _noaccess_prefix, os::MEM_PROT_NONE, _special)) { diff --git a/hotspot/src/share/vm/oops/instanceKlass.cpp b/hotspot/src/share/vm/oops/instanceKlass.cpp index db65cefa550..7e6c3c809fb 100644 --- a/hotspot/src/share/vm/oops/instanceKlass.cpp +++ b/hotspot/src/share/vm/oops/instanceKlass.cpp @@ -517,12 +517,14 @@ bool InstanceKlass::link_class_or_fail(TRAPS) { bool InstanceKlass::link_class_impl( instanceKlassHandle this_k, bool throw_verifyerror, TRAPS) { - // check for error state. - // This is checking for the wrong state. If the state is initialization_error, - // then this class *was* linked. The CDS code does a try_link_class and uses - // initialization_error to mark classes to not include in the archive during - // DumpSharedSpaces. This should be removed when the CDS bug is fixed. - if (this_k->is_in_error_state()) { + if (DumpSharedSpaces && this_k->is_in_error_state()) { + // This is for CDS dumping phase only -- we use the in_error_state to indicate that + // the class has failed verification. Throwing the NoClassDefFoundError here is just + // a convenient way to stop repeat attempts to verify the same (bad) class. + // + // Note that the NoClassDefFoundError is not part of the JLS, and should not be thrown + // if we are executing Java code. 
This is not a problem for CDS dumping phase since + // it doesn't execute any Java code. ResourceMark rm(THREAD); THROW_MSG_(vmSymbols::java_lang_NoClassDefFoundError(), this_k->external_name(), false); diff --git a/hotspot/src/share/vm/oops/method.hpp b/hotspot/src/share/vm/oops/method.hpp index d8f51ca6b00..b15b19e1a0f 100644 --- a/hotspot/src/share/vm/oops/method.hpp +++ b/hotspot/src/share/vm/oops/method.hpp @@ -27,6 +27,7 @@ #include "classfile/vmSymbols.hpp" #include "code/compressedStream.hpp" +#include "compiler/compilerDefinitions.hpp" #include "compiler/oopMap.hpp" #include "interpreter/invocationCounter.hpp" #include "oops/annotations.hpp" diff --git a/hotspot/src/share/vm/opto/arraycopynode.cpp b/hotspot/src/share/vm/opto/arraycopynode.cpp index f73f09165fb..a81d7a96b8a 100644 --- a/hotspot/src/share/vm/opto/arraycopynode.cpp +++ b/hotspot/src/share/vm/opto/arraycopynode.cpp @@ -26,9 +26,10 @@ #include "opto/arraycopynode.hpp" #include "opto/graphKit.hpp" -ArrayCopyNode::ArrayCopyNode(Compile* C, bool alloc_tightly_coupled) +ArrayCopyNode::ArrayCopyNode(Compile* C, bool alloc_tightly_coupled, bool has_negative_length_guard) : CallNode(arraycopy_type(), NULL, TypeRawPtr::BOTTOM), _alloc_tightly_coupled(alloc_tightly_coupled), + _has_negative_length_guard(has_negative_length_guard), _kind(None), _arguments_validated(false), _src_type(TypeOopPtr::BOTTOM), @@ -45,10 +46,11 @@ ArrayCopyNode* ArrayCopyNode::make(GraphKit* kit, bool may_throw, Node* dest, Node* dest_offset, Node* length, bool alloc_tightly_coupled, + bool has_negative_length_guard, Node* src_klass, Node* dest_klass, Node* src_length, Node* dest_length) { - ArrayCopyNode* ac = new ArrayCopyNode(kit->C, alloc_tightly_coupled); + ArrayCopyNode* ac = new ArrayCopyNode(kit->C, alloc_tightly_coupled, has_negative_length_guard); Node* prev_mem = kit->set_predefined_input_for_runtime_call(ac); ac->init_req(ArrayCopyNode::Src, src); diff --git a/hotspot/src/share/vm/opto/arraycopynode.hpp 
b/hotspot/src/share/vm/opto/arraycopynode.hpp index c0f635eb284..58d641727d8 100644 --- a/hotspot/src/share/vm/opto/arraycopynode.hpp +++ b/hotspot/src/share/vm/opto/arraycopynode.hpp @@ -58,6 +58,7 @@ private: // the arraycopy is not parsed yet so doesn't exist when // LibraryCallKit::tightly_coupled_allocation() is called. bool _alloc_tightly_coupled; + bool _has_negative_length_guard; bool _arguments_validated; @@ -82,7 +83,7 @@ private: return TypeFunc::make(domain, range); } - ArrayCopyNode(Compile* C, bool alloc_tightly_coupled); + ArrayCopyNode(Compile* C, bool alloc_tightly_coupled, bool has_negative_length_guard); intptr_t get_length_if_constant(PhaseGVN *phase) const; int get_count(PhaseGVN *phase) const; @@ -133,6 +134,7 @@ public: Node* dest, Node* dest_offset, Node* length, bool alloc_tightly_coupled, + bool has_negative_length_guard, Node* src_klass = NULL, Node* dest_klass = NULL, Node* src_length = NULL, Node* dest_length = NULL); @@ -162,6 +164,8 @@ public: bool is_alloc_tightly_coupled() const { return _alloc_tightly_coupled; } + bool has_negative_length_guard() const { return _has_negative_length_guard; } + static bool may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase, ArrayCopyNode*& ac); bool modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransform* phase, bool must_modify); diff --git a/hotspot/src/share/vm/opto/c2_globals.hpp b/hotspot/src/share/vm/opto/c2_globals.hpp index 3ba603dd979..49bcbf42ae8 100644 --- a/hotspot/src/share/vm/opto/c2_globals.hpp +++ b/hotspot/src/share/vm/opto/c2_globals.hpp @@ -120,6 +120,9 @@ "Check performance difference allowing FP " \ "associativity and commutativity...") \ \ + diagnostic_pd(bool, IdealizeClearArrayNode, \ + "Replace ClearArrayNode by subgraph of basic operations.") \ + \ develop(bool, OptoBreakpoint, false, \ "insert breakpoint at method entry") \ \ diff --git a/hotspot/src/share/vm/opto/c2compiler.cpp b/hotspot/src/share/vm/opto/c2compiler.cpp index 
32393fc03e2..ec7e03c5ff0 100644 --- a/hotspot/src/share/vm/opto/c2compiler.cpp +++ b/hotspot/src/share/vm/opto/c2compiler.cpp @@ -537,6 +537,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt #ifdef TRACE_HAVE_INTRINSICS case vmIntrinsics::_counterTime: case vmIntrinsics::_getClassId: + case vmIntrinsics::_getBufferWriter: #endif case vmIntrinsics::_currentTimeMillis: case vmIntrinsics::_nanoTime: diff --git a/hotspot/src/share/vm/opto/c2compiler.hpp b/hotspot/src/share/vm/opto/c2compiler.hpp index 9df31b4f712..d7eb0225841 100644 --- a/hotspot/src/share/vm/opto/c2compiler.hpp +++ b/hotspot/src/share/vm/opto/c2compiler.hpp @@ -32,7 +32,7 @@ class C2Compiler : public AbstractCompiler { static bool init_c2_runtime(); public: - C2Compiler() : AbstractCompiler(c2) {} + C2Compiler() : AbstractCompiler(compiler_c2) {} // Name const char *name() { return "C2"; } diff --git a/hotspot/src/share/vm/opto/castnode.cpp b/hotspot/src/share/vm/opto/castnode.cpp index 5dc76a95736..1184728add5 100644 --- a/hotspot/src/share/vm/opto/castnode.cpp +++ b/hotspot/src/share/vm/opto/castnode.cpp @@ -224,30 +224,6 @@ Node *CastIINode::Ideal(PhaseGVN *phase, bool can_reshape) { return progress; } - // transform: - // (CastII (AddI x const)) -> (AddI (CastII x) const) - // So the AddI has a chance to be optimized out - if (in(1)->Opcode() == Op_AddI) { - Node* in2 = in(1)->in(2); - const TypeInt* in2_t = phase->type(in2)->isa_int(); - if (in2_t != NULL && in2_t->singleton()) { - int in2_const = in2_t->_lo; - const TypeInt* current_type = _type->is_int(); - jlong new_lo_long = ((jlong)current_type->_lo) - in2_const; - jlong new_hi_long = ((jlong)current_type->_hi) - in2_const; - int new_lo = (int)new_lo_long; - int new_hi = (int)new_hi_long; - if (((jlong)new_lo) == new_lo_long && ((jlong)new_hi) == new_hi_long) { - Node* in1 = in(1)->in(1); - CastIINode* new_cast = (CastIINode*)clone(); - AddINode* new_add = (AddINode*)in(1)->clone(); - 
new_cast->set_type(TypeInt::make(new_lo, new_hi, current_type->_widen)); - new_cast->set_req(1, in1); - new_add->set_req(1, phase->transform(new_cast)); - return new_add; - } - } - } // Similar to ConvI2LNode::Ideal() for the same reasons if (can_reshape && !phase->C->major_progress()) { const TypeInt* this_type = this->type()->is_int(); diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp index b8379ec10ee..77e20b264cb 100644 --- a/hotspot/src/share/vm/opto/compile.cpp +++ b/hotspot/src/share/vm/opto/compile.cpp @@ -3173,65 +3173,45 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { break; #endif - case Op_ModI: { - Node* di = NULL; + case Op_ModI: if (UseDivMod) { // Check if a%b and a/b both exist - di = n->find_similar(Op_DivI); - if (di) { + Node* d = n->find_similar(Op_DivI); + if (d) { // Replace them with a fused divmod if supported if (Matcher::has_match_rule(Op_DivModI)) { DivModINode* divmod = DivModINode::make(n); - di->subsume_by(divmod->div_proj(), this); + d->subsume_by(divmod->div_proj(), this); n->subsume_by(divmod->mod_proj(), this); } else { // replace a%b with a-((a/b)*b) - Node* mult = new MulINode(di, di->in(2)); - Node* sub = new SubINode(di->in(1), mult); + Node* mult = new MulINode(d, d->in(2)); + Node* sub = new SubINode(d->in(1), mult); n->subsume_by(sub, this); } } } - if (di == NULL) { - // Remove useless control edge in case of not mod-zero. 
- const Type *t = n->in(2)->bottom_type(); - const TypeInt *ti = t->is_int(); - if (n->in(0) && (ti->_hi < 0 || ti->_lo > 0)) { - n->set_req(0, NULL); - } - } break; - } - case Op_ModL: { - Node* dl = NULL; + case Op_ModL: if (UseDivMod) { // Check if a%b and a/b both exist - dl = n->find_similar(Op_DivL); - if (dl) { + Node* d = n->find_similar(Op_DivL); + if (d) { // Replace them with a fused divmod if supported if (Matcher::has_match_rule(Op_DivModL)) { DivModLNode* divmod = DivModLNode::make(n); - dl->subsume_by(divmod->div_proj(), this); + d->subsume_by(divmod->div_proj(), this); n->subsume_by(divmod->mod_proj(), this); } else { // replace a%b with a-((a/b)*b) - Node* mult = new MulLNode(dl, dl->in(2)); - Node* sub = new SubLNode(dl->in(1), mult); + Node* mult = new MulLNode(d, d->in(2)); + Node* sub = new SubLNode(d->in(1), mult); n->subsume_by(sub, this); } } } - if (dl == NULL) { - // Remove useless control edge in case of not mod-zero. - const Type *t = n->in(2)->bottom_type(); - const TypeLong *tl = t->is_long(); - if (n->in(0) && (tl->_hi < 0 || tl->_lo > 0)) { - n->set_req(0, NULL); - } - } break; - } case Op_LoadVector: case Op_StoreVector: diff --git a/hotspot/src/share/vm/opto/divnode.cpp b/hotspot/src/share/vm/opto/divnode.cpp index cf45d3b966c..c7fcb782e9c 100644 --- a/hotspot/src/share/vm/opto/divnode.cpp +++ b/hotspot/src/share/vm/opto/divnode.cpp @@ -474,16 +474,19 @@ Node *DivINode::Ideal(PhaseGVN *phase, bool can_reshape) { const TypeInt *ti = t->isa_int(); if( !ti ) return NULL; + + // Check for useless control input + // Check for excluding div-zero case + if (in(0) && (ti->_hi < 0 || ti->_lo > 0)) { + set_req(0, NULL); // Yank control input + return this; + } + if( !ti->is_con() ) return NULL; jint i = ti->get_con(); // Get divisor if (i == 0) return NULL; // Dividing by zero constant does not idealize - if (in(0) != NULL) { - phase->igvn_rehash_node_delayed(this); - set_req(0, NULL); // Dividing by a not-zero constant; no faulting - } - // 
Dividing by MININT does not optimize as a power-of-2 shift. if( i == min_jint ) return NULL; @@ -576,16 +579,19 @@ Node *DivLNode::Ideal( PhaseGVN *phase, bool can_reshape) { const TypeLong *tl = t->isa_long(); if( !tl ) return NULL; + + // Check for useless control input + // Check for excluding div-zero case + if (in(0) && (tl->_hi < 0 || tl->_lo > 0)) { + set_req(0, NULL); // Yank control input + return this; + } + if( !tl->is_con() ) return NULL; jlong l = tl->get_con(); // Get divisor if (l == 0) return NULL; // Dividing by zero constant does not idealize - if (in(0) != NULL) { - phase->igvn_rehash_node_delayed(this); - set_req(0, NULL); // Dividing by a not-zero constant; no faulting - } - // Dividing by MINLONG does not optimize as a power-of-2 shift. if( l == min_jlong ) return NULL; @@ -853,6 +859,13 @@ Node *ModINode::Ideal(PhaseGVN *phase, bool can_reshape) { if( t == Type::TOP ) return NULL; const TypeInt *ti = t->is_int(); + // Check for useless control input + // Check for excluding mod-zero case + if (in(0) && (ti->_hi < 0 || ti->_lo > 0)) { + set_req(0, NULL); // Yank control input + return this; + } + // See if we are MOD'ing by 2^k or 2^k-1. if( !ti->is_con() ) return NULL; jint con = ti->get_con(); @@ -1017,6 +1030,13 @@ Node *ModLNode::Ideal(PhaseGVN *phase, bool can_reshape) { if( t == Type::TOP ) return NULL; const TypeLong *tl = t->is_long(); + // Check for useless control input + // Check for excluding mod-zero case + if (in(0) && (tl->_hi < 0 || tl->_lo > 0)) { + set_req(0, NULL); // Yank control input + return this; + } + // See if we are MOD'ing by 2^k or 2^k-1. 
if( !tl->is_con() ) return NULL; jlong con = tl->get_con(); diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp index 8df50abb23d..73e7666fa5f 100644 --- a/hotspot/src/share/vm/opto/gcm.cpp +++ b/hotspot/src/share/vm/opto/gcm.cpp @@ -297,6 +297,7 @@ bool PhaseCFG::schedule_early(VectorSet &visited, Node_Stack &roots) { int is_visited = visited.test_set(in->_idx); if (!has_block(in)) { if (is_visited) { + assert(false, "graph should be schedulable"); return false; } // Save parent node and next input's index. @@ -1129,6 +1130,7 @@ Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) { if (LCA == NULL) { // Bailout without retry + assert(false, "graph should be schedulable"); C->record_method_not_compilable("late schedule failed: LCA == NULL"); return least; } @@ -1283,6 +1285,7 @@ void PhaseCFG::schedule_late(VectorSet &visited, Node_Stack &stack) { C->record_failure(C2Compiler::retry_no_subsuming_loads()); } else { // Bailout without retry when (early->_dom_depth > LCA->_dom_depth) + assert(false, "graph should be schedulable"); C->record_method_not_compilable("late schedule failed: incorrect graph"); } return; diff --git a/hotspot/src/share/vm/opto/lcm.cpp b/hotspot/src/share/vm/opto/lcm.cpp index 7d94f25abf3..87d6d92d5f6 100644 --- a/hotspot/src/share/vm/opto/lcm.cpp +++ b/hotspot/src/share/vm/opto/lcm.cpp @@ -1147,6 +1147,8 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray& ready_cnt, Vecto // If this is the first failure, the sentinel string will "stick" // to the Compile object, and the C2Compiler will see it and retry. 
C->record_failure(C2Compiler::retry_no_subsuming_loads()); + } else { + assert(false, "graph should be schedulable"); } // assert( phi_cnt == end_idx(), "did not schedule all" ); return false; diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp index 17ef1bc8c80..73227150602 100644 --- a/hotspot/src/share/vm/opto/library_call.cpp +++ b/hotspot/src/share/vm/opto/library_call.cpp @@ -256,6 +256,7 @@ class LibraryCallKit : public GraphKit { bool inline_native_time_funcs(address method, const char* funcName); #ifdef TRACE_HAVE_INTRINSICS bool inline_native_classID(); + bool inline_native_getBufferWriter(); #endif bool inline_native_isInterrupted(); bool inline_native_Class_query(vmIntrinsics::ID id); @@ -713,6 +714,7 @@ bool LibraryCallKit::try_to_inline(int predicate) { #ifdef TRACE_HAVE_INTRINSICS case vmIntrinsics::_counterTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), "counterTime"); case vmIntrinsics::_getClassId: return inline_native_classID(); + case vmIntrinsics::_getBufferWriter: return inline_native_getBufferWriter(); #endif case vmIntrinsics::_currentTimeMillis: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis"); case vmIntrinsics::_nanoTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime"); @@ -3198,6 +3200,43 @@ bool LibraryCallKit::inline_native_classID() { } +bool LibraryCallKit::inline_native_getBufferWriter() { + Node* tls_ptr = _gvn.transform(new ThreadLocalNode()); + + Node* jobj_ptr = basic_plus_adr(top(), tls_ptr, + in_bytes(TRACE_THREAD_DATA_WRITER_OFFSET) + ); + + Node* jobj = make_load(control(), jobj_ptr, TypeRawPtr::BOTTOM, T_ADDRESS, MemNode::unordered); + + Node* jobj_cmp_null = _gvn.transform( new CmpPNode(jobj, null()) ); + Node* test_jobj_eq_null = _gvn.transform( new BoolNode(jobj_cmp_null, BoolTest::eq) ); + + IfNode* iff_jobj_null = + create_and_map_if(control(), 
test_jobj_eq_null, PROB_MIN, COUNT_UNKNOWN); + + enum { _normal_path = 1, + _null_path = 2, + PATH_LIMIT }; + + RegionNode* result_rgn = new RegionNode(PATH_LIMIT); + PhiNode* result_val = new PhiNode(result_rgn, TypePtr::BOTTOM); + + Node* jobj_is_null = _gvn.transform(new IfTrueNode(iff_jobj_null)); + result_rgn->init_req(_null_path, jobj_is_null); + result_val->init_req(_null_path, null()); + + Node* jobj_is_not_null = _gvn.transform(new IfFalseNode(iff_jobj_null)); + result_rgn->init_req(_normal_path, jobj_is_not_null); + + Node* res = make_load(jobj_is_not_null, jobj, TypeInstPtr::NOTNULL, T_OBJECT, MemNode::unordered); + result_val->init_req(_normal_path, res); + + set_result(result_rgn, result_val); + + return true; +} + #endif //------------------------inline_native_currentThread------------------ @@ -4007,7 +4046,7 @@ bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) { if (!stopped()) { newcopy = new_array(klass_node, length, 0); // no arguments to push - ArrayCopyNode* ac = ArrayCopyNode::make(this, true, original, start, newcopy, intcon(0), moved, true, + ArrayCopyNode* ac = ArrayCopyNode::make(this, true, original, start, newcopy, intcon(0), moved, true, false, load_object_klass(original), klass_node); if (!is_copyOfRange) { ac->set_copyof(validated); @@ -4527,7 +4566,7 @@ void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, b const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; - ArrayCopyNode* ac = ArrayCopyNode::make(this, false, src, NULL, dest, NULL, countx, false); + ArrayCopyNode* ac = ArrayCopyNode::make(this, false, src, NULL, dest, NULL, countx, false, false); ac->set_clonebasic(); Node* n = _gvn.transform(ac); if (n == ac) { @@ -4664,7 +4703,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) { set_control(is_obja); // Generate a direct call to the right arraycopy function(s). 
Node* alloc = tightly_coupled_allocation(alloc_obj, NULL); - ArrayCopyNode* ac = ArrayCopyNode::make(this, true, obj, intcon(0), alloc_obj, intcon(0), obj_length, alloc != NULL); + ArrayCopyNode* ac = ArrayCopyNode::make(this, true, obj, intcon(0), alloc_obj, intcon(0), obj_length, alloc != NULL, false); ac->set_cloneoop(); Node* n = _gvn.transform(ac); assert(n == ac, "cannot disappear"); @@ -5061,6 +5100,8 @@ bool LibraryCallKit::inline_arraycopy() { trap_bci = alloc->jvms()->bci(); } + bool negative_length_guard_generated = false; + if (!C->too_many_traps(trap_method, trap_bci, Deoptimization::Reason_intrinsic) && can_emit_guards && !src->is_top() && !dest->is_top()) { @@ -5093,6 +5134,15 @@ bool LibraryCallKit::inline_arraycopy() { load_array_length(dest), slow_region); + // (6) length must not be negative. + // This is also checked in generate_arraycopy() during macro expansion, but + // we also have to check it here for the case where the ArrayCopyNode will + // be eliminated by Escape Analysis. + if (EliminateAllocations) { + generate_negative_guard(length, slow_region); + negative_length_guard_generated = true; + } + // (9) each element of an oop array must be assignable Node* src_klass = load_object_klass(src); Node* dest_klass = load_object_klass(dest); @@ -5120,7 +5170,7 @@ bool LibraryCallKit::inline_arraycopy() { return true; } - ArrayCopyNode* ac = ArrayCopyNode::make(this, true, src, src_offset, dest, dest_offset, length, alloc != NULL, + ArrayCopyNode* ac = ArrayCopyNode::make(this, true, src, src_offset, dest, dest_offset, length, alloc != NULL, negative_length_guard_generated, // Create LoadRange and LoadKlass nodes for use during macro expansion here // so the compiler has a chance to eliminate them: during macro expansion, // we have to set their control (CastPP nodes are eliminated). 
diff --git a/hotspot/src/share/vm/opto/macroArrayCopy.cpp b/hotspot/src/share/vm/opto/macroArrayCopy.cpp index 07b298ddbe7..fb55efedb95 100644 --- a/hotspot/src/share/vm/opto/macroArrayCopy.cpp +++ b/hotspot/src/share/vm/opto/macroArrayCopy.cpp @@ -1154,7 +1154,10 @@ void PhaseMacroExpand::expand_arraycopy_node(ArrayCopyNode *ac) { // Call StubRoutines::generic_arraycopy stub. Node* mem = generate_arraycopy(ac, NULL, &ctrl, merge_mem, &io, TypeRawPtr::BOTTOM, T_CONFLICT, - src, src_offset, dest, dest_offset, length); + src, src_offset, dest, dest_offset, length, + // If a negative length guard was generated for the ArrayCopyNode, + // the length of the array can never be negative. + false, ac->has_negative_length_guard()); // Do not let reads from the destination float above the arraycopy. // Since we cannot type the arrays, we don't know which slices @@ -1258,5 +1261,7 @@ void PhaseMacroExpand::expand_arraycopy_node(ArrayCopyNode *ac) { generate_arraycopy(ac, alloc, &ctrl, merge_mem, &io, adr_type, dest_elem, src, src_offset, dest, dest_offset, length, - false, false, slow_region); + // If a negative length guard was generated for the ArrayCopyNode, + // the length of the array can never be negative. 
+ false, ac->has_negative_length_guard(), slow_region); } diff --git a/hotspot/src/share/vm/opto/memnode.cpp b/hotspot/src/share/vm/opto/memnode.cpp index d82a4a64f97..445192c1da4 100644 --- a/hotspot/src/share/vm/opto/memnode.cpp +++ b/hotspot/src/share/vm/opto/memnode.cpp @@ -2717,9 +2717,9 @@ Node* ClearArrayNode::Identity(PhaseGVN* phase) { //------------------------------Idealize--------------------------------------- // Clearing a short array is faster with stores -Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){ +Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape) { // Already know this is a large node, do not try to ideal it - if (_is_large) return NULL; + if (!IdealizeClearArrayNode || _is_large) return NULL; const int unit = BytesPerLong; const TypeX* t = phase->type(in(2))->isa_intptr_t(); diff --git a/hotspot/src/share/vm/opto/output.cpp b/hotspot/src/share/vm/opto/output.cpp index 312bbf8161e..2de400ebd10 100644 --- a/hotspot/src/share/vm/opto/output.cpp +++ b/hotspot/src/share/vm/opto/output.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1206,13 +1206,19 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { padding = nop_size; } - if(padding > 0) { + if (padding > 0) { assert((padding % nop_size) == 0, "padding is not a multiple of NOP size"); int nops_cnt = padding / nop_size; MachNode *nop = new MachNopNode(nops_cnt); block->insert_node(nop, j++); last_inst++; _cfg->map_node_to_block(nop, block); + // Ensure enough space. 
+ cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size); + if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) { + C->record_failure("CodeCache is full"); + return; + } nop->emit(*cb, _regalloc); cb->flush_bundle(true); current_offset = cb->insts_size(); diff --git a/hotspot/src/share/vm/opto/type.cpp b/hotspot/src/share/vm/opto/type.cpp index 43e47473168..a8c68006020 100644 --- a/hotspot/src/share/vm/opto/type.cpp +++ b/hotspot/src/share/vm/opto/type.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -67,7 +67,7 @@ Type::TypeInfo Type::_type_info[Type::lastype] = { { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ -#elif defined(PPC64) +#elif defined(PPC64) || defined(S390) { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX diff --git a/hotspot/src/share/vm/prims/jvmtiEnv.cpp b/hotspot/src/share/vm/prims/jvmtiEnv.cpp index 25ad11871fe..3b8ce659894 100644 --- a/hotspot/src/share/vm/prims/jvmtiEnv.cpp +++ b/hotspot/src/share/vm/prims/jvmtiEnv.cpp @@ -1001,7 +1001,8 @@ JvmtiEnv::GetThreadInfo(jthread thread, jvmtiThreadInfo* info_ptr) { if (name() != NULL) { n = java_lang_String::as_utf8_string(name()); } else { - n = UNICODE::as_utf8((jchar*) NULL, 0); + int utf8_length = 0; + n = UNICODE::as_utf8((jchar*) NULL, utf8_length); } info_ptr->name = (char *) jvmtiMalloc(strlen(n)+1); diff --git 
a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp index f3ab445a2a8..c038a20268b 100644 --- a/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp +++ b/hotspot/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -361,6 +361,8 @@ Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) { int minimum_alignment = 16; #if defined(SPARC) || (defined(X86) && !defined(AMD64)) minimum_alignment = 4; +#elif defined(S390) + minimum_alignment = 2; #endif if (InteriorEntryAlignment < minimum_alignment) { diff --git a/hotspot/src/share/vm/runtime/deoptimization.cpp b/hotspot/src/share/vm/runtime/deoptimization.cpp index aca59f7ef83..e3b88640768 100644 --- a/hotspot/src/share/vm/runtime/deoptimization.cpp +++ b/hotspot/src/share/vm/runtime/deoptimization.cpp @@ -171,7 +171,6 @@ Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread assert(thread->deopt_compiled_method() == NULL, "Pending deopt!"); CompiledMethod* cm = deoptee.cb()->as_compiled_method_or_null(); thread->set_deopt_compiled_method(cm); - bool skip_internal = (cm != NULL) && !cm->is_compiled_by_jvmci(); if (VerifyStack) { thread->validate_frame_layout(); @@ -241,6 +240,7 @@ Deoptimization::UnrollBlock* Deoptimization::fetch_unroll_info_helper(JavaThread JRT_BLOCK realloc_failures = realloc_objects(thread, &deoptee, objects, THREAD); JRT_END + bool skip_internal = (cm != NULL) && !cm->is_compiled_by_jvmci(); reassign_fields(&deoptee, &map, objects, realloc_failures, skip_internal); #ifndef PRODUCT if (TraceDeoptimization) { @@ -1651,7 +1651,7 @@ 
JRT_ENTRY(void, Deoptimization::uncommon_trap_inner(JavaThread* thread, jint tra if (TraceDeoptimization) { // make noise on the tty tty->print("Uncommon trap occurred in"); nm->method()->print_short_name(tty); - tty->print(" compiler=%s compile_id=%d", nm->compiler() == NULL ? "" : nm->compiler()->name(), nm->compile_id()); + tty->print(" compiler=%s compile_id=%d", nm->compiler_name(), nm->compile_id()); #if INCLUDE_JVMCI if (nm->is_nmethod()) { oop installedCode = nm->as_nmethod()->jvmci_installed_code(); diff --git a/hotspot/src/share/vm/runtime/frame.cpp b/hotspot/src/share/vm/runtime/frame.cpp index 129a08850db..dcfd0b91492 100644 --- a/hotspot/src/share/vm/runtime/frame.cpp +++ b/hotspot/src/share/vm/runtime/frame.cpp @@ -686,9 +686,7 @@ void frame::print_on_error(outputStream* st, char* buf, int buflen, bool verbose if (cm->is_nmethod()) { nmethod* nm = cm->as_nmethod(); st->print("J %d%s", nm->compile_id(), (nm->is_osr_method() ? "%" : "")); - if (nm->compiler() != NULL) { - st->print(" %s", nm->compiler()->name()); - } + st->print(" %s", nm->compiler_name()); } m->name_and_sig_as_C_string(buf, buflen); st->print(" %s", buf); diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 4d613d6c572..3afe94f685e 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -3325,9 +3325,7 @@ public: "Stack space (bytes) required for JVM_InvokeMethod to complete") \ \ /* code cache parameters */ \ - /* ppc64/tiered compilation has large code-entry alignment. 
*/ \ - develop(uintx, CodeCacheSegmentSize, \ - 64 PPC64_ONLY(+64) NOT_PPC64(TIERED_ONLY(+64)), \ + develop_pd(uintx, CodeCacheSegmentSize, \ "Code cache segment size (in bytes) - smallest unit of " \ "allocation") \ range(1, 1024) \ diff --git a/hotspot/src/share/vm/runtime/os.cpp b/hotspot/src/share/vm/runtime/os.cpp index b73a4fa612d..18359effe52 100644 --- a/hotspot/src/share/vm/runtime/os.cpp +++ b/hotspot/src/share/vm/runtime/os.cpp @@ -97,7 +97,7 @@ void os_init_globals() { // except that on Windows the %z behaves badly, so we do it ourselves. // Also, people wanted milliseconds on there, // and strftime doesn't do milliseconds. -char* os::iso8601_time(char* buffer, size_t buffer_length) { +char* os::iso8601_time(char* buffer, size_t buffer_length, bool utc) { // Output will be of the form "YYYY-MM-DDThh:mm:ss.mmm+zzzz\0" // 1 2 // 12345678901234567890123456789 @@ -122,9 +122,16 @@ char* os::iso8601_time(char* buffer, size_t buffer_length) { milliseconds_since_19700101 % milliseconds_per_microsecond; // Convert the time value to a tm and timezone variable struct tm time_struct; - if (localtime_pd(&seconds_since_19700101, &time_struct) == NULL) { - assert(false, "Failed localtime_pd"); - return NULL; + if (utc) { + if (gmtime_pd(&seconds_since_19700101, &time_struct) == NULL) { + assert(false, "Failed gmtime_pd"); + return NULL; + } + } else { + if (localtime_pd(&seconds_since_19700101, &time_struct) == NULL) { + assert(false, "Failed localtime_pd"); + return NULL; + } } #if defined(_ALLBSD_SOURCE) const time_t zone = (time_t) time_struct.tm_gmtoff; @@ -141,6 +148,12 @@ char* os::iso8601_time(char* buffer, size_t buffer_length) { if (time_struct.tm_isdst > 0) { UTC_to_local = UTC_to_local - seconds_per_hour; } + + // No offset when dealing with UTC + if (utc) { + UTC_to_local = 0; + } + // Compute the time zone offset. // localtime_pd() sets timezone to the difference (in seconds) // between UTC and and local time. 
@@ -1766,95 +1779,3 @@ os::SuspendResume::State os::SuspendResume::switch_state(os::SuspendResume::Stat return result; } #endif - -/////////////// Unit tests /////////////// - -#ifndef PRODUCT - -#define assert_eq(a,b) assert(a == b, SIZE_FORMAT " != " SIZE_FORMAT, a, b) - -class TestOS : AllStatic { - static size_t small_page_size() { - return os::vm_page_size(); - } - - static size_t large_page_size() { - const size_t large_page_size_example = 4 * M; - return os::page_size_for_region_aligned(large_page_size_example, 1); - } - - static void test_page_size_for_region_aligned() { - if (UseLargePages) { - const size_t small_page = small_page_size(); - const size_t large_page = large_page_size(); - - if (large_page > small_page) { - size_t num_small_pages_in_large = large_page / small_page; - size_t page = os::page_size_for_region_aligned(large_page, num_small_pages_in_large); - - assert_eq(page, small_page); - } - } - } - - static void test_page_size_for_region_alignment() { - if (UseLargePages) { - const size_t small_page = small_page_size(); - const size_t large_page = large_page_size(); - if (large_page > small_page) { - const size_t unaligned_region = large_page + 17; - size_t page = os::page_size_for_region_aligned(unaligned_region, 1); - assert_eq(page, small_page); - - const size_t num_pages = 5; - const size_t aligned_region = large_page * num_pages; - page = os::page_size_for_region_aligned(aligned_region, num_pages); - assert_eq(page, large_page); - } - } - } - - static void test_page_size_for_region_unaligned() { - if (UseLargePages) { - // Given exact page size, should return that page size. - for (size_t i = 0; os::_page_sizes[i] != 0; i++) { - size_t expected = os::_page_sizes[i]; - size_t actual = os::page_size_for_region_unaligned(expected, 1); - assert_eq(expected, actual); - } - - // Given slightly larger size than a page size, return the page size. 
- for (size_t i = 0; os::_page_sizes[i] != 0; i++) { - size_t expected = os::_page_sizes[i]; - size_t actual = os::page_size_for_region_unaligned(expected + 17, 1); - assert_eq(expected, actual); - } - - // Given a slightly smaller size than a page size, - // return the next smaller page size. - if (os::_page_sizes[1] > os::_page_sizes[0]) { - size_t expected = os::_page_sizes[0]; - size_t actual = os::page_size_for_region_unaligned(os::_page_sizes[1] - 17, 1); - assert_eq(actual, expected); - } - - // Return small page size for values less than a small page. - size_t small_page = small_page_size(); - size_t actual = os::page_size_for_region_unaligned(small_page - 17, 1); - assert_eq(small_page, actual); - } - } - - public: - static void run_tests() { - test_page_size_for_region_aligned(); - test_page_size_for_region_alignment(); - test_page_size_for_region_unaligned(); - } -}; - -void TestOS_test() { - TestOS::run_tests(); -} - -#endif // PRODUCT diff --git a/hotspot/src/share/vm/runtime/os.hpp b/hotspot/src/share/vm/runtime/os.hpp index 0077b36d345..12c2891d3db 100644 --- a/hotspot/src/share/vm/runtime/os.hpp +++ b/hotspot/src/share/vm/runtime/os.hpp @@ -197,10 +197,11 @@ class os: AllStatic { // information may require a lock on some platforms. static char* local_time_string(char *buf, size_t buflen); static struct tm* localtime_pd (const time_t* clock, struct tm* res); + static struct tm* gmtime_pd (const time_t* clock, struct tm* res); // Fill in buffer with current local time as an ISO-8601 string. // E.g., YYYY-MM-DDThh:mm:ss.mmm+zzzz. // Returns buffer, or NULL if it failed. 
- static char* iso8601_time(char* buffer, size_t buffer_length); + static char* iso8601_time(char* buffer, size_t buffer_length, bool utc = false); // Interface for detecting multiprocessor system static inline bool is_MP() { diff --git a/hotspot/src/share/vm/runtime/rtmLocking.cpp b/hotspot/src/share/vm/runtime/rtmLocking.cpp index 56ad9fb3689..ffcb171cc22 100644 --- a/hotspot/src/share/vm/runtime/rtmLocking.cpp +++ b/hotspot/src/share/vm/runtime/rtmLocking.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "utilities/globalDefinitions.hpp" +#include "compiler/compilerDefinitions.hpp" #if INCLUDE_RTM_OPT diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp index ed70e62ba75..8a14881be43 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp @@ -1983,8 +1983,10 @@ JRT_END // Handles the uncommon case in locking, i.e., contention or an inflated lock. JRT_BLOCK_ENTRY(void, SharedRuntime::complete_monitor_locking_C(oopDesc* _obj, BasicLock* lock, JavaThread* thread)) // Disable ObjectSynchronizer::quick_enter() in default config - // on AARCH64 until JDK-8153107 is resolved. - if (AARCH64_ONLY((SyncFlags & 256) != 0 &&) !SafepointSynchronize::is_synchronizing()) { + // on AARCH64 and ARM until JDK-8153107 is resolved. + if (ARM_ONLY((SyncFlags & 256) != 0 &&) + AARCH64_ONLY((SyncFlags & 256) != 0 &&) + !SafepointSynchronize::is_synchronizing()) { // Only try quick_enter() if we're not trying to reach a safepoint // so that the calling thread reaches the safepoint more quickly. 
if (ObjectSynchronizer::quick_enter(_obj, thread, lock)) return; diff --git a/hotspot/src/share/vm/runtime/vm_version.cpp b/hotspot/src/share/vm/runtime/vm_version.cpp index 942feb10c3e..08e15dc2a5f 100644 --- a/hotspot/src/share/vm/runtime/vm_version.cpp +++ b/hotspot/src/share/vm/runtime/vm_version.cpp @@ -170,15 +170,16 @@ const char* Abstract_VM_Version::jre_release_version() { #define CPU "ppc64le" #else #define CPU "ppc64" -#endif +#endif // PPC64 #else -#define CPU IA32_ONLY("x86") \ - IA64_ONLY("ia64") \ +#define CPU AARCH64_ONLY("aarch64") \ AMD64_ONLY("amd64") \ - AARCH64_ONLY("aarch64") \ + IA32_ONLY("x86") \ + IA64_ONLY("ia64") \ + S390_ONLY("s390") \ SPARC_ONLY("sparc") -#endif // -#endif +#endif // !ZERO +#endif // !CPU const char *Abstract_VM_Version::vm_platform_string() { return OS "-" CPU; diff --git a/hotspot/src/share/vm/services/diagnosticCommand.cpp b/hotspot/src/share/vm/services/diagnosticCommand.cpp index c90bf08e39e..d9e8a628e8a 100644 --- a/hotspot/src/share/vm/services/diagnosticCommand.cpp +++ b/hotspot/src/share/vm/services/diagnosticCommand.cpp @@ -551,11 +551,6 @@ ClassStatsDCmd::ClassStatsDCmd(outputStream* output, bool heap) : } void ClassStatsDCmd::execute(DCmdSource source, TRAPS) { - if (!UnlockDiagnosticVMOptions) { - output()->print_cr("GC.class_stats command requires -XX:+UnlockDiagnosticVMOptions"); - return; - } - VM_GC_HeapInspection heapop(output(), true /* request_full_gc */); heapop.set_csv_format(_csv.value()); @@ -996,8 +991,8 @@ TouchedMethodsDCmd::TouchedMethodsDCmd(outputStream* output, bool heap) : {} void TouchedMethodsDCmd::execute(DCmdSource source, TRAPS) { - if (!UnlockDiagnosticVMOptions) { - output()->print_cr("VM.touched_methods command requires -XX:+UnlockDiagnosticVMOptions"); + if (!LogTouchedMethods) { + output()->print_cr("VM.print_touched_methods command requires -XX:+LogTouchedMethods"); return; } VM_DumpTouchedMethods dumper(output()); diff --git 
a/hotspot/src/share/vm/services/diagnosticCommand.hpp b/hotspot/src/share/vm/services/diagnosticCommand.hpp index 727ab33e64a..0e7b48e777a 100644 --- a/hotspot/src/share/vm/services/diagnosticCommand.hpp +++ b/hotspot/src/share/vm/services/diagnosticCommand.hpp @@ -386,7 +386,7 @@ public: return "GC.class_stats"; } static const char* description() { - return "Provide statistics about Java class meta data. Requires -XX:+UnlockDiagnosticVMOptions."; + return "Provide statistics about Java class meta data."; } static const char* impact() { return "High: Depends on Java heap size and content."; diff --git a/hotspot/src/share/vm/services/management.cpp b/hotspot/src/share/vm/services/management.cpp index ff4da35c88b..9cb5e2ce5c3 100644 --- a/hotspot/src/share/vm/services/management.cpp +++ b/hotspot/src/share/vm/services/management.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -173,6 +173,20 @@ void Management::get_optional_support(jmmOptionalSupport* support) { Klass* Management::load_and_initialize_klass(Symbol* sh, TRAPS) { Klass* k = SystemDictionary::resolve_or_fail(sh, true, CHECK_NULL); + Klass* ik = initialize_klass(k, CHECK_NULL); + return ik; +} + +Klass* Management::load_and_initialize_klass_or_null(Symbol* sh, TRAPS) { + Klass* k = SystemDictionary::resolve_or_null(sh, CHECK_NULL); + if (k == NULL) { + return NULL; + } + Klass* ik = initialize_klass(k, CHECK_NULL); + return ik; +} + +Klass* Management::initialize_klass(Klass* k, TRAPS) { instanceKlassHandle ik (THREAD, k); if (ik->should_be_initialized()) { ik->initialize(CHECK_NULL); @@ -255,7 +269,8 @@ Klass* Management::sun_management_ManagementFactoryHelper_klass(TRAPS) { Klass* Management::com_sun_management_internal_GarbageCollectorExtImpl_klass(TRAPS) { if (_garbageCollectorExtImpl_klass == NULL) { - _garbageCollectorExtImpl_klass = load_and_initialize_klass(vmSymbols::com_sun_management_internal_GarbageCollectorExtImpl(), CHECK_NULL); + _garbageCollectorExtImpl_klass = + load_and_initialize_klass_or_null(vmSymbols::com_sun_management_internal_GarbageCollectorExtImpl(), CHECK_NULL); } return _garbageCollectorExtImpl_klass; } diff --git a/hotspot/src/share/vm/services/management.hpp b/hotspot/src/share/vm/services/management.hpp index 139e44a811d..ac7f872a29c 100644 --- a/hotspot/src/share/vm/services/management.hpp +++ b/hotspot/src/share/vm/services/management.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -53,6 +53,8 @@ private: static Klass* _sensor_klass; static Klass* _threadInfo_klass; static Klass* load_and_initialize_klass(Symbol* sh, TRAPS); + static Klass* load_and_initialize_klass_or_null(Symbol* sh, TRAPS); + static Klass* initialize_klass(Klass* k, TRAPS); public: static void init(); diff --git a/hotspot/src/share/vm/trace/traceMacros.hpp b/hotspot/src/share/vm/trace/traceMacros.hpp index 67f71f3493c..f325dd83590 100644 --- a/hotspot/src/share/vm/trace/traceMacros.hpp +++ b/hotspot/src/share/vm/trace/traceMacros.hpp @@ -57,6 +57,8 @@ extern "C" void JNICALL trace_register_natives(JNIEnv*, jclass); #define TRACE_DEFINE_THREAD_TRACE_ID_OFFSET typedef int ___IGNORED_hs_trace_type5 #define TRACE_THREAD_TRACE_ID_OFFSET in_ByteSize(0); ShouldNotReachHere() #define TRACE_DEFINE_THREAD_ID_SIZE typedef int ___IGNORED_hs_trace_type6 +#define TRACE_DEFINE_THREAD_DATA_WRITER_OFFSET typedef int ___IGNORED_hs_trace_type7 +#define TRACE_THREAD_DATA_WRITER_OFFSET in_ByteSize(0); ShouldNotReachHere() #define TRACE_TEMPLATES(template) #define TRACE_INTRINSICS(do_intrinsic, do_class, do_name, do_signature, do_alias) diff --git a/hotspot/src/share/vm/utilities/globalDefinitions.cpp b/hotspot/src/share/vm/utilities/globalDefinitions.cpp index 7f69396d84d..b718c780624 100644 --- a/hotspot/src/share/vm/utilities/globalDefinitions.cpp +++ b/hotspot/src/share/vm/utilities/globalDefinitions.cpp @@ -214,7 +214,6 @@ BasicType name2type(const char* name) { return T_ILLEGAL; } - // Map BasicType to size in words int type2size[T_CONFLICT+1]={ -1, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 0, 1, 1, 1, 1, -1}; diff --git a/hotspot/src/share/vm/utilities/globalDefinitions.hpp b/hotspot/src/share/vm/utilities/globalDefinitions.hpp index a7ebde3ed54..e07ed31678d 100644 --- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp +++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp @@ -193,6 +193,16 @@ inline size_t 
heap_word_size(size_t byte_size) { return (byte_size + (HeapWordSize-1)) >> LogHeapWordSize; } +//------------------------------------------- +// Constant for jlong (standardized by C++11) + +// Build a 64bit integer constant +#define CONST64(x) (x ## LL) +#define UCONST64(x) (x ## ULL) + +const jlong min_jlong = CONST64(0x8000000000000000); +const jlong max_jlong = CONST64(0x7fffffffffffffff); + const size_t K = 1024; const size_t M = K*K; const size_t G = M*K; @@ -444,13 +454,6 @@ const uint64_t KlassEncodingMetaspaceMax = (uint64_t(max_juint) + 1) << LogKlas // Machine dependent stuff -// States of Restricted Transactional Memory usage. -enum RTMState { - NoRTM = 0x2, // Don't use RTM - UseRTM = 0x1, // Use RTM - ProfileRTM = 0x0 // Use RTM with abort ratio calculation -}; - // The maximum size of the code cache. Can be overridden by targets. #define CODE_CACHE_SIZE_LIMIT (2*G) // Allow targets to reduce the default size of the code cache. @@ -458,15 +461,6 @@ enum RTMState { #include CPU_HEADER(globalDefinitions) -#ifndef INCLUDE_RTM_OPT -#define INCLUDE_RTM_OPT 0 -#endif -#if INCLUDE_RTM_OPT -#define RTM_OPT_ONLY(code) code -#else -#define RTM_OPT_ONLY(code) -#endif - // To assure the IRIW property on processors that are not multiple copy // atomic, sync instructions must be issued between volatile reads to // assure their ordering, instead of after volatile stores. @@ -923,55 +917,6 @@ enum JavaThreadState { }; -// Handy constants for deciding which compiler mode to use. 
-enum MethodCompilation { - InvocationEntryBci = -1 // i.e., not a on-stack replacement compilation -}; - -// Enumeration to distinguish tiers of compilation -enum CompLevel { - CompLevel_any = -1, - CompLevel_all = -1, - CompLevel_none = 0, // Interpreter - CompLevel_simple = 1, // C1 - CompLevel_limited_profile = 2, // C1, invocation & backedge counters - CompLevel_full_profile = 3, // C1, invocation & backedge counters + mdo - CompLevel_full_optimization = 4, // C2, Shark or JVMCI - -#if defined(COMPILER2) || defined(SHARK) - CompLevel_highest_tier = CompLevel_full_optimization, // pure C2 and tiered or JVMCI and tiered -#elif defined(COMPILER1) - CompLevel_highest_tier = CompLevel_simple, // pure C1 or JVMCI -#else - CompLevel_highest_tier = CompLevel_none, -#endif - -#if defined(TIERED) - CompLevel_initial_compile = CompLevel_full_profile // tiered -#elif defined(COMPILER1) || INCLUDE_JVMCI - CompLevel_initial_compile = CompLevel_simple // pure C1 or JVMCI -#elif defined(COMPILER2) || defined(SHARK) - CompLevel_initial_compile = CompLevel_full_optimization // pure C2 -#else - CompLevel_initial_compile = CompLevel_none -#endif -}; - -inline bool is_c1_compile(int comp_level) { - return comp_level > CompLevel_none && comp_level < CompLevel_full_optimization; -} - -inline bool is_c2_compile(int comp_level) { - return comp_level == CompLevel_full_optimization; -} - -inline bool is_highest_tier_compile(int comp_level) { - return comp_level == CompLevel_highest_tier; -} - -inline bool is_compile(int comp_level) { - return is_c1_compile(comp_level) || is_c2_compile(comp_level); -} //---------------------------------------------------------------------------------------------------- // 'Forward' declarations of frequently used classes diff --git a/hotspot/src/share/vm/utilities/globalDefinitions_gcc.hpp b/hotspot/src/share/vm/utilities/globalDefinitions_gcc.hpp index 7b6c7b9302b..17b665d9e29 100644 --- a/hotspot/src/share/vm/utilities/globalDefinitions_gcc.hpp +++ 
b/hotspot/src/share/vm/utilities/globalDefinitions_gcc.hpp @@ -160,17 +160,6 @@ typedef uint32_t juint; typedef uint64_t julong; -//---------------------------------------------------------------------------------------------------- -// Constant for jlong (specifying a long long constant is C++ compiler specific) - -// Build a 64bit integer constant -#define CONST64(x) (x ## LL) -#define UCONST64(x) (x ## ULL) - -const jlong min_jlong = CONST64(0x8000000000000000); -const jlong max_jlong = CONST64(0x7fffffffffffffff); - - #ifdef SOLARIS //---------------------------------------------------------------------------------------------------- // ANSI C++ fixes diff --git a/hotspot/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp b/hotspot/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp index 22f8b4e7276..bc743244a50 100644 --- a/hotspot/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp +++ b/hotspot/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp @@ -174,16 +174,6 @@ typedef unsigned int juint; typedef unsigned long long julong; -//---------------------------------------------------------------------------------------------------- -// Constant for jlong (specifying a long long constant is C++ compiler specific) - -// Build a 64bit integer constant -#define CONST64(x) (x ## LL) -#define UCONST64(x) (x ## ULL) - -const jlong min_jlong = CONST64(0x8000000000000000); -const jlong max_jlong = CONST64(0x7fffffffffffffff); - #ifdef SOLARIS //---------------------------------------------------------------------------------------------------- // ANSI C++ fixes diff --git a/hotspot/src/share/vm/utilities/globalDefinitions_visCPP.hpp b/hotspot/src/share/vm/utilities/globalDefinitions_visCPP.hpp index c387fe72a6d..1a16d192638 100644 --- a/hotspot/src/share/vm/utilities/globalDefinitions_visCPP.hpp +++ b/hotspot/src/share/vm/utilities/globalDefinitions_visCPP.hpp @@ -63,8 +63,8 @@ #undef NULL // 64-bit Windows uses a P64 data model (not LP64, although we 
define _LP64) // Since longs are 32-bit we cannot use 0L here. Use the Visual C++ specific -// 64-bit integer-suffix (i64) instead. -#define NULL 0i64 +// 64-bit integer-suffix (LL) instead. +#define NULL 0LL #else #ifndef NULL #define NULL 0 @@ -147,16 +147,6 @@ inline int g_isnan(jdouble f) { return _isnan(f); } inline int g_isfinite(jfloat f) { return _finite(f); } inline int g_isfinite(jdouble f) { return _finite(f); } -//---------------------------------------------------------------------------------------------------- -// Constant for jlong (specifying a long long constant is C++ compiler specific) - -// Build a 64bit integer constant with Visual C++ -#define CONST64(x) (x ## i64) -#define UCONST64(x) (x ## ui64) - -const jlong min_jlong = (jlong)UCONST64(0x8000000000000000); -const jlong max_jlong = CONST64(0x7fffffffffffffff); - //---------------------------------------------------------------------------------------------------- // Miscellaneous diff --git a/hotspot/src/share/vm/utilities/globalDefinitions_xlc.hpp b/hotspot/src/share/vm/utilities/globalDefinitions_xlc.hpp index ff97f116a77..623a8cbe6cf 100644 --- a/hotspot/src/share/vm/utilities/globalDefinitions_xlc.hpp +++ b/hotspot/src/share/vm/utilities/globalDefinitions_xlc.hpp @@ -107,16 +107,6 @@ typedef uint32_t juint; typedef uint64_t julong; -//---------------------------------------------------------------------------------------------------- -// Constant for jlong (specifying a long long constant is C++ compiler specific) - -// Build a 64bit integer constant -#define CONST64(x) (x ## LL) -#define UCONST64(x) (x ## ULL) - -const jlong min_jlong = CONST64(0x8000000000000000); -const jlong max_jlong = CONST64(0x7fffffffffffffff); - //---------------------------------------------------------------------------------------------------- // Debugging @@ -163,23 +153,6 @@ inline int wcslen(const jchar* x) { return wcslen((const wchar_t*)x); } // offset_of as it is defined for gcc. 
#define offset_of(klass,field) (size_t)((intx)&(((klass*)16)->field) - 16) -// Some constant sizes used throughout the AIX port -#define SIZE_1K ((uint64_t) UCONST64( 0x400)) -#define SIZE_4K ((uint64_t) UCONST64( 0x1000)) -#define SIZE_64K ((uint64_t) UCONST64( 0x10000)) -#define SIZE_1M ((uint64_t) UCONST64( 0x100000)) -#define SIZE_4M ((uint64_t) UCONST64( 0x400000)) -#define SIZE_8M ((uint64_t) UCONST64( 0x800000)) -#define SIZE_16M ((uint64_t) UCONST64( 0x1000000)) -#define SIZE_256M ((uint64_t) UCONST64( 0x10000000)) -#define SIZE_1G ((uint64_t) UCONST64( 0x40000000)) -#define SIZE_2G ((uint64_t) UCONST64( 0x80000000)) -#define SIZE_4G ((uint64_t) UCONST64( 0x100000000)) -#define SIZE_16G ((uint64_t) UCONST64( 0x400000000)) -#define SIZE_32G ((uint64_t) UCONST64( 0x800000000)) -#define SIZE_64G ((uint64_t) UCONST64( 0x1000000000)) -#define SIZE_1T ((uint64_t) UCONST64(0x10000000000)) - #ifndef USE_LIBRARY_BASED_TLS_ONLY #define THREAD_LOCAL_DECL __thread #endif diff --git a/hotspot/src/share/vm/utilities/hashtable.cpp b/hotspot/src/share/vm/utilities/hashtable.cpp index da3cfde6239..f5d37f45fa9 100644 --- a/hotspot/src/share/vm/utilities/hashtable.cpp +++ b/hotspot/src/share/vm/utilities/hashtable.cpp @@ -342,12 +342,12 @@ template void BasicHashtable::verify() { #ifdef ASSERT -template bool BasicHashtable::verify_lookup_length(double load) { +template bool BasicHashtable::verify_lookup_length(double load, const char *table_name) { if ((!_lookup_warning) && (_lookup_count != 0) && ((double)_lookup_length / (double)_lookup_count > load * 2.0)) { - warning("Performance bug: SystemDictionary lookup_count=%d " + warning("Performance bug: %s lookup_count=%d " "lookup_length=%d average=%lf load=%f", - _lookup_count, _lookup_length, + table_name, _lookup_count, _lookup_length, (double)_lookup_length / _lookup_count, load); _lookup_warning = true; diff --git a/hotspot/src/share/vm/utilities/hashtable.hpp b/hotspot/src/share/vm/utilities/hashtable.hpp index 
be8203c70c4..51324a18c62 100644 --- a/hotspot/src/share/vm/utilities/hashtable.hpp +++ b/hotspot/src/share/vm/utilities/hashtable.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -185,7 +185,7 @@ protected: bool _lookup_warning; mutable int _lookup_count; mutable int _lookup_length; - bool verify_lookup_length(double load); + bool verify_lookup_length(double load, const char *table_name); #endif void initialize(int table_size, int entry_size, int number_of_entries); diff --git a/hotspot/src/share/vm/utilities/internalVMTests.cpp b/hotspot/src/share/vm/utilities/internalVMTests.cpp index 22bc9e51f44..3ce41677a25 100644 --- a/hotspot/src/share/vm/utilities/internalVMTests.cpp +++ b/hotspot/src/share/vm/utilities/internalVMTests.cpp @@ -41,22 +41,15 @@ void InternalVMTests::run_test(const char* name, void (*test)()) { void InternalVMTests::run() { tty->print_cr("Running internal VM tests"); - run_unit_test(TestDependencyContext_test); run_unit_test(test_semaphore); - run_unit_test(TestOS_test); run_unit_test(TestReservedSpace_test); run_unit_test(TestReserveMemorySpecial_test); run_unit_test(TestVirtualSpace_test); run_unit_test(TestMetaspaceAux_test); - run_unit_test(TestMetachunk_test); run_unit_test(TestVirtualSpaceNode_test); run_unit_test(TestGlobalDefinitions_test); run_unit_test(GCTimer_test); run_unit_test(CollectedHeap_test); - run_unit_test(QuickSort_test); - run_unit_test(GuardedMemory_test); - run_unit_test(TestNewSize_test); - run_unit_test(TestOldSize_test); run_unit_test(TestBitMap_test); run_unit_test(ObjectMonitor_test); run_unit_test(DirectivesParser_test); @@ -65,10 +58,6 @@ void InternalVMTests::run() { #endif #if INCLUDE_ALL_GCS run_unit_test(TestBufferingOopClosure_test); - 
if (UseG1GC) { - run_unit_test(FreeRegionList_test); - } - run_unit_test(WorkerDataArray_test); run_unit_test(ParallelCompact_test); #endif tty->print_cr("All internal VM tests passed"); diff --git a/hotspot/src/share/vm/utilities/macros.hpp b/hotspot/src/share/vm/utilities/macros.hpp index 9904650b01d..aa46c0766ce 100644 --- a/hotspot/src/share/vm/utilities/macros.hpp +++ b/hotspot/src/share/vm/utilities/macros.hpp @@ -386,6 +386,14 @@ #define NOT_AMD64(code) code #endif +#ifdef S390 +#define S390_ONLY(code) code +#define NOT_S390(code) +#else +#define S390_ONLY(code) +#define NOT_S390(code) code +#endif + #ifdef SPARC #define SPARC_ONLY(code) code #define NOT_SPARC(code) diff --git a/hotspot/src/share/vm/utilities/quickSort.cpp b/hotspot/src/share/vm/utilities/quickSort.cpp deleted file mode 100644 index 52661d87afa..00000000000 --- a/hotspot/src/share/vm/utilities/quickSort.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#include "precompiled.hpp" - -/////////////// Unit tests /////////////// - -#ifndef PRODUCT - -#include "runtime/os.hpp" -#include "utilities/quickSort.hpp" -#include "memory/allocation.hpp" -#include "memory/allocation.inline.hpp" -#include - -#ifdef ASSERT -static int test_comparator(int a, int b) { - if (a == b) { - return 0; - } - if (a < b) { - return -1; - } - return 1; -} - -static void print_array(const char* prefix, int* array, int length) { - tty->print("%s:", prefix); - for (int i = 0; i < length; i++) { - tty->print(" %d", array[i]); - } - tty->cr(); -} - -static bool compare_arrays(int* actual, int* expected, int length) { - for (int i = 0; i < length; i++) { - if (actual[i] != expected[i]) { - print_array("Sorted array ", actual, length); - print_array("Expected array", expected, length); - return false; - } - } - return true; -} - -template -static bool sort_and_compare(int* arrayToSort, int* expectedResult, int length, C comparator, bool idempotent = false) { - QuickSort::sort(arrayToSort, length, comparator, idempotent); - return compare_arrays(arrayToSort, expectedResult, length); -} -#endif // ASSERT - -static int test_even_odd_comparator(int a, int b) { - bool a_is_odd = (a % 2) == 1; - bool b_is_odd = (b % 2) == 1; - if (a_is_odd == b_is_odd) { - return 0; - } - if (a_is_odd) { - return -1; - } - return 1; -} - -extern "C" { - static int test_stdlib_comparator(const void* a, const void* b) { - int ai = *(int*)a; - int bi = *(int*)b; - if (ai == bi) { - return 0; - } - if (ai < bi) { - return -1; - } - return 1; - } -} - -void QuickSort_test() { - { - int* test_array = NULL; - int* expected_array = NULL; - assert(sort_and_compare(test_array, expected_array, 0, test_comparator), "Empty array not handled"); - } - { - int test_array[] = {3}; - int expected_array[] = {3}; - assert(sort_and_compare(test_array, expected_array, 1, test_comparator), "Single value array not handled"); - } - { - int test_array[] = {3,2}; - int expected_array[] 
= {2,3}; - assert(sort_and_compare(test_array, expected_array, 2, test_comparator), "Array with 2 values not correctly sorted"); - } - { - int test_array[] = {3,2,1}; - int expected_array[] = {1,2,3}; - assert(sort_and_compare(test_array, expected_array, 3, test_comparator), "Array with 3 values not correctly sorted"); - } - { - int test_array[] = {4,3,2,1}; - int expected_array[] = {1,2,3,4}; - assert(sort_and_compare(test_array, expected_array, 4, test_comparator), "Array with 4 values not correctly sorted"); - } - { - int test_array[] = {7,1,5,3,6,9,8,2,4,0}; - int expected_array[] = {0,1,2,3,4,5,6,7,8,9}; - assert(sort_and_compare(test_array, expected_array, 10, test_comparator), "Array with 10 values not correctly sorted"); - } - { - int test_array[] = {4,4,1,4}; - int expected_array[] = {1,4,4,4}; - assert(sort_and_compare(test_array, expected_array, 4, test_comparator), "3 duplicates not sorted correctly"); - } - { - int test_array[] = {0,1,2,3,4,5,6,7,8,9}; - int expected_array[] = {0,1,2,3,4,5,6,7,8,9}; - assert(sort_and_compare(test_array, expected_array, 10, test_comparator), "Already sorted array not correctly sorted"); - } - { - // one of the random arrays that found an issue in the partion method. - int test_array[] = {76,46,81,8,64,56,75,11,51,55,11,71,59,27,9,64,69,75,21,25,39,40,44,32,7,8,40,41,24,78,24,74,9,65,28,6,40,31,22,13,27,82}; - int expected_array[] = {6,7,8,8,9,9,11,11,13,21,22,24,24,25,27,27,28,31,32,39,40,40,40,41,44,46,51,55,56,59,64,64,65,69,71,74,75,75,76,78,81,82}; - assert(sort_and_compare(test_array, expected_array, 42, test_comparator), "Not correctly sorted"); - } - { - int test_array[] = {2,8,1,4}; - int expected_array[] = {1,4,2,8}; - assert(sort_and_compare(test_array, expected_array, 4, test_even_odd_comparator), "Even/odd not sorted correctly"); - } - { // Some idempotent tests - { - // An array of lenght 3 is only sorted by find_pivot. Make sure that it is idempotent. 
- int test_array[] = {1,4,8}; - int expected_array[] = {1,4,8}; - assert(sort_and_compare(test_array, expected_array, 3, test_even_odd_comparator, true), "Even/odd not idempotent"); - } - { - int test_array[] = {1,7,9,4,8,2}; - int expected_array[] = {1,7,9,4,8,2}; - assert(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true), "Even/odd not idempotent"); - } - { - int test_array[] = {1,9,7,4,2,8}; - int expected_array[] = {1,9,7,4,2,8}; - assert(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true), "Even/odd not idempotent"); - } - { - int test_array[] = {7,9,1,2,8,4}; - int expected_array[] = {7,9,1,2,8,4}; - assert(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true), "Even/odd not idempotent"); - } - { - int test_array[] = {7,1,9,2,4,8}; - int expected_array[] = {7,1,9,2,4,8}; - assert(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true), "Even/odd not idempotent"); - } - { - int test_array[] = {9,1,7,4,8,2}; - int expected_array[] = {9,1,7,4,8,2}; - assert(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true), "Even/odd not idempotent"); - } - { - int test_array[] = {9,7,1,4,2,8}; - int expected_array[] = {9,7,1,4,2,8}; - assert(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true), "Even/odd not idempotent"); - } - } - - // test sorting random arrays - for (int i = 0; i < 1000; i++) { - int length = os::random() % 100; - int* test_array = NEW_C_HEAP_ARRAY(int, length, mtInternal); - int* expected_array = NEW_C_HEAP_ARRAY(int, length, mtInternal); - for (int j = 0; j < length; j++) { - // Choose random values, but get a chance of getting duplicates - test_array[j] = os::random() % (length * 2); - expected_array[j] = test_array[j]; - } - - // Compare sorting to stdlib::qsort() - qsort(expected_array, length, sizeof(int), test_stdlib_comparator); - assert(sort_and_compare(test_array, expected_array, 
length, test_comparator), "Random array not correctly sorted"); - - // Make sure sorting is idempotent. - // Both test_array and expected_array are sorted by the test_comparator. - // Now sort them once with the test_even_odd_comparator. Then sort the - // test_array one more time with test_even_odd_comparator and verify that - // it is idempotent. - QuickSort::sort(expected_array, length, test_even_odd_comparator, true); - QuickSort::sort(test_array, length, test_even_odd_comparator, true); - assert(compare_arrays(test_array, expected_array, length), "Sorting identical arrays rendered different results"); - QuickSort::sort(test_array, length, test_even_odd_comparator, true); - assert(compare_arrays(test_array, expected_array, length), "Sorting already sorted array changed order of elements - not idempotent"); - - FREE_C_HEAP_ARRAY(int, test_array); - FREE_C_HEAP_ARRAY(int, expected_array); - } -} -#endif diff --git a/hotspot/src/share/vm/utilities/utf8.cpp b/hotspot/src/share/vm/utilities/utf8.cpp index 0f479bad8dd..a67fd7134c4 100644 --- a/hotspot/src/share/vm/utilities/utf8.cpp +++ b/hotspot/src/share/vm/utilities/utf8.cpp @@ -411,61 +411,46 @@ bool UNICODE::is_latin1(jchar* base, int length) { } int UNICODE::utf8_size(jchar c) { - if ((0x0001 <= c) && (c <= 0x007F)) return 1; - if (c <= 0x07FF) return 2; - return 3; + if ((0x0001 <= c) && (c <= 0x007F)) { + // ASCII character + return 1; + } else if (c <= 0x07FF) { + return 2; + } else { + return 3; + } } int UNICODE::utf8_size(jbyte c) { - if (c >= 0x0001) return 1; - return 2; -} - -int UNICODE::utf8_length(jchar* base, int length) { - int result = 0; - for (int index = 0; index < length; index++) { - jchar c = base[index]; - if ((0x0001 <= c) && (c <= 0x007F)) result += 1; - else if (c <= 0x07FF) result += 2; - else result += 3; + if (c >= 0x01) { + // ASCII character. Check is equivalent to + // (0x01 <= c) && (c <= 0x7F) because c is signed. 
+ return 1; + } else { + // Non-ASCII character or 0x00 which needs to be + // two-byte encoded as 0xC080 in modified UTF-8. + return 2; } - return result; } -int UNICODE::utf8_length(jbyte* base, int length) { +template +int UNICODE::utf8_length(T* base, int length) { int result = 0; for (int index = 0; index < length; index++) { - jbyte c = base[index]; + T c = base[index]; result += utf8_size(c); } return result; } -char* UNICODE::as_utf8(jchar* base, int length) { +template +char* UNICODE::as_utf8(T* base, int& length) { int utf8_len = utf8_length(base, length); u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1); char* result = as_utf8(base, length, (char*) buf, utf8_len + 1); assert((int) strlen(result) == utf8_len, "length prediction must be correct"); - return result; -} - -char* UNICODE::as_utf8(jbyte* base, int length) { - int utf8_len = utf8_length(base, length); - u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1); - u_char* p = result; - if (utf8_len == length) { - for (int index = 0; index < length; index++) { - *p++ = base[index]; - } - } else { - // Unicode string contains U+0000 which should - // be encoded as 0xC080 in "modified" UTF8. - for (int index = 0; index < length; index++) { - p = utf8_write(p, ((jchar) base[index]) & 0xff); - } - } - *p = '\0'; - assert(p == &result[utf8_len], "length prediction must be correct"); + // Set string length to uft8 length + length = utf8_len; return (char*) result; } @@ -490,9 +475,10 @@ char* UNICODE::as_utf8(jbyte* base, int length, char* buf, int buflen) { buflen -= sz; if (buflen <= 0) break; // string is truncated if (sz == 1) { + // Copy ASCII characters (UTF-8 is ASCII compatible) *p++ = c; } else { - // Unicode string contains U+0000 which should + // Non-ASCII character or 0x00 which should // be encoded as 0xC080 in "modified" UTF8. 
p = utf8_write(p, ((jchar) c) & 0xff); } @@ -543,6 +529,10 @@ void UNICODE::as_quoted_ascii(const T* base, int length, char* buf, int buflen) } // Explicit instantiation for all supported types. +template int UNICODE::utf8_length(jbyte* base, int length); +template int UNICODE::utf8_length(jchar* base, int length); +template char* UNICODE::as_utf8(jbyte* base, int& length); +template char* UNICODE::as_utf8(jchar* base, int& length); template int UNICODE::quoted_ascii_length(jbyte* base, int length); template int UNICODE::quoted_ascii_length(jchar* base, int length); template void UNICODE::as_quoted_ascii(const jbyte* base, int length, char* buf, int buflen); diff --git a/hotspot/src/share/vm/utilities/utf8.hpp b/hotspot/src/share/vm/utilities/utf8.hpp index 34deeb953f7..834e634169d 100644 --- a/hotspot/src/share/vm/utilities/utf8.hpp +++ b/hotspot/src/share/vm/utilities/utf8.hpp @@ -97,16 +97,15 @@ class UNICODE : AllStatic { static int utf8_size(jbyte c); // returns the utf8 length of a unicode string - static int utf8_length(jchar* base, int length); - static int utf8_length(jbyte* base, int length); + template static int utf8_length(T* base, int length); // converts a unicode string to utf8 string static void convert_to_utf8(const jchar* base, int length, char* utf8_buffer); // converts a unicode string to a utf8 string; result is allocated - // in resource area unless a buffer is provided. - static char* as_utf8(jchar* base, int length); - static char* as_utf8(jbyte* base, int length); + // in resource area unless a buffer is provided. The unicode 'length' + // parameter is set to the length of the result utf8 string. 
+ template static char* as_utf8(T* base, int& length); static char* as_utf8(jchar* base, int length, char* buf, int buflen); static char* as_utf8(jbyte* base, int length, char* buf, int buflen); diff --git a/hotspot/test/Makefile b/hotspot/test/Makefile index 4a6c45f654e..b852714c906 100644 --- a/hotspot/test/Makefile +++ b/hotspot/test/Makefile @@ -195,7 +195,8 @@ ifneq ($(FAILURE_HANDLER_DIR), ) -timeoutHandlerDir:$(FAILURE_HANDLER_DIR_MIXED)/jtregFailureHandler.jar \ -observerDir:$(FAILURE_HANDLER_DIR_MIXED)/jtregFailureHandler.jar \ -timeoutHandler:jdk.test.failurehandler.jtreg.GatherProcessInfoTimeoutHandler \ - -observer:jdk.test.failurehandler.jtreg.GatherDiagnosticInfoObserver + -observer:jdk.test.failurehandler.jtreg.GatherDiagnosticInfoObserver \ + -timeoutHandlerTimeout:0 ifeq ($(PLATFORM), windows) JTREG_FAILURE_HANDLER_OPTIONS += -J-Djava.library.path="$(FAILURE_HANDLER_DIR_MIXED)" endif diff --git a/hotspot/test/TEST.ROOT b/hotspot/test/TEST.ROOT index 9074b149ea1..4fe2b2a0e51 100644 --- a/hotspot/test/TEST.ROOT +++ b/hotspot/test/TEST.ROOT @@ -44,7 +44,8 @@ requires.properties= \ vm.gc.G1 \ vm.gc.Serial \ vm.gc.Parallel \ - vm.gc.ConcMarkSweep + vm.gc.ConcMarkSweep \ + vm.debug # Tests using jtreg 4.2 b03 features requiredVersion=4.2 b03 diff --git a/hotspot/test/TEST.groups b/hotspot/test/TEST.groups index 620252378f8..9c247c76f24 100644 --- a/hotspot/test/TEST.groups +++ b/hotspot/test/TEST.groups @@ -349,6 +349,12 @@ hotspot_fast_gc_closed = \ hotspot_fast_gc_gcold = \ gc/stress/TestGCOld.java +hotspot_fast_gc_gcbasher = \ + gc/stress/gcbasher/TestGCBasherWithG1.java \ + gc/stress/gcbasher/TestGCBasherWithCMS.java \ + gc/stress/gcbasher/TestGCBasherWithSerial.java \ + gc/stress/gcbasher/TestGCBasherWithParallel.java + hotspot_fast_runtime = \ runtime/ \ -runtime/6626217/Test6626217.sh \ diff --git a/hotspot/test/compiler/ciReplay/CiReplayBase.java b/hotspot/test/compiler/ciReplay/CiReplayBase.java new file mode 100644 index 
00000000000..15972eb60e6 --- /dev/null +++ b/hotspot/test/compiler/ciReplay/CiReplayBase.java @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package compiler.ciReplay; + +import compiler.whitebox.CompilerWhiteBoxTest; +import java.io.IOException; +import java.io.File; +import java.io.BufferedReader; +import java.io.FileReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.regex.Pattern; +import java.util.regex.Matcher; +import jdk.test.lib.Platform; +import jdk.test.lib.process.ProcessTools; +import jdk.test.lib.process.OutputAnalyzer; +import jdk.test.lib.Asserts; +import jdk.test.lib.Utils; + +public abstract class CiReplayBase { + public static final String REPLAY_FILE_NAME = "test_replay.txt"; + public static final boolean CLIENT_VM_AVAILABLE; + public static final boolean SERVER_VM_AVAILABLE; + public static final String TIERED_ENABLED_VM_OPTION = "-XX:+TieredCompilation"; + public static final String TIERED_DISABLED_VM_OPTION = "-XX:-TieredCompilation"; + public static final String ENABLE_COREDUMP_ON_CRASH = "-XX:+CreateCoredumpOnCrash"; + public static final String DISABLE_COREDUMP_ON_CRASH = "-XX:-CreateCoredumpOnCrash"; + public static final String CLIENT_VM_OPTION = "-client"; + public static final String SERVER_VM_OPTION = "-server"; + public static final String TEST_CORE_FILE_NAME = "test_core"; + public static final String RUN_SHELL_NO_LIMIT = "ulimit -c unlimited && "; + private static final String REPLAY_FILE_OPTION = "-XX:ReplayDataFile=" + REPLAY_FILE_NAME; + private static final String LOCATIONS_STRING = "location: "; + private static final String HS_ERR_NAME = "hs_err_pid"; + private static final String RUN_SHELL_ZERO_LIMIT = "ulimit -S -c 0 && "; + private static final String VERSION_OPTION = "-version"; + private static final String[] REPLAY_GENERATION_OPTIONS = new String[]{"-Xms8m", "-Xmx32m", + "-XX:MetaspaceSize=4m", "-XX:MaxMetaspaceSize=16m", 
"-XX:InitialCodeCacheSize=512k", + "-XX:ReservedCodeCacheSize=4m", "-XX:ThreadStackSize=512", "-XX:VMThreadStackSize=512", + "-XX:CompilerThreadStackSize=512", "-XX:ParallelGCThreads=1", "-XX:CICompilerCount=2", + "-Xcomp", "-XX:CICrashAt=1", "-XX:+DumpReplayDataOnError", "-XX:-TransmitErrorReport", + "-XX:+PreferInterpreterNativeStubs", "-XX:+PrintCompilation", REPLAY_FILE_OPTION}; + private static final String[] REPLAY_OPTIONS = new String[]{DISABLE_COREDUMP_ON_CRASH, + "-XX:+ReplayCompiles", REPLAY_FILE_OPTION}; + protected final Optional runServer; + + static { + try { + CLIENT_VM_AVAILABLE = ProcessTools.executeTestJvm(CLIENT_VM_OPTION, VERSION_OPTION) + .getOutput().contains("Client"); + SERVER_VM_AVAILABLE = ProcessTools.executeTestJvm(SERVER_VM_OPTION, VERSION_OPTION) + .getOutput().contains("Server"); + } catch(Throwable t) { + throw new Error("Initialization failed: " + t, t); + } + } + + public CiReplayBase() { + runServer = Optional.empty(); + } + + public CiReplayBase(String args[]) { + if (args.length != 1 || (!"server".equals(args[0]) && !"client".equals(args[0]))) { + throw new Error("Expected 1 argument: [server|client]"); + } + runServer = Optional.of("server".equals(args[0])); + } + + public void runTest(boolean needCoreDump, String... 
args) { + cleanup(); + if (generateReplay(needCoreDump)) { + testAction(); + cleanup(); + } else { + throw new Error("Host is not configured to generate cores"); + } + } + + public abstract void testAction(); + + private static void remove(String item) { + File toDelete = new File(item); + toDelete.delete(); + if (Platform.isWindows()) { + Utils.waitForCondition(() -> !toDelete.exists()); + } + } + + private static void removeFromCurrentDirectoryStartingWith(String prefix) { + Arrays.stream(new File(".").listFiles()) + .filter(f -> f.getName().startsWith(prefix)) + .forEach(File::delete); + } + + public static void cleanup() { + removeFromCurrentDirectoryStartingWith("core"); + removeFromCurrentDirectoryStartingWith("replay"); + removeFromCurrentDirectoryStartingWith(HS_ERR_NAME); + remove(TEST_CORE_FILE_NAME); + remove(REPLAY_FILE_NAME); + } + + public boolean generateReplay(boolean needCoreDump, String... vmopts) { + OutputAnalyzer crashOut; + String crashOutputString; + try { + List options = new ArrayList<>(); + options.addAll(Arrays.asList(REPLAY_GENERATION_OPTIONS)); + options.addAll(Arrays.asList(vmopts)); + options.add(needCoreDump ? 
ENABLE_COREDUMP_ON_CRASH : DISABLE_COREDUMP_ON_CRASH); + options.add(VERSION_OPTION); + if (needCoreDump) { + crashOut = ProcessTools.executeProcess(getTestJavaCommandlineWithPrefix( + RUN_SHELL_NO_LIMIT, options.toArray(new String[0]))); + } else { + crashOut = ProcessTools.executeProcess(ProcessTools.createJavaProcessBuilder(true, + options.toArray(new String[0]))); + } + crashOutputString = crashOut.getOutput(); + Asserts.assertNotEquals(crashOut.getExitValue(), 0, "Crash JVM exits gracefully"); + Files.write(Paths.get("crash.out"), crashOutputString.getBytes(), + StandardOpenOption.CREATE, StandardOpenOption.WRITE, + StandardOpenOption.TRUNCATE_EXISTING); + } catch (Throwable t) { + throw new Error("Can't create replay: " + t, t); + } + if (needCoreDump) { + String coreFileLocation = getCoreFileLocation(crashOutputString); + if (coreFileLocation == null) { + if (Platform.isOSX()) { + File coresDir = new File("/cores"); + if (!coresDir.isDirectory() || !coresDir.canWrite()) { + return false; + } + } + throw new Error("Couldn't find core file location in: '" + crashOutputString + "'"); + } + try { + Asserts.assertGT(new File(coreFileLocation).length(), 0L, "Unexpected core size"); + Files.move(Paths.get(coreFileLocation), Paths.get(TEST_CORE_FILE_NAME)); + } catch (IOException ioe) { + throw new Error("Can't move core file: " + ioe, ioe); + } + } + removeFromCurrentDirectoryStartingWith(HS_ERR_NAME); + return true; + } + + public void commonTests() { + positiveTest(); + if (Platform.isTieredSupported()) { + positiveTest(TIERED_ENABLED_VM_OPTION); + } + } + + public int startTest(String... 
additionalVmOpts) { + try { + List allAdditionalOpts = new ArrayList<>(); + allAdditionalOpts.addAll(Arrays.asList(REPLAY_OPTIONS)); + allAdditionalOpts.addAll(Arrays.asList(additionalVmOpts)); + OutputAnalyzer oa = ProcessTools.executeProcess(getTestJavaCommandlineWithPrefix( + RUN_SHELL_ZERO_LIMIT, allAdditionalOpts.toArray(new String[0]))); + return oa.getExitValue(); + } catch (Throwable t) { + throw new Error("Can't run replay process: " + t, t); + } + } + + public void runVmTests() { + boolean runServerValue = runServer.orElseThrow(() -> new Error("runServer must be set")); + if (runServerValue) { + if (CLIENT_VM_AVAILABLE) { + negativeTest(CLIENT_VM_OPTION); + } + } else { + if (SERVER_VM_AVAILABLE) { + negativeTest(TIERED_DISABLED_VM_OPTION, SERVER_VM_OPTION); + if (Platform.isTieredSupported()) { + positiveTest(TIERED_ENABLED_VM_OPTION, SERVER_VM_OPTION); + } + } + } + nonTieredTests(runServerValue ? CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION + : CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE); + } + + public int getCompLevelFromReplay() { + try(BufferedReader br = new BufferedReader(new FileReader(REPLAY_FILE_NAME))) { + return br.lines() + .filter(s -> s.startsWith("compile ")) + .map(s -> s.substring(s.lastIndexOf(' ') + 1)) + .map(Integer::parseInt) + .findAny() + .get(); + } catch (IOException ioe) { + throw new Error("Failed to read replay data: " + ioe, ioe); + } + } + + public void positiveTest(String... additionalVmOpts) { + Asserts.assertEQ(startTest(additionalVmOpts), 0, "Unexpected exit code for positive case: " + + Arrays.toString(additionalVmOpts)); + } + + public void negativeTest(String... 
additionalVmOpts) { + Asserts.assertNE(startTest(additionalVmOpts), 0, "Unexpected exit code for negative case: " + + Arrays.toString(additionalVmOpts)); + } + + public void nonTieredTests(int compLevel) { + int replayDataCompLevel = getCompLevelFromReplay(); + if (replayDataCompLevel == compLevel) { + positiveTest(TIERED_DISABLED_VM_OPTION); + } else { + negativeTest(TIERED_DISABLED_VM_OPTION); + } + } + + // lets search few possible locations using process output and return existing location + private String getCoreFileLocation(String crashOutputString) { + Asserts.assertTrue(crashOutputString.contains(LOCATIONS_STRING), + "Output doesn't contain the location of core file, see crash.out"); + String stringWithLocation = Arrays.stream(crashOutputString.split("\\r?\\n")) + .filter(str -> str.contains(LOCATIONS_STRING)) + .findFirst() + .get(); + stringWithLocation = stringWithLocation.substring(stringWithLocation + .indexOf(LOCATIONS_STRING) + LOCATIONS_STRING.length()); + String coreWithPid; + if (stringWithLocation.contains("or ") && !Platform.isWindows()) { + Matcher m = Pattern.compile("or.* ([^ ]+[^\\)])\\)?").matcher(stringWithLocation); + if (!m.find()) { + throw new Error("Couldn't find path to core inside location string"); + } + coreWithPid = m.group(1); + } else { + coreWithPid = stringWithLocation.trim(); + } + if (new File(coreWithPid).exists()) { + return coreWithPid; + } + String justCore = Paths.get("core").toString(); + if (new File(justCore).exists()) { + return justCore; + } + Path coreWithPidPath = Paths.get(coreWithPid); + String justFile = coreWithPidPath.getFileName().toString(); + if (new File(justFile).exists()) { + return justFile; + } + Path parent = coreWithPidPath.getParent(); + if (parent != null) { + String coreWithoutPid = parent.resolve("core").toString(); + if (new File(coreWithoutPid).exists()) { + return coreWithoutPid; + } + } + return null; + } + + private String[] getTestJavaCommandlineWithPrefix(String prefix, String... 
args) { + try { + String cmd = ProcessTools.getCommandLine(ProcessTools.createJavaProcessBuilder(true, args)); + return new String[]{"sh", "-c", prefix + + (Platform.isWindows() ? cmd.replace('\\', '/').replace(";", "\\;") : cmd)}; + } catch(Throwable t) { + throw new Error("Can't create process builder: " + t, t); + } + } +} diff --git a/hotspot/test/compiler/ciReplay/SABase.java b/hotspot/test/compiler/ciReplay/SABase.java new file mode 100644 index 00000000000..731e25634d3 --- /dev/null +++ b/hotspot/test/compiler/ciReplay/SABase.java @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package compiler.ciReplay; + +import java.nio.file.Files; +import java.nio.file.Paths; +import java.io.IOException; +import java.io.File; +import java.io.FileInputStream; +import java.io.OutputStream; +import java.util.Arrays; +import jdk.test.lib.Platform; +import jdk.test.lib.Asserts; +import jdk.test.lib.JDKToolFinder; +import jdk.test.lib.process.OutputAnalyzer; +import jdk.test.lib.process.ProcessTools; + +public class SABase extends CiReplayBase { + private static final String REPLAY_FILE_COPY = "replay_vm.txt"; + + public static void main(String args[]) { + checkSetLimits(); + new SABase(args).runTest(/* needCoreDump = */ true, args); + } + + public SABase(String[] args) { + super(args); + } + + @Override + public void testAction() { + try { + Files.move(Paths.get(REPLAY_FILE_NAME), Paths.get(REPLAY_FILE_COPY)); + } catch (IOException ioe) { + throw new Error("Can't move files: " + ioe, ioe); + } + ProcessBuilder pb; + try { + pb = ProcessTools.createJavaProcessBuilder(true, "--add-modules", "jdk.hotspot.agent", + "--add-exports=jdk.hotspot.agent/sun.jvm.hotspot=ALL-UNNAMED", + "sun.jvm.hotspot.CLHSDB", JDKToolFinder.getTestJDKTool("java"), + TEST_CORE_FILE_NAME); + } catch (Exception e) { + throw new Error("Can't create process builder: " + e, e); + } + Process p; + try { + p = pb.start(); + } catch (IOException ioe) { + throw new Error("Can't start child process: " + ioe, ioe); + } + OutputStream input = p.getOutputStream(); + String str = "dumpreplaydata -a > " + REPLAY_FILE_NAME + "\nquit\n"; + try { + input.write(str.getBytes()); + input.flush(); + } catch (IOException ioe) { + throw new Error("Problem writing process input: " + str, ioe); + } + try { + p.waitFor(); + } catch (InterruptedException ie) { + throw new Error("Problem waitinig child process: " + ie, ie); + } + int exitValue = p.exitValue(); + if (exitValue != 0) { + String output; + try { + output = new OutputAnalyzer(p).getOutput(); + } catch (IOException ioe) { + throw new 
Error("Can't get failed CLHSDB process output: " + ioe, ioe); + } + throw new AssertionError("CLHSDB wasn't run successfully: " + output); + } + File replay = new File(REPLAY_FILE_NAME); + Asserts.assertTrue(replay.exists() && replay.isFile() && replay.length() > 0, + "Replay data wasn't generated by SA"); + try (FileInputStream rep = new FileInputStream(replay); + FileInputStream repCopy = new FileInputStream(REPLAY_FILE_COPY)) { + // Compare the SA and VM replay files chunk by chunk; both streams are closed on exit. + byte repBuffer[] = new byte[512]; + byte repCopyBuffer[] = new byte[512]; + boolean filesNotEqual = false; + while(rep.available() > 0 && !filesNotEqual) { + int count = rep.read(repBuffer); + int count2 = repCopy.read(repCopyBuffer); + filesNotEqual = count != count2 || !Arrays.equals(repBuffer, repCopyBuffer); + } + if (filesNotEqual) { + System.out.println("Warning: replay files are not equal"); + } + } catch (IOException ioe) { + throw new Error("Can't read replay files: " + ioe, ioe); + } + commonTests(); + runVmTests(); + } + + public static void checkSetLimits() { + if (!Platform.isWindows()) { + OutputAnalyzer oa; + try { + // first check if setting limit is possible + oa = ProcessTools.executeProcess("sh", "-c", RUN_SHELL_NO_LIMIT + "ulimit -c"); + } catch (Throwable t) { + throw new Error("Can't set limits: " + t, t); + } + oa.shouldHaveExitValue(0); + + String out = oa.getOutput().trim(); // cut win/*nix newlines + if (!out.equals("unlimited") && !out.equals("-1")) { + throw new Error("Unable to set limits"); + } + } + if (Platform.isSolaris()) { + try { + OutputAnalyzer oa = ProcessTools.executeProcess("coreadm", "-p", "core", + "" + ProcessHandle.current().getPid()); + oa.shouldHaveExitValue(0); + } catch (Throwable t) { + throw new Error("Can't launch coreadm: " + t, t); + } + } + } +} + diff --git a/hotspot/test/compiler/ciReplay/TestClientVM.java b/hotspot/test/compiler/ciReplay/TestClientVM.java new file mode 100644 index 00000000000..a6663a9464c --- 
/dev/null +++ b/hotspot/test/compiler/ciReplay/TestClientVM.java @@ 
-0,0 +1,36 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8011675 + * @library / /test/lib + * @summary testing of ciReplay with using generated by VM replay.txt + * @requires vm.flightRecorder != true & vm.compMode != "Xint" & vm.debug == true & vm.flavor == "client" + * @modules java.base/jdk.internal.misc + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * compiler.ciReplay.VMBase client + */ diff --git a/hotspot/test/compiler/ciReplay/TestSA.sh b/hotspot/test/compiler/ciReplay/TestSA.sh deleted file mode 100644 index 03999366558..00000000000 --- a/hotspot/test/compiler/ciReplay/TestSA.sh +++ /dev/null @@ -1,94 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. 
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# -# - -## -## @test -## @bug 8011675 -## @ignore 8029528 -## @summary testing of ciReplay with using generated by SA replay.txt -## @author igor.ignatyev@oracle.com -## @requires vm.flightRecorder != true -## @run shell TestSA.sh -## - -if [ "${TESTSRC}" = "" ] -then - TESTSRC=${PWD} - echo "TESTSRC not set. Using "${TESTSRC}" as default" -fi -echo "TESTSRC=${TESTSRC}" - -## Adding common setup Variables for running shell tests. -. ${TESTSRC}/../../test_env.sh - -. ${TESTSRC}/common.sh - -generate_replay - -${MV} ${replay_data} replay_vm.txt - -if [ -z "${core_file}" -o ! -r "${core_file}" ] -then - # skip test if MacOS host isn't configured for core dumping - if [ "$OS" = "Darwin" ] - then - if [ ! -d "/cores" ] - then - echo TEST SKIPPED: \'/cores\' dir doens\'t exist - exit 0 - fi - if [ ! 
-w "/cores" ] - then - echo TEST SKIPPED: \'/cores\' dir exists but is not writable - exit 0 - fi - fi - test_fail 2 "CHECK :: CORE GENERATION" "core wasn't generated on $OS" -fi - -echo "dumpreplaydata -a > ${replay_data}" | \ - ${JAVA} ${TESTOPTS} \ - sun.jvm.hotspot.CLHSDB ${JAVA} ${core_file} - -if [ ! -s ${replay_data} ] -then - test_fail 1 "CHECK :: REPLAY DATA GENERATION" \ - "replay data wasn't generated by SA" -fi - -diff ${replay_data} replay_vm.txt > replay.diff 2>&1 -if [ -s replay.diff ] -then - echo WARNING: replay.txt from SA != replay.txt from VM: - cat replay.diff -fi - -common_tests 10 -${VM_TYPE}_tests 20 - -cleanup - -echo TEST PASSED - diff --git a/hotspot/test/compiler/ciReplay/TestSAClient.java b/hotspot/test/compiler/ciReplay/TestSAClient.java new file mode 100644 index 00000000000..2a9ffa28e7a --- /dev/null +++ b/hotspot/test/compiler/ciReplay/TestSAClient.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test + * @bug 8011675 + * @library / /test/lib + * @summary testing of ciReplay with using generated by SA replay.txt + * @requires vm.flightRecorder != true & vm.compMode != "Xint" & vm.debug == true & vm.flavor == "client" + * @modules java.base/jdk.internal.misc + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * compiler.ciReplay.SABase client + */ diff --git a/hotspot/test/compiler/ciReplay/TestSAServer.java b/hotspot/test/compiler/ciReplay/TestSAServer.java new file mode 100644 index 00000000000..76971150c57 --- /dev/null +++ b/hotspot/test/compiler/ciReplay/TestSAServer.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test + * @bug 8011675 + * @library / /test/lib + * @summary testing of ciReplay with using generated by SA replay.txt + * @requires vm.flightRecorder != true & vm.compMode != "Xint" & vm.debug == true & vm.flavor == "server" + * @modules java.base/jdk.internal.misc + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * compiler.ciReplay.SABase server + */ diff --git a/hotspot/test/compiler/ciReplay/TestServerVM.java b/hotspot/test/compiler/ciReplay/TestServerVM.java new file mode 100644 index 00000000000..be87305106d --- /dev/null +++ b/hotspot/test/compiler/ciReplay/TestServerVM.java @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * @test + * @bug 8011675 + * @library / /test/lib + * @summary testing of ciReplay with using generated by VM replay.txt + * @requires vm.flightRecorder != true & vm.compMode != "Xint" & vm.debug == true & vm.flavor == "server" + * @modules java.base/jdk.internal.misc + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * compiler.ciReplay.VMBase server + */ diff --git a/hotspot/test/compiler/ciReplay/TestVM.sh b/hotspot/test/compiler/ciReplay/TestVM.sh deleted file mode 100644 index 9b8fe49b284..00000000000 --- a/hotspot/test/compiler/ciReplay/TestVM.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. 
-# -# - -## -## @test -## @bug 8011675 -## @summary testing of ciReplay with using generated by VM replay.txt -## @author igor.ignatyev@oracle.com -## @requires vm.flightRecorder != true -## @run shell TestVM.sh -## - -if [ "${TESTSRC}" = "" ] -then - TESTSRC=${PWD} - echo "TESTSRC not set. Using "${TESTSRC}" as default" -fi -echo "TESTSRC=${TESTSRC}" - -## Adding common setup Variables for running shell tests. -. ${TESTSRC}/../../test_env.sh - -. ${TESTSRC}/common.sh - -generate_replay - -if [ ! -s ${replay_data} ] -then - test_fail 1 "CHECK :: REPLAY DATA GENERATION" \ - "replay data wasn't generated by VM" -fi - -common_tests 10 -${VM_TYPE}_tests 20 - -cleanup - -if [ $is_tiered -eq 1 ] -then - stop_level=1 - while [ $stop_level -le $server_level ] - do - generate_replay "-XX:TieredStopAtLevel=$stop_level" - if [ ! -s ${replay_data} ] - then - test_fail `expr $stop_level + 30` \ - "TIERED LEVEL $stop_level :: REPLAY DATA GENERATION" \ - "replay data wasn't generated by VM with stop_level=$stop_level" - fi - level=`grep "^compile " $replay_data | awk '{print $6}'` - if [ $level -gt $stop_level ] - then - test_fail `expr $stop_level + 40` \ - "TIERED LEVEL $stop_level :: COMP_LEVEL VERIFICATION" \ - "comp_level in replay[$level] is greater than stop_level[$stop_level]" - fi - positive_test `expr $stop_level + 50` "TIERED LEVEL $stop_level :: REPLAY" \ - "-XX:TieredStopAtLevel=$stop_level" - stop_level=`expr $stop_level + 1` - cleanup - done -fi - -echo TEST PASSED - diff --git a/hotspot/test/compiler/ciReplay/TestVMNoCompLevel.java b/hotspot/test/compiler/ciReplay/TestVMNoCompLevel.java new file mode 100644 index 00000000000..a7c195a64b1 --- /dev/null +++ b/hotspot/test/compiler/ciReplay/TestVMNoCompLevel.java @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8011675 + * @library / /test/lib + * @summary testing of ciReplay with using generated by VM replay.txt w/o comp_level + * @requires vm.flightRecorder != true & vm.compMode != "Xint" & vm.debug == true + * @modules java.base/jdk.internal.misc + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI + * compiler.ciReplay.TestVMNoCompLevel + */ + +package compiler.ciReplay; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.List; + +public class TestVMNoCompLevel extends CiReplayBase { + public static void main(String args[]) { + new TestVMNoCompLevel().runTest(false); + } + + @Override + public void testAction() { + try { + Path replayFilePath = Paths.get(REPLAY_FILE_NAME); + List replayContent = Files.readAllLines(replayFilePath); + for (int i = 0; i < replayContent.size(); i++) { + String line = replayContent.get(i); + if (line.startsWith("compile ")) { + replayContent.set(i, line.substring(0, line.lastIndexOf(" "))); + } + } + Files.write(replayFilePath, replayContent, StandardOpenOption.TRUNCATE_EXISTING); + } catch (IOException ioe) { + throw new Error("Failed to read/write replay data: " + ioe, ioe); + } + if (CLIENT_VM_AVAILABLE) { + negativeTest(CLIENT_VM_OPTION); + } + if (SERVER_VM_AVAILABLE) { + positiveTest(TIERED_DISABLED_VM_OPTION, SERVER_VM_OPTION); + positiveTest(TIERED_ENABLED_VM_OPTION, SERVER_VM_OPTION); + } + } +} + diff --git a/hotspot/test/compiler/ciReplay/TestVM_no_comp_level.sh b/hotspot/test/compiler/ciReplay/TestVM_no_comp_level.sh deleted file mode 100644 index 85ce38b6605..00000000000 --- a/hotspot/test/compiler/ciReplay/TestVM_no_comp_level.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. 
-# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# -# - -## -## @test -## @bug 8011675 -## @summary testing of ciReplay with using generated by VM replay.txt w/o comp_level -## @author igor.ignatyev@oracle.com -## @requires vm.flightRecorder != true -## @run shell TestVM_no_comp_level.sh -## - -if [ "${TESTSRC}" = "" ] -then - TESTSRC=${PWD} - echo "TESTSRC not set. Using "${TESTSRC}" as default" -fi -echo "TESTSRC=${TESTSRC}" - -## Adding common setup Variables for running shell tests. -. ${TESTSRC}/../../test_env.sh - -. ${TESTSRC}/common.sh - -generate_replay - -if [ ! 
-s ${replay_data} ] -then - test_fail 1 "CHECK :: REPLAY DATA GENERATION" \ - "replay data wasn't generated by VM" -fi - -${CP} ${replay_data} replay_vm.txt - -sed 's/^\(compile *[^ ][^ ]* *[^ ][^ ]* [^ ][^ ]* [^ ][^ ]*\).*$/\1/' \ - replay_vm.txt > ${replay_data} - -if [ $client_available -eq 1 ] -then - # tiered is unavailable in client vm, so results w/ flags will be the same as w/o flags - negative_test 10 "CLIENT" -client -fi - -if [ $server_available -eq 1 ] -then - positive_test 21 "SERVER :: NON-TIERED" -XX:-TieredCompilation -server - positive_test 22 "SERVER :: TIERED" -XX:+TieredCompilation -server -fi - -cleanup - -echo TEST PASSED - diff --git a/hotspot/test/compiler/ciReplay/VMBase.java b/hotspot/test/compiler/ciReplay/VMBase.java new file mode 100644 index 00000000000..c8ca9d08469 --- /dev/null +++ b/hotspot/test/compiler/ciReplay/VMBase.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +package compiler.ciReplay; + +import compiler.whitebox.CompilerWhiteBoxTest; +import jdk.test.lib.Asserts; +import sun.hotspot.WhiteBox; + +public class VMBase extends CiReplayBase { + + public static void main(String args[]) { + new VMBase(args).runTest(/* needCoreDump = */ false, args); + } + + public VMBase(String[] args) { + super(args); + } + + @Override + public void testAction() { + commonTests(); + runVmTests(); + cleanup(); + if (runServer.orElseThrow(() -> new Error("runServer must be set")) + && WhiteBox.getWhiteBox().getBooleanVMFlag("TieredCompilation")) { + for (int stop = 1; stop < CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION; stop++) { + String vmOpt = "-XX:TieredStopAtLevel=" + stop; + generateReplay(/* need coredump = */ false, vmOpt); + int replayCompLevel = getCompLevelFromReplay(); + Asserts.assertGTE(stop, replayCompLevel, "Unexpected compLevel in replay"); + positiveTest(vmOpt); + } + } + } +} + diff --git a/hotspot/test/compiler/ciReplay/common.sh b/hotspot/test/compiler/ciReplay/common.sh deleted file mode 100644 index 261b0b81c0d..00000000000 --- a/hotspot/test/compiler/ciReplay/common.sh +++ /dev/null @@ -1,291 +0,0 @@ -#!/bin/sh -# -# Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). 
-# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# -# -set -x - -# $1 - error code -# $2 - test name -# $3,.. - decription -test_fail() { - error=$1 - shift - name=$1 - shift - echo "TEST [$name] FAILED:" - echo "$@" - exit $error -} - -# $@ - additional vm opts -start_test() { - # disable core dump on *nix - ulimit -S -c 0 - # disable core dump on windows - VMOPTS="$@ -XX:-CreateMinidumpOnCrash" - cmd="${JAVA} ${VMOPTS} -XX:+ReplayCompiles -XX:ReplayDataFile=${replay_data}" - echo $cmd - $cmd - return $? -} - -# $1 - error_code -# $2 - test name -# $3,.. - additional vm opts -positive_test() { - error=$1 - shift - name=$1 - shift - VMOPTS="${TESTOPTS} $@" - echo "POSITIVE TEST [$name]" - start_test ${VMOPTS} - exit_code=$? - if [ ${exit_code} -ne 0 ] - then - test_fail $error "$name" "exit_code[${exit_code}] != 0 during replay "\ - "w/ vmopts: ${VMOPTS}" - fi -} - -# $1 - error_code -# $2 - test name -# $2,.. - additional vm opts -negative_test() { - error=$1 - shift - name=$1 - shift - VMOPTS="${TESTOPTS} $@" - echo "NEGATIVE TEST [$name]" - start_test ${VMOPTS} - exit_code=$? 
- if [ ${exit_code} -eq 0 ] - then - test_fail $error "$name" "exit_code[${exit_code}] == 0 during replay "\ - "w/ vmopts: ${VMOPTS}" - fi -} - -# $1 - initial error_code -common_tests() { - positive_test $1 "COMMON :: THE SAME FLAGS" - if [ $tiered_available -eq 1 ] - then - positive_test `expr $1 + 1` "COMMON :: TIERED" -XX:+TieredCompilation - fi -} - -# $1 - initial error_code -# $2 - non-tiered comp_level -nontiered_tests() { - level=`grep "^compile " $replay_data | awk '{print $6}'` - # is level available in non-tiered - if [ "$level" -eq $2 ] - then - positive_test $1 "NON-TIERED :: AVAILABLE COMP_LEVEL" \ - -XX:-TieredCompilation - else - negative_test `expr $1 + 1` "NON-TIERED :: UNAVAILABLE COMP_LEVEL" \ - -XX:-TieredCompilation - fi -} - -# $1 - initial error_code -client_tests() { - # testing in opposite VM - if [ $server_available -eq 1 ] - then - negative_test $1 "SERVER :: NON-TIERED" -XX:-TieredCompilation \ - -server - if [ $tiered_available -eq 1 ] - then - positive_test `expr $1 + 1` "SERVER :: TIERED" -XX:+TieredCompilation \ - -server - fi - fi - nontiered_tests `expr $1 + 2` $client_level -} - -# $1 - initial error_code -server_tests() { - # testing in opposite VM - if [ $client_available -eq 1 ] - then - # tiered is unavailable in client vm, so results w/ flags will be the same as w/o flags - negative_test $1 "CLIENT" -client - fi - nontiered_tests `expr $1 + 2` $server_level -} - -cleanup() { - ${RM} -f core* - ${RM} -f replay*.txt - ${RM} -f hs_err_pid*.log - ${RM} -f test_core - ${RM} -f test_replay.txt -} - -JAVA=${TESTJAVA}${FS}bin${FS}java - -replay_data=test_replay.txt - -${JAVA} ${TESTOPTS} -Xinternalversion 2>&1 | grep debug - -# Only test fastdebug -if [ $? 
-ne 0 ] -then - echo TEST SKIPPED: product build - exit 0 -fi - -is_int=`${JAVA} ${TESTOPTS} -version 2>&1 | grep -c "interpreted mode"` -# Not applicable for Xint -if [ $is_int -ne 0 ] -then - echo TEST SKIPPED: interpreted mode - exit 0 -fi - -cleanup - -client_available=`${JAVA} ${TESTOPTS} -client -Xinternalversion 2>&1 | \ - grep -c Client` -server_available=`${JAVA} ${TESTOPTS} -server -Xinternalversion 2>&1 | \ - grep -c Server` -tiered_available=`${JAVA} ${TESTOPTS} -XX:+TieredCompilation -XX:+PrintFlagsFinal -version | \ - grep TieredCompilation | \ - grep -c true` -is_tiered=`${JAVA} ${TESTOPTS} -XX:+PrintFlagsFinal -version | \ - grep TieredCompilation | \ - grep -c true` -# CompLevel_simple -- C1 -client_level=1 -# CompLevel_full_optimization -- C2 or Shark -server_level=4 - -echo "client_available=$client_available" -echo "server_available=$server_available" -echo "tiered_available=$tiered_available" -echo "is_tiered=$is_tiered" - -# crash vm in compiler thread with generation replay data and 'small' dump-file -# $@ - additional vm opts -generate_replay() { - if [ $VM_OS != "windows" ] - then - # enable core dump - ulimit -c unlimited - new_ulimit=`ulimit -c` - if [ $new_ulimit != "unlimited" -a $new_ulimit != "-1" ] - then - test_fail 2 "CHECK :: ULIMIT" "Could not set 'ulimit -c unlimited'. 
'ulimit -c' returns : $new_ulimit" - fi - - if [ $VM_OS = "solaris" ] - then - coreadm -p core $$ - fi - fi - - cmd="${JAVA} ${TESTOPTS} $@ \ - -Xms8m \ - -Xmx32m \ - -XX:MetaspaceSize=4m \ - -XX:MaxMetaspaceSize=16m \ - -XX:InitialCodeCacheSize=512k \ - -XX:ReservedCodeCacheSize=4m \ - -XX:ThreadStackSize=512 \ - -XX:VMThreadStackSize=512 \ - -XX:CompilerThreadStackSize=512 \ - -XX:ParallelGCThreads=1 \ - -XX:CICompilerCount=2 \ - -Xcomp \ - -XX:CICrashAt=1 \ - -XX:+CreateMinidumpOnCrash \ - -XX:+DumpReplayDataOnError \ - -XX:-TransmitErrorReport \ - -XX:+PreferInterpreterNativeStubs \ - -XX:+PrintCompilation \ - -XX:ReplayDataFile=${replay_data} \ - -version" - echo GENERATION OF REPLAY.TXT: - echo $cmd - - ${cmd} > crash.out 2>&1 - - exit_code=$? - if [ ${exit_code} -eq 0 ] - then - cat crash.out - test_fail 3 "CHECK :: CRASH" "JVM exits gracefully" - fi - - core_locations=`grep -i core crash.out | grep "location:" | \ - sed -e 's/.*location: //'` - - if [ -z "${core_locations}" ] - then - test_fail 4 "CHECK :: CORE_LOCATION" "output doesn't contain the location of core file, see crash.out" - fi - - rm crash.out - - # processing core locations for *nix - if [ $VM_OS != "windows" ] - then - # remove 'or' between '/core.' and 'core' - # and the rest of line -- ' (max size ...) . To ensure a full core ...' - core_locations=`echo $core_locations | \ - sed -e 's/\([^ ]*\) or \([^ ]*\).*/\1 \2/'` - core_with_dir=`echo $core_locations | awk '{print $1}'` - core_with_pid=`echo $core_locations | awk '{print $2}'` - dir=`dirname $core_with_dir` - file=`basename $core_with_dir` - # add /core. 
core - core_locations='$core_with_dir' '$file' - if [ -n "${core_with_pid}" ] - then - core_locations=$core_locations '$core_with_pid' '$dir${FS}$core_with_pid' - fi - fi - - echo "LOOKING FOR CORE IN ${core_locations}" - for core in $core_locations - do - if [ -r "$core" ] - then - core_file=$core - fi - done - - # core-file was found - if [ -n "$core_file" ] - then - ${MV} "${core_file}" test_core - core_file=test_core - fi - - ${RM} -f hs_err_pid*.log -} - diff --git a/hotspot/test/compiler/escapeAnalysis/TestArrayCopy.java b/hotspot/test/compiler/escapeAnalysis/TestArrayCopy.java new file mode 100644 index 00000000000..91c09a05acb --- /dev/null +++ b/hotspot/test/compiler/escapeAnalysis/TestArrayCopy.java @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2016, SAP SE and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * @test + * @bug 8159611 + * @summary The elimination of System.arraycopy by EscapeAnalysis prevents + * an IndexOutOfBoundsException from being thrown if the arraycopy + * is called with a negative length argument. + * @modules java.base/jdk.internal.misc + * @library /testlibrary /test/lib + * @build sun.hotspot.WhiteBox + * @run driver ClassFileInstaller sun.hotspot.WhiteBox + * sun.hotspot.WhiteBox$WhiteBoxPermission + * + * @run main/othervm + * -Xbootclasspath/a:. + * -XX:+UnlockDiagnosticVMOptions + * -XX:+WhiteBoxAPI + * -XX:-UseOnStackReplacement + * compiler.escapeAnalysis.TestArrayCopy + * + * @author Volker Simonis + */ + +package compiler.escapeAnalysis; + +import sun.hotspot.WhiteBox; +import java.lang.reflect.Method; + +public class TestArrayCopy { + + private static final WhiteBox WB = WhiteBox.getWhiteBox(); + // DST_LEN Must be const, otherwise EliminateAllocations won't work + static final int DST_LEN = 4; + static final int SRC_LEN = 8; + + public static boolean do_test1(Object src, int src_pos, int dst_pos, int cpy_len) { + try { + System.arraycopy(src, src_pos, new Object[DST_LEN], dst_pos, cpy_len); + return false; + } catch (IndexOutOfBoundsException e) { + return true; + } + } + + public static int do_test2(Object src, int src_pos, int dst_pos, int cpy_len) { + try { + System.arraycopy(src, src_pos, new Object[DST_LEN], dst_pos, cpy_len); + return 0; + } catch (IndexOutOfBoundsException e) { + return 1; + } catch (ArrayStoreException e) { + return 2; + } + } + + static final int COUNT = 100_000; + static final int[] src_pos = { 0, -1, -1, 0, 0, 0, 1, 1, 1, 1, 1 }; + static final int[] dst_pos = { 0, -1, 0, -1, 0, 1, 0, 1, 1, 1, 1 }; + static final int[] cpy_len = { 0, 0, 0, 0, -1, -1, -1, -1, 8, 4, 2 }; + + public static void main(String args[]) throws Exception { + int length = args.length > 0 ? 
Integer.parseInt(args[0]) : -1; + int[] int_arr = new int[SRC_LEN]; + Object[] obj_arr = new Object[SRC_LEN]; + + Method test1 = TestArrayCopy.class.getMethod("do_test1", Object.class, int.class, int.class, int.class); + Method test2 = TestArrayCopy.class.getMethod("do_test2", Object.class, int.class, int.class, int.class); + + for (int i = 0; i < src_pos.length; i++) { + int sp = src_pos[i]; + int dp = dst_pos[i]; + int cl = cpy_len[i]; + String version1 = String.format("System.arraycopy(Object[8], %d, new Object[%d], %d, %d)", sp, DST_LEN, dp, cl); + String version2 = String.format("System.arraycopy(int[8], %d, new Object[%d], %d, %d)", sp, DST_LEN, dp, cl); + System.out.format("Testing " + version1 + "\nand " + version2).flush(); + for (int x = 0; x < COUNT; x++) { + if (!do_test1(obj_arr, sp, dp, cl) && + (sp < 0 || dp < 0 || cl < 0 || (sp + cl >= SRC_LEN) || (dp + cl >= DST_LEN))) { + throw new RuntimeException("Expected IndexOutOfBoundsException for " + version1); + } + int res = do_test2(int_arr, sp, dp, cl); + if (res == 0 || res == 1) { + throw new RuntimeException("Expected ArrayStoreException for " + version2); + } + } + WB.deoptimizeMethod(test1); + WB.clearMethodState(test1); + WB.deoptimizeMethod(test2); + WB.clearMethodState(test2); + } + + } +} diff --git a/hotspot/test/compiler/intrinsics/unsafe/TestCAEAntiDep.java b/hotspot/test/compiler/intrinsics/unsafe/TestCAEAntiDep.java new file mode 100644 index 00000000000..5bec829dddd --- /dev/null +++ b/hotspot/test/compiler/intrinsics/unsafe/TestCAEAntiDep.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8167298 + * @summary Unsafe.compareAndExchangeObject should keep track of returned type after matching + * @modules java.base/jdk.internal.misc + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-BackgroundCompilation -XX:-UseOnStackReplacement -XX:-UseCompressedOops TestCAEAntiDep + * + */ + +import java.lang.reflect.Field; +import jdk.internal.misc.Unsafe; + +public class TestCAEAntiDep { + static final jdk.internal.misc.Unsafe UNSAFE = Unsafe.getUnsafe(); + static final long O_OFFSET; + + static class C { + int f1; + } + + C o = new C(); + + static { + try { + Field oField = TestCAEAntiDep.class.getDeclaredField("o"); + O_OFFSET = UNSAFE.objectFieldOffset(oField); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + static int m(TestCAEAntiDep test, Object expected, Object x) { + C old = (C)UNSAFE.compareAndExchangeObjectVolatile(test, O_OFFSET, expected, x); + int res = old.f1; + old.f1 = 0x42; + return res; + } + + static public void main(String[] args) { + TestCAEAntiDep test = new TestCAEAntiDep(); + for (int i = 0; i < 20000; i++) { + m(test, test.o, test.o); + } + } +} diff --git a/hotspot/test/compiler/jvmci/TestJVMCIPrintProperties.java 
b/hotspot/test/compiler/jvmci/TestJVMCIPrintProperties.java index 5a3307a57f4..630780dbb26 100644 --- a/hotspot/test/compiler/jvmci/TestJVMCIPrintProperties.java +++ b/hotspot/test/compiler/jvmci/TestJVMCIPrintProperties.java @@ -37,14 +37,13 @@ public class TestJVMCIPrintProperties { ProcessBuilder pb = ProcessTools.createJavaProcessBuilder( "-XX:+UnlockExperimentalVMOptions", "-XX:+EnableJVMCI", - "-XX:+JVMCIPrintProperties", - "-version"); + "-XX:+JVMCIPrintProperties"); OutputAnalyzer output = new OutputAnalyzer(pb.start()); output.shouldContain("[JVMCI properties]"); // expected message - output.shouldContain("String jvmci.Compiler"); // expected message - output.shouldContain("Boolean jvmci.InitTimer"); // expected message - output.shouldContain("Boolean jvmci.PrintConfig"); // expected message - output.shouldContain("String jvmci.TraceMethodDataFilter"); // expected message + output.shouldContain("jvmci.Compiler = null"); // expected message + output.shouldContain("jvmci.InitTimer = false"); // expected message + output.shouldContain("jvmci.PrintConfig = false"); // expected message + output.shouldContain("jvmci.TraceMethodDataFilter = null"); // expected message output.shouldHaveExitValue(0); } } diff --git a/hotspot/test/compiler/jvmci/jdk.vm.ci.runtime.test/src/jdk/vm/ci/runtime/test/TestResolvedJavaMethod.java b/hotspot/test/compiler/jvmci/jdk.vm.ci.runtime.test/src/jdk/vm/ci/runtime/test/TestResolvedJavaMethod.java index c1474c16c25..e5dfb856580 100644 --- a/hotspot/test/compiler/jvmci/jdk.vm.ci.runtime.test/src/jdk/vm/ci/runtime/test/TestResolvedJavaMethod.java +++ b/hotspot/test/compiler/jvmci/jdk.vm.ci.runtime.test/src/jdk/vm/ci/runtime/test/TestResolvedJavaMethod.java @@ -33,6 +33,7 @@ package jdk.vm.ci.runtime.test; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -60,6 +61,7 @@ import org.junit.Test; import 
jdk.vm.ci.meta.ConstantPool; import jdk.vm.ci.meta.ExceptionHandler; import jdk.vm.ci.meta.ResolvedJavaMethod; +import jdk.vm.ci.meta.ResolvedJavaMethod.Parameter; import jdk.vm.ci.meta.ResolvedJavaType; /** @@ -266,6 +268,26 @@ public class TestResolvedJavaMethod extends MethodUniverse { } } + @Test + public void getParametersTest() { + for (Map.Entry e : methods.entrySet()) { + java.lang.reflect.Parameter[] expected = e.getKey().getParameters(); + Parameter[] actual = e.getValue().getParameters(); + assertEquals(actual.length, expected.length); + for (int i = 0; i < actual.length; i++) { + java.lang.reflect.Parameter exp = expected[i]; + Parameter act = actual[i]; + assertEquals(exp.getName(), act.getName()); + assertEquals(exp.getModifiers(), act.getModifiers()); + assertEquals(exp.getModifiers(), act.getModifiers()); + assertArrayEquals(exp.getAnnotations(), act.getAnnotations()); + assertEquals(exp.getType().getName(), act.getType().toClassName()); + assertEquals(exp.getParameterizedType(), act.getParameterizedType()); + assertEquals(metaAccess.lookupJavaMethod(exp.getDeclaringExecutable()), act.getDeclaringMethod()); + } + } + } + @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.METHOD) @interface TestAnnotation { diff --git a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java index b9d959ec57a..11792d73a1e 100644 --- a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java @@ -78,9 +78,14 @@ public class IntrinsicPredicates { new CPUSpecificPredicate("aarch64.*", new String[] { "sha256" },null))))))); public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE - = new OrPredicate( - new CPUSpecificPredicate("sparc.*", new String[] { "sha512" },null), - new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" },null)); + = new OrPredicate(new 
CPUSpecificPredicate("x86.*", new String[] { "sha" },null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "sha" },null), + new OrPredicate(new CPUSpecificPredicate("i386.*", new String[] { "sha" },null), + new OrPredicate(new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null), + new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), + new OrPredicate( + new CPUSpecificPredicate("sparc.*", new String[] { "sha512" },null), + new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" },null))))))); public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, diff --git a/hotspot/test/gc/g1/TestGCLogMessages.java b/hotspot/test/gc/g1/TestGCLogMessages.java index 6b2ac94d699..5d6ddba0873 100644 --- a/hotspot/test/gc/g1/TestGCLogMessages.java +++ b/hotspot/test/gc/g1/TestGCLogMessages.java @@ -23,7 +23,7 @@ /* * @test TestGCLogMessages - * @bug 8035406 8027295 8035398 8019342 8027959 8048179 8027962 8069330 8076463 8150630 + * @bug 8035406 8027295 8035398 8019342 8027959 8048179 8027962 8069330 8076463 8150630 8160055 * @summary Ensure the output for a minor GC with G1 * includes the expected necessary messages. 
* @key gc @@ -31,6 +31,9 @@ * @library /test/lib * @modules java.base/jdk.internal.misc * java.management + * @build sun.hotspot.WhiteBox + * @run main ClassFileInstaller sun.hotspot.WhiteBox + * @run main TestGCLogMessages */ import jdk.test.lib.process.OutputAnalyzer; @@ -122,6 +125,7 @@ public class TestGCLogMessages { public static void main(String[] args) throws Exception { new TestGCLogMessages().testNormalLogs(); new TestGCLogMessages().testWithToSpaceExhaustionLogs(); + new TestGCLogMessages().testWithInitialMark(); } private void testNormalLogs() throws Exception { @@ -183,6 +187,20 @@ public class TestGCLogMessages { output.shouldHaveExitValue(0); } + private void testWithInitialMark() throws Exception { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+UseG1GC", + "-Xmx10M", + "-Xbootclasspath/a:.", + "-Xlog:gc*=debug", + "-XX:+UnlockDiagnosticVMOptions", + "-XX:+WhiteBoxAPI", + GCTestWithInitialMark.class.getName()); + + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldContain("Clear Claimed Marks"); + output.shouldHaveExitValue(0); + } + static class GCTest { private static byte[] garbage; public static void main(String [] args) { @@ -209,5 +227,13 @@ public class TestGCLogMessages { System.out.println("Done"); } } + + static class GCTestWithInitialMark { + public static void main(String [] args) { + sun.hotspot.WhiteBox WB = sun.hotspot.WhiteBox.getWhiteBox(); + WB.g1StartConcMarkCycle(); + } + } + } diff --git a/hotspot/test/gc/g1/TestHumongousShrinkHeap.java b/hotspot/test/gc/g1/TestHumongousShrinkHeap.java index 53bcbe077b8..47ea9450988 100644 --- a/hotspot/test/gc/g1/TestHumongousShrinkHeap.java +++ b/hotspot/test/gc/g1/TestHumongousShrinkHeap.java @@ -51,23 +51,29 @@ public class TestHumongousShrinkHeap { private static final int REGION_SIZE = 1024 * 1024; // 1M private static final int LISTS_COUNT = 10; private static final int HUMON_SIZE = Math.round(.9f * REGION_SIZE); - private static final long 
AVAILABLE_MEMORY - = Runtime.getRuntime().freeMemory(); - private static final int HUMON_COUNT - = (int) ((AVAILABLE_MEMORY / HUMON_SIZE) - / LISTS_COUNT); + private static final long TOTAL_MEMORY = Runtime.getRuntime().totalMemory(); + private static final long MAX_MEMORY = Runtime.getRuntime().maxMemory(); + + private static final int HUMON_COUNT = (int) ((TOTAL_MEMORY / HUMON_SIZE) / LISTS_COUNT); public static void main(String[] args) { if (HUMON_COUNT == 0) { System.out.println("Skipped. Heap is too small"); return; } - System.out.format("Running with %s max heap size. " - + "Will allocate humongous object of %s size %d times.%n", - MemoryUsagePrinter.humanReadableByteCount(AVAILABLE_MEMORY, false), - MemoryUsagePrinter.humanReadableByteCount(HUMON_SIZE, false), - HUMON_COUNT + + if (TOTAL_MEMORY + REGION_SIZE * HUMON_COUNT > MAX_MEMORY) { + System.out.println("Skipped. Initial heap size is to close to max heap size."); + return; + } + + System.out.format("Running with %s initial heap size of %s maximum heap size. " + + "Will allocate humongous object of %s size %d times.%n", + MemoryUsagePrinter.humanReadableByteCount(TOTAL_MEMORY, false), + MemoryUsagePrinter.humanReadableByteCount(MAX_MEMORY, false), + MemoryUsagePrinter.humanReadableByteCount(HUMON_SIZE, false), + HUMON_COUNT ); new TestHumongousShrinkHeap().test(); } diff --git a/hotspot/test/gc/g1/logging/TestG1LoggingFailure.java b/hotspot/test/gc/g1/logging/TestG1LoggingFailure.java new file mode 100644 index 00000000000..8f89007c518 --- /dev/null +++ b/hotspot/test/gc/g1/logging/TestG1LoggingFailure.java @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + + /* + * @test TestG1LoggingFailure + * @bug 8151034 + * @summary Regression test for G1 logging at OOME + * @requires vm.gc.G1 + * @library /test/lib + * @modules java.management + * @build gc.g1.logging.TestG1LoggingFailure + * @run main/timeout=300 gc.g1.logging.TestG1LoggingFailure + */ +package gc.g1.logging; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import jdk.test.lib.process.OutputAnalyzer; +import jdk.test.lib.process.ProcessTools; +import jdk.test.lib.Utils; + +public class TestG1LoggingFailure { + + public static void main(String[] args) throws Throwable { + List options = new ArrayList<>(); + Collections.addAll(options, Utils.getTestJavaOpts()); + Collections.addAll(options, + "-XX:+UseG1GC", + "-Xmx20m", + "-Xmn10m", + "-Xlog:gc=info", + "-XX:G1HeapRegionSize=1m" + ); + + options.add(Alloc.class.getName()); + + // According to https://bugs.openjdk.java.net/browse/JDK-8146009 failure happens not every time. + // Will try to reproduce this failure. 
+ for (int iteration = 0; iteration < 40; ++iteration) { + startVM(options); + } + } + + private static void startVM(List options) throws Throwable, RuntimeException { + OutputAnalyzer out = ProcessTools.executeTestJvm(options.toArray(new String[options.size()])); + + out.shouldNotContain("pure virtual method called"); + out.shouldContain("Exception: java.lang.OutOfMemoryError thrown from the UncaughtExceptionHandler in thread \"main\""); + + if (out.getExitValue() == 0) { + System.out.println(out.getOutput()); + throw new RuntimeException("Expects Alloc failure."); + } + } + + // Simple class to be executed in separate VM. + static class Alloc { + + public static final int CHUNK = 1024; + public static ArrayList arr = new ArrayList<>(); + + public static void main(String[] args) { + try { + while (true) { + arr.add(new byte[CHUNK]); + } + } catch (OutOfMemoryError oome) { + } + while (true) { + arr.add(new byte[CHUNK]); + } + } + } +} diff --git a/hotspot/test/gc/stress/gcbasher/ByteCursor.java b/hotspot/test/gc/stress/gcbasher/ByteCursor.java new file mode 100644 index 00000000000..e7e20039d6e --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/ByteCursor.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +class ByteCursor { + private int offset; + private byte[] data; + + public ByteCursor(byte[] data) { + this.offset = 0; + this.data = data; + } + + public int getOffset() { + return offset; + } + + public void skipBytes(int n) { + offset += n; + } + + public int readUnsignedByte() { + int val = readUnsignedByteAt(offset); + offset += 1; + return val; + } + + public int readUnsignedByteAt(int offset) { + return data[offset++] & 0xff; + } + + public int readUnsignedShort() { + int val = readUnsignedShortAt(offset); + offset += 2; + return val; + } + + public int readInt() { + int val = readIntAt(offset); + offset += 4; + return val; + } + + public int readUnsignedShortAt(int offset) { + int b1 = data[offset++] & 0xff; + int b2 = data[offset] & 0xff; + + return (b1 << 8) + b2; + } + + public int readIntAt(int offset) { + int s1 = readUnsignedShortAt(offset); + int s2 = readUnsignedShortAt(offset + 2); + return (s1 << 16) + s2; + } + + public String readUtf8(int length) throws IllegalStateException { + char str[] = new char[length]; + int count = 0; + int pos = 0; + while (count < length) { + int c = readUnsignedByte(); + switch (c >> 4) { + case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: { + // 0xxxxxxx + count++; + if(c == '/') { + str[pos++] = '.'; + } else { + str[pos++] = (char) c; + } + break; + } case 12: case 13: { + // 110x xxxx 10xx xxxx + count += 2; + int c2 = readUnsignedByte(); + if ((c2 & 0xC0) != 0x80) { + throw new IllegalStateException(); + } + str[pos++] = (char) (((c & 0x1F) << 6) | (c2 & 0x3F)); + break; + } case 14: { + // 1110 xxxx 
10xx xxxx 10xx xxxx + count += 3; + int c2 = readUnsignedByte(); + int c3 = readUnsignedByte(); + if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80) { + throw new IllegalStateException(); + } + str[pos++] = (char)(((c & 0x0F) << 12) | + ((c2 & 0x3F) << 6) | + ((c3 & 0x3F) << 0)); + break; + } default: + // 10xx xxxx, 1111 xxxx + throw new IllegalStateException(); + } + } + return new String(str); + } +} diff --git a/hotspot/test/gc/stress/gcbasher/Bytecode.java b/hotspot/test/gc/stress/gcbasher/Bytecode.java new file mode 100644 index 00000000000..f0d8b6ee16f --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/Bytecode.java @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +class Bytecode { + public static final int IINC = 132; + public static final int TABLESWITCH = 170; + public static final int LOOKUPSWITCH = 171; + public static final int GETSTATIC = 178; + public static final int PUTSTATIC = 179; + public static final int GETFIELD = 180; + public static final int PUTFIELD = 181; + public static final int INVOKEVIRTUAL = 182; + public static final int INVOKESPECIAL = 183; + public static final int INVOKESTATIC = 184; + public static final int INVOKEINTERFACE = 185; + public static final int NEW = 187; + public static final int ANEWARRAY = 189; + public static final int CHECKCAST = 192; + public static final int INSTANCEOF = 193; + public static final int MULTIANEWARRAY = 197; + public static final int WIDE = 196; + + private static final int lengths[] = { + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 3, + 2, + 3, + 3, + 2, + 2, + 2, + 2, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 2, + 2, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 2, + 99, + 99, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 5, + 5, + 3, + 2, + 3, + 1, + 1, + 3, + 3, + 1, + 1, + 0, + 4, + 3, + 3, + 5, + 5, + 1 + }; + + public static int getLength(int bc) throws IllegalArgumentException { + if ((bc < 0) || (bc >= lengths.length)) { + throw new IllegalArgumentException("Unknown 
bytecode " + bc); + } + return lengths[bc]; + } +} diff --git a/hotspot/test/gc/stress/gcbasher/ClassInfo.java b/hotspot/test/gc/stress/gcbasher/ClassInfo.java new file mode 100644 index 00000000000..3c04f2f2744 --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/ClassInfo.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +import java.util.HashSet; +import java.util.Set; + +class ClassInfo { + private String name; + + private Set staticResolution; + private Set staticInitialization; + private Set constructorResolution; + private Set constructorInitialization; + private Set methodResolution; + private Set methodInitialization; + + public ClassInfo(String name) { + this.name = name; + + staticResolution = new HashSet<>(); + staticInitialization = new HashSet<>(); + constructorResolution = new HashSet<>(); + constructorInitialization = new HashSet<>(); + methodResolution = new HashSet<>(); + methodInitialization = new HashSet<>(); + } + + public String getName() { + return name; + } + + public void addResolutionDep(Dependency d) { + if(d.getMethodName().equals("")) { + staticResolution.add(d); + } else if(d.getMethodName().equals("")) { + constructorResolution.add(d); + } else { + methodResolution.add(d); + } + } + + public void addInitializationDep(Dependency d) { + if(d.getMethodName().equals("")) { + staticInitialization.add(d); + } else if(d.getMethodName().equals("")) { + constructorInitialization.add(d); + } else { + methodInitialization.add(d); + } + } +} diff --git a/hotspot/test/gc/stress/gcbasher/ConstantPoolEntry.java b/hotspot/test/gc/stress/gcbasher/ConstantPoolEntry.java new file mode 100644 index 00000000000..4931a6a1695 --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/ConstantPoolEntry.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +class ConstantPoolEntry { + private int index; + private String value; + + public ConstantPoolEntry(int index) { + this.index = index; + value = null; + } + + public ConstantPoolEntry(String value) { + this.index = -1; + this.value = value; + } + + public String getValue() throws IllegalStateException { + if (index != -1) { + throw new IllegalStateException(); + } + return value; + } + + public int getNameIndex() throws IllegalStateException { + if (value != null) { + throw new IllegalStateException(); + } + return index; + } + + public int getClassIndex() throws IllegalStateException { + if (value != null) { + throw new IllegalStateException(); + } + return index; + } +} diff --git a/hotspot/test/gc/stress/gcbasher/Decompiler.java b/hotspot/test/gc/stress/gcbasher/Decompiler.java new file mode 100644 index 00000000000..b56b4b324d4 --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/Decompiler.java @@ -0,0 +1,334 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +class Decompiler { + private ByteCursor cursor; + private ClassInfo ci; + + public Decompiler(byte[] classData) { + cursor = new ByteCursor(classData); + + int magicNumber = cursor.readInt(); + if (magicNumber != 0xCAFEBABE) { + throw new IllegalArgumentException("Bad magic number " + magicNumber); + } + + cursor.readUnsignedShort(); // Minor version + cursor.readUnsignedShort(); // Major version + + ConstantPoolEntry[] constantPool = decodeConstantPool(); + + cursor.readUnsignedShort(); // Access flags + + // this class index in constant pool; + int classInfo = cursor.readUnsignedShort(); + int classInfoNameIndex = constantPool[classInfo].getNameIndex(); + ci = new ClassInfo(constantPool[classInfoNameIndex].getValue()); + + cursor.readUnsignedShort(); // superclass + + int numInterfaces = cursor.readUnsignedShort(); + for (int i = 0; i < numInterfaces; i++) { + cursor.readUnsignedShort(); // interface + } + + decodeFields(); + MethodInfo[] methods = decodeMethods(constantPool); + decodeMethodDependencies(methods, constantPool); + } + + public ClassInfo getClassInfo() { + return ci; + } + + private boolean isDependency(String name, String className) { + return !name.equals(className) && !name.startsWith("["); + } + + private void 
addDependency(MethodInfo m, String name) { + Dependency d = new Dependency(m.getName(), m.getDescriptor(), name); + ci.addResolutionDep(d); + } + + private String resolveName(ConstantPoolEntry[] constantPool, int cpi) { + int nameIndex = constantPool[cpi].getNameIndex(); + return constantPool[nameIndex].getValue(); + } + + private void decodeMethodDependencies(MethodInfo[] methods, ConstantPoolEntry[] constantPool) { + for (int i = 0; i < methods.length; i++) { + MethodInfo m = methods[i]; + final int stopCheck = m.getCodeStart() + m.getCodeLength(); + + int byteCodeIndex = m.getCodeStart(); + while (byteCodeIndex < stopCheck) { + int bc = cursor.readUnsignedByteAt(byteCodeIndex); + + switch (bc) { + // These opcodes cause name resolution or initialization + // Their index bytes all point to a CONSTANT_Class (4.4.1) + case Bytecode.ANEWARRAY: + case Bytecode.CHECKCAST: + case Bytecode.INSTANCEOF: + case Bytecode.MULTIANEWARRAY: + case Bytecode.NEW: { + int cpi = cursor.readUnsignedShortAt(byteCodeIndex + 1); + String name = resolveName(constantPool, cpi); + + if (isDependency(name, ci.getName())) { + addDependency(m, name); + } + break; + } + + // These opcodes cause name resolution or initialization + // Their index bytes all point to a CONSTANT_Field/Methodref (4.4.2) + case Bytecode.GETFIELD: + case Bytecode.INVOKEINTERFACE: + case Bytecode.INVOKESPECIAL: + case Bytecode.INVOKEVIRTUAL: + case Bytecode.PUTFIELD: + case Bytecode.PUTSTATIC: + case Bytecode.GETSTATIC: + case Bytecode.INVOKESTATIC: { + int cpi = cursor.readUnsignedShortAt(byteCodeIndex + 1); + int classIndex = constantPool[cpi].getClassIndex(); + String name = resolveName(constantPool, classIndex); + + if (isDependency(name, ci.getName())) { + addDependency(m, name); + } + break; + } + + case Bytecode.LOOKUPSWITCH: { + byteCodeIndex++; + int offset = byteCodeIndex - m.getCodeStart(); + while (offset % 4 != 0) { + offset++; + byteCodeIndex++; + } + + int def = cursor.readIntAt(byteCodeIndex); + 
byteCodeIndex +=4; + + int npairs = cursor.readIntAt(byteCodeIndex); + byteCodeIndex +=4; + byteCodeIndex += (8 * npairs); + continue; + } + + case Bytecode.TABLESWITCH: { + byteCodeIndex++; + int offset = byteCodeIndex - m.getCodeStart(); + while (offset % 4 != 0) { + offset++; + byteCodeIndex++; + } + + int def = cursor.readIntAt(byteCodeIndex); + byteCodeIndex +=4; + + int low = cursor.readIntAt(byteCodeIndex); + byteCodeIndex +=4; + int high = cursor.readIntAt(byteCodeIndex); + byteCodeIndex +=4; + byteCodeIndex += (4 * (high - low + 1)); + continue; + } + + case Bytecode.WIDE: { + bc = cursor.readUnsignedByteAt(++byteCodeIndex); + if (bc == Bytecode.IINC) { + byteCodeIndex += 5; + } else { + byteCodeIndex += 3; + } + continue; + } + } + + byteCodeIndex += Bytecode.getLength(bc); + } + + if (byteCodeIndex - stopCheck > 1) { + String err = "bad finish for method " + m.getName() + + "End + " + (byteCodeIndex - stopCheck); + throw new IllegalArgumentException(err); + } + } + } + + private MethodInfo[] decodeMethods(ConstantPoolEntry[] constantPool) { + MethodInfo[] methods = new MethodInfo[cursor.readUnsignedShort()]; + + for (int i = 0; i < methods.length; i++) { + cursor.readUnsignedShort(); // access flags + + String name = constantPool[cursor.readUnsignedShort()].getValue(); + String descriptor = constantPool[cursor.readUnsignedShort()].getValue(); + + int codeLength = 0; + int codeStart = 0; + + int numAttributes = cursor.readUnsignedShort(); // attributes count + for (int j = 0; j < numAttributes; j++) { + int type = cursor.readUnsignedShort(); // attrib nameIndex + int aLen = cursor.readInt(); // attrib length + + if (constantPool[type].getValue().equals("Code")) { + cursor.readUnsignedShort(); // Max stack + cursor.readUnsignedShort(); // Max locals + + codeLength = cursor.readInt(); + codeStart = cursor.getOffset(); + + cursor.skipBytes(codeLength); // Need to skip the code bytes + cursor.skipBytes(cursor.readUnsignedShort() * 8); // Skip exception table 
+ + int numSubAttributes = cursor.readUnsignedShort(); + for (int k = 0; k < numSubAttributes; k++) { + cursor.readUnsignedShort(); // sub name + cursor.skipBytes(cursor.readInt()); // sub attrib data + } + } else { + cursor.skipBytes(aLen); // unknown attrib data + } + } + + methods[i] = new MethodInfo(name, descriptor, codeLength, codeStart); + } + + return methods; + } + + private void decodeFields() { + // Looks like we dont need any field info, throw it away! + int numFields = cursor.readUnsignedShort(); + + for (int i = 0; i < numFields; i++) { + cursor.readUnsignedShort(); // access flags + cursor.readUnsignedShort(); // nameIndex + cursor.readUnsignedShort(); // descriptorIndex + + int numAttributes = cursor.readUnsignedShort(); + for (int j = 0; j < numAttributes; j++) { + cursor.readUnsignedShort(); // nameIndex + int length = cursor.readInt(); + cursor.skipBytes(length); // data + } + } + } + + private ConstantPoolEntry[] decodeConstantPool() { + final int CONSTANT_Utf8 = 1; + final int CONSTANT_Unicode = 2; + final int CONSTANT_Integer = 3; + final int CONSTANT_Float = 4; + final int CONSTANT_Long = 5; + final int CONSTANT_Double = 6; + final int CONSTANT_Class = 7; + final int CONSTANT_String = 8; + final int CONSTANT_Fieldref = 9; + final int CONSTANT_Methodref = 10; + final int CONSTANT_InterfaceMethodref = 11; + final int CONSTANT_NameAndType = 12; + final int CONSTANT_MethodHandle = 15; + final int CONSTANT_MethodType = 16; + final int CONSTANT_InvokeDynamic = 18; + + ConstantPoolEntry[] constantPool = new ConstantPoolEntry[cursor.readUnsignedShort()]; + + // The constant pool starts at index 1 + for (int i = 1; i < constantPool.length; i++) { + int type = cursor.readUnsignedByte(); + + switch (type) { + case CONSTANT_Class: + constantPool[i] = new ConstantPoolEntry(cursor.readUnsignedShort()); // name_index + break; + + case CONSTANT_Fieldref: case CONSTANT_Methodref: case CONSTANT_InterfaceMethodref: + constantPool[i] = new 
ConstantPoolEntry(cursor.readUnsignedShort()); // class_index + cursor.readUnsignedShort(); // name_and_type_index + break; + + case CONSTANT_String: + cursor.readUnsignedShort(); // string_index + break; + + case CONSTANT_Integer: + cursor.readInt(); // bytes + break; + + case CONSTANT_Float: + cursor.readInt(); // bytes + break; + + case CONSTANT_Long: + cursor.readInt(); // high_bytes + cursor.readInt(); // low_bytes + i++; // 8 byte constants use 2 constant pool slots. + break; + + case CONSTANT_Double: + cursor.readInt(); // high_bytes + cursor.readInt(); // low_bytes + i++; // 8 byte constants use 2 constant pool slots. + break; + + case CONSTANT_NameAndType: + constantPool[i] = new ConstantPoolEntry(cursor.readUnsignedShort()); // name_index + cursor.readUnsignedShort(); // descriptor_index + break; + + case CONSTANT_Utf8: + int length = cursor.readUnsignedShort(); // length + constantPool[i] = new ConstantPoolEntry(cursor.readUtf8(length)); // bytes[length] + break; + + case CONSTANT_MethodHandle: + cursor.readUnsignedByte(); // reference_kind + cursor.readUnsignedShort(); // reference_index + break; + + case CONSTANT_MethodType: + cursor.readUnsignedShort(); // descriptor_index + break; + + case CONSTANT_InvokeDynamic: + cursor.readUnsignedShort(); // bootstrap_method_attr_index + cursor.readUnsignedShort(); // name_and_type_index + break; + + default: + String err = "Unknown constant pool type " + String.valueOf(type) + "\n" + + "CPE " + i + " of " + constantPool.length + "\n" + + "Byte offset " + Integer.toHexString(cursor.getOffset()); + throw new IllegalArgumentException(err); + } + } + return constantPool; + } +} diff --git a/hotspot/test/gc/stress/gcbasher/Dependency.java b/hotspot/test/gc/stress/gcbasher/Dependency.java new file mode 100644 index 00000000000..0e40d4d0560 --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/Dependency.java @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +class Dependency { + private String methodName; + private String methodDescriptor; + private String target; + + public Dependency(String methodName, String methodDescriptor, String target) { + this.methodName = methodName; + this.methodDescriptor = methodDescriptor; + this.target = target; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Dependency)) { + return false; + } + + Dependency other = (Dependency)o; + return target.equals(other.target) && + methodName.equals(other.methodName) && + methodDescriptor.equals(other.methodDescriptor); + } + + @Override + public int hashCode() { + return methodName.hashCode() ^ methodDescriptor.hashCode() ^ target.hashCode(); + } + + public String getMethodName() { + return methodName; + } +} diff --git a/hotspot/test/gc/stress/gcbasher/MethodInfo.java b/hotspot/test/gc/stress/gcbasher/MethodInfo.java new file mode 100644 index 00000000000..09688003a3b --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/MethodInfo.java @@ 
-0,0 +1,53 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +class MethodInfo { + private String name; + private String descriptor; + private int codeLength; + private int codeStart; + + public MethodInfo(String name, String descriptor, int codeLength, int codeStart) { + this.name = name; + this.descriptor = descriptor; + this.codeLength = codeLength; + this.codeStart = codeStart; + } + + public String getName() { + return name; + } + + public String getDescriptor() { + return descriptor; + } + + public int getCodeLength() { + return codeLength; + } + + public int getCodeStart() { + return codeStart; + } +} diff --git a/hotspot/test/gc/stress/gcbasher/TestGCBasher.java b/hotspot/test/gc/stress/gcbasher/TestGCBasher.java new file mode 100644 index 00000000000..1229f01e097 --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/TestGCBasher.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +import java.io.IOException; +import java.net.URI; +import java.nio.file.FileSystems; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.stream.Stream; + +public class TestGCBasher { + private static void parseClassFiles() throws IOException { + HashMap deps = new HashMap<>(); + + FileSystem fs = FileSystems.getFileSystem(URI.create("jrt:/")); + Stream s = Files.walk(fs.getPath("/")); + for (Path p : (Iterable)s::iterator) { + if (p.toString().endsWith(".class")) { + byte[] data = Files.readAllBytes(p); + Decompiler d = new Decompiler(data); + ClassInfo ci = d.getClassInfo(); + deps.put(ci.getName(), ci); + } + } + } + + public static void run(String[] args) throws IOException { + if (args.length != 1) { + System.err.println("Usage: TestGCBasher "); + return; + } + + long durationMillis = Long.valueOf(args[0]); + long start = System.currentTimeMillis(); + while (System.currentTimeMillis() - start < 
durationMillis) { + parseClassFiles(); + } + } +} diff --git a/hotspot/test/gc/stress/gcbasher/TestGCBasherWithCMS.java b/hotspot/test/gc/stress/gcbasher/TestGCBasherWithCMS.java new file mode 100644 index 00000000000..d22a7c49d62 --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/TestGCBasherWithCMS.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +import java.io.IOException; + +/* + * @test TestGCBasherWithCMS + * @key gc + * @key stress + * @requires vm.gc.ConcMarkSweep + * @requires vm.flavor == "server" + * @summary Stress the CMS GC by trying to make old objects more likely to be garbage than young objects. 
+ * @run main/othervm/timeout=200 -Xlog:gc*=info -Xmx128m -server -XX:+UseConcMarkSweepGC TestGCBasherWithCMS 120000 + */ +public class TestGCBasherWithCMS { + public static void main(String[] args) throws IOException { + TestGCBasher.run(args); + } +} diff --git a/hotspot/test/gc/stress/gcbasher/TestGCBasherWithG1.java b/hotspot/test/gc/stress/gcbasher/TestGCBasherWithG1.java new file mode 100644 index 00000000000..6fe81c22ec7 --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/TestGCBasherWithG1.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +import java.io.IOException; + +/* + * @test TestGCBasherWithG1 + * @key gc + * @key stress + * @requires vm.gc.G1 + * @requires vm.flavor == "server" + * @summary Stress the G1 GC by trying to make old objects more likely to be garbage than young objects. 
+ * @run main/othervm/timeout=200 -Xlog:gc*=info -Xmx128m -server -XX:+UseG1GC TestGCBasherWithG1 120000 + */ +public class TestGCBasherWithG1 { + public static void main(String[] args) throws IOException { + TestGCBasher.run(args); + } +} diff --git a/hotspot/test/gc/stress/gcbasher/TestGCBasherWithParallel.java b/hotspot/test/gc/stress/gcbasher/TestGCBasherWithParallel.java new file mode 100644 index 00000000000..a60deeb00a7 --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/TestGCBasherWithParallel.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +import java.io.IOException; + +/* + * @test TestGCBasherWithParallel + * @key gc + * @key stress + * @requires vm.gc.Parallel + * @requires vm.flavor == "server" + * @summary Stress the Parallel GC by trying to make old objects more likely to be garbage than young objects. 
+ * @run main/othervm/timeout=200 -Xlog:gc*=info -Xmx256m -server -XX:+UseParallelGC -XX:-UseGCOverheadLimit TestGCBasherWithParallel 120000 + */ +public class TestGCBasherWithParallel { + public static void main(String[] args) throws IOException { + TestGCBasher.run(args); + } +} diff --git a/hotspot/test/gc/stress/gcbasher/TestGCBasherWithSerial.java b/hotspot/test/gc/stress/gcbasher/TestGCBasherWithSerial.java new file mode 100644 index 00000000000..0ba15ccb7cd --- /dev/null +++ b/hotspot/test/gc/stress/gcbasher/TestGCBasherWithSerial.java @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +import java.io.IOException; + +/* + * @test TestGCBasherWithSerial + * @key gc + * @key stress + * @requires vm.gc.Serial + * @requires vm.flavor == "server" + * @summary Stress the Serial GC by trying to make old objects more likely to be garbage than young objects. 
+ * @run main/othervm/timeout=200 -Xlog:gc*=info -Xmx128m -server -XX:+UseSerialGC TestGCBasherWithSerial 120000 + */ +public class TestGCBasherWithSerial { + public static void main(String[] args) throws IOException { + TestGCBasher.run(args); + } +} diff --git a/hotspot/test/native/code/test_dependencyContext.cpp b/hotspot/test/native/code/test_dependencyContext.cpp new file mode 100644 index 00000000000..45b54d7cd80 --- /dev/null +++ b/hotspot/test/native/code/test_dependencyContext.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "code/dependencyContext.hpp" +#include "unittest.hpp" + +class TestDependencyContext { + public: + nmethod* _nmethods[3]; + + intptr_t _dependency_context; + + DependencyContext dependencies() { + DependencyContext depContext(&_dependency_context); + return depContext; + } + + TestDependencyContext() : _dependency_context(DependencyContext::EMPTY) { + CodeCache_lock->lock_without_safepoint_check(); + + _nmethods[0] = reinterpret_cast(0x8 * 0); + _nmethods[1] = reinterpret_cast(0x8 * 1); + _nmethods[2] = reinterpret_cast(0x8 * 2); + + dependencies().add_dependent_nmethod(_nmethods[2]); + dependencies().add_dependent_nmethod(_nmethods[1]); + dependencies().add_dependent_nmethod(_nmethods[0]); + } + + ~TestDependencyContext() { + dependencies().wipe(); + CodeCache_lock->unlock(); + } + + static bool has_stale_entries(DependencyContext ctx) { + return ctx.has_stale_entries(); + } + +#ifndef PRODUCT + static bool find_stale_entries(DependencyContext ctx) { + return ctx.find_stale_entries(); + } +#endif +}; + +static void test_remove_dependent_nmethod(int id, bool delete_immediately) { + TestDependencyContext c; + DependencyContext depContext = c.dependencies(); + NOT_PRODUCT(ASSERT_FALSE(TestDependencyContext::find_stale_entries(depContext))); + ASSERT_FALSE(TestDependencyContext::has_stale_entries(depContext)); + + nmethod* nm = c._nmethods[id]; + depContext.remove_dependent_nmethod(nm, delete_immediately); + + if (!delete_immediately) { + NOT_PRODUCT(ASSERT_TRUE(TestDependencyContext::find_stale_entries(depContext))); + ASSERT_TRUE(TestDependencyContext::has_stale_entries(depContext)); + NOT_PRODUCT(ASSERT_TRUE(depContext.is_dependent_nmethod(nm))); + depContext.expunge_stale_entries(); + } + + NOT_PRODUCT(ASSERT_FALSE(TestDependencyContext::find_stale_entries(depContext))); + ASSERT_FALSE(TestDependencyContext::has_stale_entries(depContext)); + NOT_PRODUCT(ASSERT_FALSE(depContext.is_dependent_nmethod(nm))); +} + 
+TEST(code, dependency_context) { + test_remove_dependent_nmethod(0, false); + test_remove_dependent_nmethod(1, false); + test_remove_dependent_nmethod(2, false); + + test_remove_dependent_nmethod(0, true); + test_remove_dependent_nmethod(1, true); + test_remove_dependent_nmethod(2, true); +} diff --git a/hotspot/test/native/gc/g1/test_freeRegionList.cpp b/hotspot/test/native/gc/g1/test_freeRegionList.cpp new file mode 100644 index 00000000000..5c848e4d0c5 --- /dev/null +++ b/hotspot/test/native/gc/g1/test_freeRegionList.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "gc/g1/g1CollectedHeap.inline.hpp" +#include "unittest.hpp" + +// @requires UseG1GC +TEST_VM(FreeRegionList, length) { + if (!UseG1GC) { + return; + } + + FreeRegionList l("test"); + const uint num_regions_in_test = 5; + + // Create a fake heap. It does not need to be valid, as the HeapRegion constructor + // does not access it. 
+ MemRegion heap(NULL, num_regions_in_test * HeapRegion::GrainWords); + + // Allocate a fake BOT because the HeapRegion constructor initializes + // the BOT. + size_t bot_size = G1BlockOffsetTable::compute_size(heap.word_size()); + HeapWord* bot_data = NEW_C_HEAP_ARRAY(HeapWord, bot_size, mtGC); + ReservedSpace bot_rs(G1BlockOffsetTable::compute_size(heap.word_size())); + G1RegionToSpaceMapper* bot_storage = + G1RegionToSpaceMapper::create_mapper(bot_rs, + bot_rs.size(), + os::vm_page_size(), + HeapRegion::GrainBytes, + BOTConstants::N_bytes, + mtGC); + G1BlockOffsetTable bot(heap, bot_storage); + bot_storage->commit_regions(0, num_regions_in_test); + + // Set up memory regions for the heap regions. + MemRegion mr0(heap.start(), HeapRegion::GrainWords); + MemRegion mr1(mr0.end(), HeapRegion::GrainWords); + MemRegion mr2(mr1.end(), HeapRegion::GrainWords); + MemRegion mr3(mr2.end(), HeapRegion::GrainWords); + MemRegion mr4(mr3.end(), HeapRegion::GrainWords); + + HeapRegion hr0(0, &bot, mr0); + HeapRegion hr1(1, &bot, mr1); + HeapRegion hr2(2, &bot, mr2); + HeapRegion hr3(3, &bot, mr3); + HeapRegion hr4(4, &bot, mr4); + l.add_ordered(&hr1); + l.add_ordered(&hr0); + l.add_ordered(&hr3); + l.add_ordered(&hr4); + l.add_ordered(&hr2); + + EXPECT_EQ(l.length(), num_regions_in_test) << "Wrong free region list length"; + l.verify_list(); + + bot_storage->uncommit_regions(0, num_regions_in_test); + delete bot_storage; + FREE_C_HEAP_ARRAY(HeapWord, bot_data); +} diff --git a/hotspot/test/native/gc/g1/test_workerDataArray.cpp b/hotspot/test/native/gc/g1/test_workerDataArray.cpp new file mode 100644 index 00000000000..2895fecbec0 --- /dev/null +++ b/hotspot/test/native/gc/g1/test_workerDataArray.cpp @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "gc/g1/workerDataArray.inline.hpp" +#include "memory/resourceArea.hpp" +#include "unittest.hpp" +#include "utilities/ostream.hpp" + +static const double epsilon = 0.0001; + +template +class WorkerDataArrayTest : public ::testing::Test { + protected: + WorkerDataArrayTest() : + title("Test array"), + array(3, title) { + } + + const char* print_summary() { + stringStream out; + array.print_summary_on(&out); + return out.as_string(); + } + + const char* print_details() { + stringStream out; + array.print_details_on(&out); + return out.as_string(); + } + + const char* print_expected_summary() { + return prepend_with(title, expected_summary()); + } + + const char* print_expected_details() { + return prepend_with("", expected_details()); + } + + // Returns the expected summary for an array without uninitialized elements. + // Used because the string representation of a double depends on the locale. + static const char* format_summary( + T min, double avg, T max, T diff, T sum, size_t workers); + + const char* title; + WorkerDataArray 
array; + + private: + virtual const char* expected_summary() = 0; + virtual const char* expected_details() = 0; + + static const char* prepend_with(const char* str, const char* orig) { + stringStream out; + out.print("%-25s", str); + out.print("%s", orig); + return out.as_string(); + } + + ResourceMark rm; +}; + +template<> +const char* WorkerDataArrayTest::format_summary( + size_t min, double avg, size_t max, size_t diff, size_t sum, size_t workers) { + + stringStream out; + out.print(" Min: " SIZE_FORMAT + ", Avg: %4.1lf, Max: " SIZE_FORMAT + ", Diff: " SIZE_FORMAT ", Sum: " SIZE_FORMAT + ", Workers: " SIZE_FORMAT "\n", + min, avg, max, diff, sum, workers); + return out.as_string(); +} + +template<> +const char* WorkerDataArrayTest::format_summary( + double min, double avg, double max, double diff, double sum, size_t workers) { + + stringStream out; + out.print(" Min: %4.1lf" + ", Avg: %4.1lf, Max: %4.1lf" + ", Diff: %4.1lf, Sum: %4.1lf" + ", Workers: " SIZE_FORMAT "\n", + min, avg, max, diff, sum, workers); + return out.as_string(); +} + +class BasicWorkerDataArrayTest : public WorkerDataArrayTest { + protected: + BasicWorkerDataArrayTest() { + array.set(0, 5); + array.set(1, 3); + array.set(2, 7); + } + + private: + virtual const char* expected_summary() { + return format_summary(3, 5.0, 7, 4, 15, 3); + } + + virtual const char* expected_details() { + return " 5 3 7\n"; + } +}; + +TEST_F(BasicWorkerDataArrayTest, sum_test) { + ASSERT_EQ(15u, array.sum()); +} + +TEST_F(BasicWorkerDataArrayTest, average_test) { + ASSERT_NEAR(5.0, array.average(), epsilon); +} + +TEST_F(BasicWorkerDataArrayTest, print_summary_on_test) { + ASSERT_STREQ(print_expected_summary(), print_summary()); +} + +TEST_F(BasicWorkerDataArrayTest, print_details_on_test) { + ASSERT_STREQ(print_expected_details(), print_details()); +} + +class AddWorkerDataArrayTest : public WorkerDataArrayTest { + protected: + AddWorkerDataArrayTest() { + array.set(0, 5); + array.set(1, 3); + array.set(2, 7); + + 
for (uint i = 0; i < 3; i++) { + array.add(i, 1); + } + } + + private: + virtual const char* expected_summary() { + return format_summary(4, 6.0, 8, 4, 18, 3); + } + + virtual const char* expected_details() { + return " 6 4 8\n"; + } +}; + +TEST_F(AddWorkerDataArrayTest, sum_test) { + ASSERT_EQ(18u, array.sum()); +} + +TEST_F(AddWorkerDataArrayTest, average_test) { + ASSERT_NEAR(6.0, array.average(), epsilon); +} + +TEST_F(AddWorkerDataArrayTest, print_summary_on_test) { + ASSERT_STREQ(print_expected_summary(), print_summary()); +} + +TEST_F(AddWorkerDataArrayTest, print_details_on_test) { + ASSERT_STREQ(print_expected_details(), print_details()); +} + +class UninitializedElementWorkerDataArrayTest : public WorkerDataArrayTest { + protected: + UninitializedElementWorkerDataArrayTest() { + array.set(0, 5); + array.set(1, WorkerDataArray::uninitialized()); + array.set(2, 7); + } + + private: + virtual const char* expected_summary() { + return format_summary(5, 6.0, 7, 2, 12, 2); + } + + virtual const char* expected_details() { + return " 5 - 7\n"; + } +}; + +TEST_F(UninitializedElementWorkerDataArrayTest, sum_test) { + ASSERT_EQ(12u, array.sum()); +} + +TEST_F(UninitializedElementWorkerDataArrayTest, average_test) { + ASSERT_NEAR(6.0, array.average(), epsilon); +} + +TEST_F(UninitializedElementWorkerDataArrayTest, print_summary_on_test) { + ASSERT_STREQ(print_expected_summary(), print_summary()); +} + +TEST_F(UninitializedElementWorkerDataArrayTest, print_details_on_test) { + ASSERT_STREQ(print_expected_details(), print_details()); +} + +class UninitializedWorkerDataArrayTest : public WorkerDataArrayTest { + protected: + UninitializedWorkerDataArrayTest() { + array.set(0, WorkerDataArray::uninitialized()); + array.set(1, WorkerDataArray::uninitialized()); + array.set(2, WorkerDataArray::uninitialized()); + } + + private: + virtual const char* expected_summary() { + return " skipped\n"; + } + + virtual const char* expected_details() { + return " - - -\n"; + } +}; + 
+TEST_F(UninitializedWorkerDataArrayTest, sum_test) { + ASSERT_EQ(0u, array.sum()); +} + +TEST_F(UninitializedWorkerDataArrayTest, average_test) { + ASSERT_NEAR(0.0, array.average(), epsilon); +} + +TEST_F(UninitializedWorkerDataArrayTest, print_summary_on_test) { + ASSERT_STREQ(print_expected_summary(), print_summary()); +} + +TEST_F(UninitializedWorkerDataArrayTest, print_details_on_test) { + ASSERT_STREQ(print_expected_details(), print_details()); +} + +class UninitializedDoubleElementWorkerDataArrayTest : public WorkerDataArrayTest { + protected: + UninitializedDoubleElementWorkerDataArrayTest() { + array.set(0, 5.1 / MILLIUNITS); + array.set(1, WorkerDataArray::uninitialized()); + array.set(2, 7.2 / MILLIUNITS); + } + + private: + virtual const char* expected_summary() { + return format_summary(5.1, 6.1, 7.2, 2.1, 12.3, 2); + } + + virtual const char* expected_details() { + stringStream out; + out.print(" %4.1lf - %4.1lf\n", 5.1, 7.2); + return out.as_string(); + } +}; + +TEST_F(UninitializedDoubleElementWorkerDataArrayTest, sum_test) { + ASSERT_NEAR(12.3 / MILLIUNITS, array.sum(), epsilon); +} + +TEST_F(UninitializedDoubleElementWorkerDataArrayTest, average_test) { + ASSERT_NEAR(6.15 / MILLIUNITS, array.average(), epsilon); +} + +TEST_F(UninitializedDoubleElementWorkerDataArrayTest, print_summary_on_test) { + ASSERT_STREQ(print_expected_summary(), print_summary()); +} + +TEST_F(UninitializedDoubleElementWorkerDataArrayTest, print_details_on_test) { + ASSERT_STREQ(print_expected_details(), print_details()); +} diff --git a/hotspot/test/native/gc/shared/test_collectorPolicy.cpp b/hotspot/test/native/gc/shared/test_collectorPolicy.cpp new file mode 100644 index 00000000000..e964980d7b2 --- /dev/null +++ b/hotspot/test/native/gc/shared/test_collectorPolicy.cpp @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "gc/shared/collectorPolicy.hpp" +#include "unittest.hpp" +#include "utilities/macros.hpp" + +class TestGenCollectorPolicy { + public: + + class Executor { + public: + virtual void execute() = 0; + }; + + class UnaryExecutor : public Executor { + protected: + const size_t param; + public: + UnaryExecutor(size_t val) : param(val) { } + }; + + class BinaryExecutor : public Executor { + protected: + const size_t param1; + const size_t param2; + public: + BinaryExecutor(size_t val1, size_t val2) : param1(val1), param2(val2) { } + }; + + class MinHeapSizeGuard { + private: + const size_t _stored_min_heap_size; + public: + MinHeapSizeGuard() : _stored_min_heap_size(Arguments::min_heap_size()) { } + ~MinHeapSizeGuard() { + Arguments::set_min_heap_size(_stored_min_heap_size); + } + }; + + class TestWrapper { + public: + static void test(Executor* setter1, Executor* setter2, Executor* checker) { + FLAG_GUARD(InitialHeapSize); + FLAG_GUARD(MaxHeapSize); + FLAG_GUARD(MaxNewSize); + FLAG_GUARD(MinHeapDeltaBytes); + 
FLAG_GUARD(NewSize); + FLAG_GUARD(OldSize); + MinHeapSizeGuard min_heap_size_guard; + + FLAG_SET_ERGO(size_t, InitialHeapSize, 100 * M); + FLAG_SET_ERGO(size_t, OldSize, 4 * M); + FLAG_SET_ERGO(size_t, NewSize, 1 * M); + FLAG_SET_ERGO(size_t, MaxNewSize, 80 * M); + Arguments::set_min_heap_size(40 * M); + + ASSERT_NO_FATAL_FAILURE(setter1->execute()); + + if (setter2 != NULL) { + ASSERT_NO_FATAL_FAILURE(setter2->execute()); + } + + ASSERT_NO_FATAL_FAILURE(checker->execute()); + } + static void test(Executor* setter, Executor* checker) { + test(setter, NULL, checker); + } + }; + + class SetNewSizeErgo : public UnaryExecutor { + public: + SetNewSizeErgo(size_t param) : UnaryExecutor(param) { } + void execute() { + FLAG_SET_ERGO(size_t, NewSize, param); + } + }; + + class CheckYoungMin : public UnaryExecutor { + public: + CheckYoungMin(size_t param) : UnaryExecutor(param) { } + void execute() { + MarkSweepPolicy msp; + msp.initialize_all(); + ASSERT_LE(msp.min_young_size(), param); + } + }; + + class CheckScaledYoungInitial : public Executor { + public: + void execute() { + size_t initial_heap_size = InitialHeapSize; + MarkSweepPolicy msp; + msp.initialize_all(); + + if (InitialHeapSize > initial_heap_size) { + // InitialHeapSize was adapted by msp.initialize_all, e.g. due to alignment + // caused by 64K page size. 
+ initial_heap_size = InitialHeapSize; + } + + size_t expected = msp.scale_by_NewRatio_aligned(initial_heap_size); + ASSERT_EQ(expected, msp.initial_young_size()); + ASSERT_EQ(expected, NewSize); + } + }; + + class SetNewSizeCmd : public UnaryExecutor { + public: + SetNewSizeCmd(size_t param) : UnaryExecutor(param) { } + void execute() { + FLAG_SET_CMDLINE(size_t, NewSize, param); + } + }; + + class CheckYoungInitial : public UnaryExecutor { + public: + CheckYoungInitial(size_t param) : UnaryExecutor(param) { } + void execute() { + MarkSweepPolicy msp; + msp.initialize_all(); + + ASSERT_EQ(param, msp.initial_young_size()); + } + }; + + class SetOldSizeCmd : public UnaryExecutor { + public: + SetOldSizeCmd(size_t param) : UnaryExecutor(param) { } + void execute() { + FLAG_SET_CMDLINE(size_t, OldSize, param); + } + }; + + class SetMaxNewSizeCmd : public BinaryExecutor { + public: + SetMaxNewSizeCmd(size_t param1, size_t param2) : BinaryExecutor(param1, param2) { } + void execute() { + size_t heap_alignment = CollectorPolicy::compute_heap_alignment(); + size_t new_size_value = align_size_up(MaxHeapSize, heap_alignment) + - param1 + param2; + FLAG_SET_CMDLINE(size_t, MaxNewSize, new_size_value); + } + }; + + class CheckOldMin : public UnaryExecutor { + public: + CheckOldMin(size_t param) : UnaryExecutor(param) { } + void execute() { + MarkSweepPolicy msp; + msp.initialize_all(); + ASSERT_LE(msp.min_old_size(), param); + } + }; + + class CheckOldInitial : public Executor { + public: + void execute() { + size_t heap_alignment = CollectorPolicy::compute_heap_alignment(); + + MarkSweepPolicy msp; + msp.initialize_all(); + + size_t expected_old_initial = align_size_up(InitialHeapSize, heap_alignment) + - MaxNewSize; + + ASSERT_EQ(expected_old_initial, msp.initial_old_size()); + } + }; + + class CheckOldInitialMaxNewSize : public BinaryExecutor { + public: + CheckOldInitialMaxNewSize(size_t param1, size_t param2) : BinaryExecutor(param1, param2) { } + void execute() { + 
size_t heap_alignment = CollectorPolicy::compute_heap_alignment(); + size_t new_size_value = align_size_up(MaxHeapSize, heap_alignment) + - param1 + param2; + + MarkSweepPolicy msp; + msp.initialize_all(); + + size_t expected_old_initial = align_size_up(MaxHeapSize, heap_alignment) + - new_size_value; + + ASSERT_EQ(expected_old_initial, msp.initial_old_size()); + } + }; +}; + + +// Testing that the NewSize flag is handled correctly is hard because it +// depends on so many other configurable variables. These tests only try to +// verify that there are some basic rules for NewSize honored by the policies. + +// If NewSize has been ergonomically set, the collector policy +// should use it for min +TEST_VM(CollectorPolicy, young_min_ergo) { + TestGenCollectorPolicy::SetNewSizeErgo setter(20 * M); + TestGenCollectorPolicy::CheckYoungMin checker(20 * M); + + TestGenCollectorPolicy::TestWrapper::test(&setter, &checker); +} + +// If NewSize has been ergonomically set, the collector policy +// should use it for min but calculate the initial young size +// using NewRatio. +TEST_VM(CollectorPolicy, young_scaled_initial_ergo) { + TestGenCollectorPolicy::SetNewSizeErgo setter(20 * M); + TestGenCollectorPolicy::CheckScaledYoungInitial checker; + + TestGenCollectorPolicy::TestWrapper::test(&setter, &checker); +} + + +// Since a flag has been set with FLAG_SET_CMDLINE it +// will be treated as if it had been set on the command line for +// the rest of the VM lifetime. This is an irreversible change and +// could impact other tests so we use TEST_OTHER_VM +TEST_OTHER_VM(CollectorPolicy, young_cmd) { + // If NewSize is set on the command line, it should be used + // for both min and initial young size if less than min heap. 
+ TestGenCollectorPolicy::SetNewSizeCmd setter(20 * M); + + TestGenCollectorPolicy::CheckYoungMin checker_min(20 * M); + TestGenCollectorPolicy::TestWrapper::test(&setter, &checker_min); + + TestGenCollectorPolicy::CheckYoungInitial checker_initial(20 * M); + TestGenCollectorPolicy::TestWrapper::test(&setter, &checker_initial); + + // If NewSize is set on command line, but is larger than the min + // heap size, it should only be used for initial young size. + TestGenCollectorPolicy::SetNewSizeCmd setter_large(80 * M); + TestGenCollectorPolicy::CheckYoungInitial checker_large(80 * M); + TestGenCollectorPolicy::TestWrapper::test(&setter_large, &checker_large); +} + +// Since a flag has been set with FLAG_SET_CMDLINE it +// will be treated as if it had been set on the command line for +// the rest of the VM lifetime. This is an irreversible change and +// could impact other tests so we use TEST_OTHER_VM +TEST_OTHER_VM(CollectorPolicy, old_cmd) { + // If OldSize is set on the command line, it should be used + // for both min and initial old size if less than min heap. + TestGenCollectorPolicy::SetOldSizeCmd setter(20 * M); + + TestGenCollectorPolicy::CheckOldMin checker_min(20 * M); + TestGenCollectorPolicy::TestWrapper::test(&setter, &checker_min); + + TestGenCollectorPolicy::CheckOldInitial checker_initial; + TestGenCollectorPolicy::TestWrapper::test(&setter, &checker_initial); + + // If MaxNewSize is large, the maximum OldSize will be less than + // what's requested on the command line and it should be reset + // ergonomically. 
+ // We intentionally set MaxNewSize + OldSize > MaxHeapSize + TestGenCollectorPolicy::SetOldSizeCmd setter_old_size(30 * M); + TestGenCollectorPolicy::SetMaxNewSizeCmd setter_max_new_size(30 * M, 20 * M); + TestGenCollectorPolicy::CheckOldInitialMaxNewSize checker_large(30 * M, 20 * M); + + TestGenCollectorPolicy::TestWrapper::test(&setter_old_size, &setter_max_new_size, &checker_large); +} diff --git a/hotspot/test/native/logging/test_logConfiguration.cpp b/hotspot/test/native/logging/test_logConfiguration.cpp index cba192a6d3e..92d595e8059 100644 --- a/hotspot/test/native/logging/test_logConfiguration.cpp +++ b/hotspot/test/native/logging/test_logConfiguration.cpp @@ -25,6 +25,7 @@ #include "logTestFixture.hpp" #include "logTestUtils.inline.hpp" #include "logging/logConfiguration.hpp" +#include "logging/logFileStreamOutput.hpp" #include "logging/logLevel.hpp" #include "logging/logOutput.hpp" #include "logging/logTag.hpp" @@ -68,8 +69,8 @@ TEST_VM_F(LogConfigurationTest, describe) { const char* description = ss.as_string(); // Verify that stdout and stderr are listed by default - EXPECT_PRED2(string_contains_substring, description, LogOutput::Stdout->name()); - EXPECT_PRED2(string_contains_substring, description, LogOutput::Stderr->name()); + EXPECT_PRED2(string_contains_substring, description, StdoutLog.name()); + EXPECT_PRED2(string_contains_substring, description, StderrLog.name()); // Verify that each tag, level and decorator is listed for (size_t i = 0; i < LogTag::Count; i++) { @@ -126,7 +127,7 @@ TEST_VM_F(LogConfigurationTest, update_output) { EXPECT_TRUE(is_described("logging=info")); // Verify by iterating over tagsets - LogOutput* o = LogOutput::Stdout; + LogOutput* o = &StdoutLog; for (LogTagSet* ts = LogTagSet::first(); ts != NULL; ts = ts->next()) { EXPECT_TRUE(ts->has_output(o)); EXPECT_TRUE(ts->is_level(LogLevel::Info)); @@ -178,8 +179,8 @@ TEST_VM_F(LogConfigurationTest, disable_logging) { // Verify that no tagset has logging enabled for 
(LogTagSet* ts = LogTagSet::first(); ts != NULL; ts = ts->next()) { - EXPECT_FALSE(ts->has_output(LogOutput::Stdout)); - EXPECT_FALSE(ts->has_output(LogOutput::Stderr)); + EXPECT_FALSE(ts->has_output(&StdoutLog)); + EXPECT_FALSE(ts->has_output(&StderrLog)); EXPECT_FALSE(ts->is_level(LogLevel::Error)); } } @@ -193,7 +194,7 @@ TEST_VM_F(LogConfigurationTest, disable_output) { EXPECT_TRUE(is_described("#0: stdout all=off")); // Verify by iterating over tagsets - LogOutput* o = LogOutput::Stdout; + LogOutput* o = &StdoutLog; for (LogTagSet* ts = LogTagSet::first(); ts != NULL; ts = ts->next()) { EXPECT_FALSE(ts->has_output(o)); EXPECT_FALSE(ts->is_level(LogLevel::Error)); @@ -247,7 +248,7 @@ TEST_VM_F(LogConfigurationTest, parse_empty_command_line_arguments) { bool ret = LogConfiguration::parse_command_line_arguments(cmdline); EXPECT_TRUE(ret) << "Error parsing command line arguments '" << cmdline << "'"; for (LogTagSet* ts = LogTagSet::first(); ts != NULL; ts = ts->next()) { - EXPECT_EQ(LogLevel::Unspecified, ts->level_for(LogOutput::Stdout)); + EXPECT_EQ(LogLevel::Unspecified, ts->level_for(&StdoutLog)); } } } @@ -306,7 +307,7 @@ TEST_F(LogConfigurationTest, configure_stdout) { EXPECT_FALSE(log_is_enabled(Debug, logging)); EXPECT_FALSE(log_is_enabled(Info, gc)); LogTagSet* logging_ts = &LogTagSetMapping::tagset(); - EXPECT_EQ(LogLevel::Info, logging_ts->level_for(LogOutput::Stdout)); + EXPECT_EQ(LogLevel::Info, logging_ts->level_for(&StdoutLog)); // Enable 'gc=debug' (no wildcard), verifying no other tags are enabled LogConfiguration::configure_stdout(LogLevel::Debug, true, LOG_TAGS(gc)); @@ -316,9 +317,9 @@ TEST_F(LogConfigurationTest, configure_stdout) { for (LogTagSet* ts = LogTagSet::first(); ts != NULL; ts = ts->next()) { if (ts->contains(PREFIX_LOG_TAG(gc))) { if (ts->ntags() == 1) { - EXPECT_EQ(LogLevel::Debug, ts->level_for(LogOutput::Stdout)); + EXPECT_EQ(LogLevel::Debug, ts->level_for(&StdoutLog)); } else { - EXPECT_EQ(LogLevel::Off, 
ts->level_for(LogOutput::Stdout)); + EXPECT_EQ(LogLevel::Off, ts->level_for(&StdoutLog)); } } } @@ -329,12 +330,12 @@ TEST_F(LogConfigurationTest, configure_stdout) { EXPECT_TRUE(log_is_enabled(Trace, gc, heap)); for (LogTagSet* ts = LogTagSet::first(); ts != NULL; ts = ts->next()) { if (ts->contains(PREFIX_LOG_TAG(gc))) { - EXPECT_EQ(LogLevel::Trace, ts->level_for(LogOutput::Stdout)); + EXPECT_EQ(LogLevel::Trace, ts->level_for(&StdoutLog)); } else if (ts == logging_ts) { // Previous setting for 'logging' should remain - EXPECT_EQ(LogLevel::Info, ts->level_for(LogOutput::Stdout)); + EXPECT_EQ(LogLevel::Info, ts->level_for(&StdoutLog)); } else { - EXPECT_EQ(LogLevel::Off, ts->level_for(LogOutput::Stdout)); + EXPECT_EQ(LogLevel::Off, ts->level_for(&StdoutLog)); } } @@ -345,7 +346,7 @@ TEST_F(LogConfigurationTest, configure_stdout) { EXPECT_FALSE(log_is_enabled(Error, gc)); EXPECT_FALSE(log_is_enabled(Error, gc, heap)); for (LogTagSet* ts = LogTagSet::first(); ts != NULL; ts = ts->next()) { - EXPECT_EQ(LogLevel::Off, ts->level_for(LogOutput::Stdout)); + EXPECT_EQ(LogLevel::Off, ts->level_for(&StdoutLog)); } } diff --git a/hotspot/test/native/logging/test_logDecorations.cpp b/hotspot/test/native/logging/test_logDecorations.cpp index 3894c655412..f4c7b87e391 100644 --- a/hotspot/test/native/logging/test_logDecorations.cpp +++ b/hotspot/test/native/logging/test_logDecorations.cpp @@ -133,8 +133,8 @@ TEST(LogDecorations, iso8601_time) { // Verify format int y, M, d, h, m; double s; - int read = sscanf(timestr, "%d-%d-%dT%d:%d:%lfZ", &y, &M, &d, &h, &m, &s); - ASSERT_EQ(6, read); + int read = sscanf(timestr, "%d-%d-%dT%d:%d:%lf", &y, &M, &d, &h, &m, &s); + ASSERT_EQ(6, read) << "Invalid format: " << timestr; // Verify reported time & date struct tm reported_time = {0}; @@ -156,6 +156,48 @@ TEST(LogDecorations, iso8601_time) { << ", expected time: " << expected_ts; } +// Test the utctime decoration +TEST(LogDecorations, iso8601_utctime) { + LogDecorators 
decorator_selection; + ASSERT_TRUE(decorator_selection.parse("utctime")); + LogDecorations decorations(LogLevel::Info, tagset, decorator_selection); + + const char *timestr = decorations.decoration(LogDecorators::utctime_decorator); + time_t expected_ts = time(NULL); + + // Verify format + char trailing_character; + int y, M, d, h, m, offset; + double s; + int read = sscanf(timestr, "%d-%d-%dT%d:%d:%lf%c%d", &y, &M, &d, &h, &m, &s, &trailing_character, &offset); + ASSERT_GT(read, 7) << "Invalid format: " << timestr; + + // Ensure time is UTC (no offset) + if (trailing_character == '+') { + ASSERT_EQ(0, offset) << "Invalid offset: " << timestr; + } else { + ASSERT_EQ('Z', trailing_character) << "Invalid offset: " << timestr; + } + + struct tm reported_time = {0}; + reported_time.tm_year = y - 1900; + reported_time.tm_mon = M - 1; + reported_time.tm_mday = d; + reported_time.tm_hour = h; + reported_time.tm_min = m; + reported_time.tm_sec = s; + reported_time.tm_isdst = 0; // No DST for UTC timestamps + time_t reported_ts = mktime(&reported_time); + expected_ts = mktime(gmtime(&expected_ts)); + time_t diff = reported_ts - expected_ts; + if (diff < 0) { + diff = -diff; + } + // Allow up to 10 seconds in difference + ASSERT_LE(diff, 10) << "Reported time: " << reported_ts << " (" << timestr << ")" + << ", expected time: " << expected_ts; +} + // Test the pid and tid decorations TEST(LogDecorations, identifiers) { LogDecorators decorator_selection; diff --git a/hotspot/test/native/logging/test_logDecorators.cpp b/hotspot/test/native/logging/test_logDecorators.cpp index 3d392225857..49020abdce0 100644 --- a/hotspot/test/native/logging/test_logDecorators.cpp +++ b/hotspot/test/native/logging/test_logDecorators.cpp @@ -172,20 +172,20 @@ TEST(LogDecorators, combine_with) { // Select first and third decorator for dec1 char input[64]; - sprintf(input, "%s,%s", decorator_name_array[0], decorator_name_array[2]); + sprintf(input, "%s,%s", decorator_name_array[0], 
decorator_name_array[3]); dec1.parse(input); EXPECT_TRUE(dec1.is_decorator(decorator_array[0])); - EXPECT_TRUE(dec1.is_decorator(decorator_array[2])); + EXPECT_TRUE(dec1.is_decorator(decorator_array[3])); // Select the default decorators for dec2 EXPECT_FALSE(dec2.is_decorator(decorator_array[0])); - EXPECT_FALSE(dec2.is_decorator(decorator_array[2])); + EXPECT_FALSE(dec2.is_decorator(decorator_array[3])); assert_default_decorators(&dec2); // Combine and verify that the combination includes first, third and default decorators dec2.combine_with(dec1); EXPECT_TRUE(dec2.is_decorator(decorator_array[0])); - EXPECT_TRUE(dec2.is_decorator(decorator_array[2])); + EXPECT_TRUE(dec2.is_decorator(decorator_array[3])); assert_default_decorators(&dec2, false); } diff --git a/hotspot/test/native/logging/test_logOutputList.cpp b/hotspot/test/native/logging/test_logOutputList.cpp index 75ba599e3f8..45db9553a2a 100644 --- a/hotspot/test/native/logging/test_logOutputList.cpp +++ b/hotspot/test/native/logging/test_logOutputList.cpp @@ -22,6 +22,7 @@ */ #include "precompiled.hpp" +#include "logging/logFileStreamOutput.hpp" #include "logging/logLevel.hpp" #include "logging/logOutput.hpp" #include "logging/logOutputList.hpp" @@ -173,7 +174,7 @@ TEST(LogOutputList, is_level_single_output) { for (size_t i = LogLevel::First; i < LogLevel::Count; i++) { LogLevelType level = static_cast(i); LogOutputList list; - list.set_output_level(LogOutput::Stdout, level); + list.set_output_level(&StdoutLog, level); for (size_t j = LogLevel::First; j < LogLevel::Count; j++) { LogLevelType other = static_cast(j); // Verify that levels finer than the current level for stdout are reported as disabled, @@ -201,8 +202,8 @@ TEST(LogOutputList, is_level_empty) { // Test is_level() on lists with two outputs on different levels TEST(LogOutputList, is_level_multiple_outputs) { for (size_t i = LogLevel::First; i < LogLevel::Count - 1; i++) { - LogOutput* dummy1 = LogOutput::Stdout; - LogOutput* dummy2 = 
LogOutput::Stderr; + LogOutput* dummy1 = &StdoutLog; + LogOutput* dummy2 = &StderrLog; LogLevelType first = static_cast(i); LogLevelType second = static_cast(i + 1); LogOutputList list; @@ -226,19 +227,19 @@ TEST(LogOutputList, level_for) { LogOutputList list; // Ask the empty list about stdout, stderr - EXPECT_EQ(LogLevel::Off, list.level_for(LogOutput::Stdout)); - EXPECT_EQ(LogLevel::Off, list.level_for(LogOutput::Stderr)); + EXPECT_EQ(LogLevel::Off, list.level_for(&StdoutLog)); + EXPECT_EQ(LogLevel::Off, list.level_for(&StderrLog)); // Ask for level in a list with two outputs on different levels - list.set_output_level(LogOutput::Stdout, LogLevel::Info); - list.set_output_level(LogOutput::Stderr, LogLevel::Trace); - EXPECT_EQ(LogLevel::Info, list.level_for(LogOutput::Stdout)); - EXPECT_EQ(LogLevel::Trace, list.level_for(LogOutput::Stderr)); + list.set_output_level(&StdoutLog, LogLevel::Info); + list.set_output_level(&StderrLog, LogLevel::Trace); + EXPECT_EQ(LogLevel::Info, list.level_for(&StdoutLog)); + EXPECT_EQ(LogLevel::Trace, list.level_for(&StderrLog)); // Remove and ask again - list.set_output_level(LogOutput::Stdout, LogLevel::Off); - EXPECT_EQ(LogLevel::Off, list.level_for(LogOutput::Stdout)); - EXPECT_EQ(LogLevel::Trace, list.level_for(LogOutput::Stderr)); + list.set_output_level(&StdoutLog, LogLevel::Off); + EXPECT_EQ(LogLevel::Off, list.level_for(&StdoutLog)); + EXPECT_EQ(LogLevel::Trace, list.level_for(&StderrLog)); // Ask about an unknown output LogOutput* dummy = dummy_output(4711); @@ -251,5 +252,5 @@ TEST(LogOutputList, level_for) { } // Make sure the stderr level is still the same - EXPECT_EQ(LogLevel::Trace, list.level_for(LogOutput::Stderr)); + EXPECT_EQ(LogLevel::Trace, list.level_for(&StderrLog)); } diff --git a/hotspot/test/native/logging/test_logTagSet.cpp b/hotspot/test/native/logging/test_logTagSet.cpp index b92ba93fcc0..b8efb9346f4 100644 --- a/hotspot/test/native/logging/test_logTagSet.cpp +++ 
b/hotspot/test/native/logging/test_logTagSet.cpp @@ -22,6 +22,7 @@ */ #include "precompiled.hpp" +#include "logging/logFileStreamOutput.hpp" #include "logging/logLevel.hpp" #include "logging/logOutput.hpp" #include "logging/logTag.hpp" @@ -37,18 +38,18 @@ TEST(LogTagSet, defaults) { EXPECT_TRUE(ts->is_level(LogLevel::Error)); EXPECT_TRUE(ts->is_level(LogLevel::Warning)); EXPECT_FALSE(ts->is_level(LogLevel::Info)); - EXPECT_TRUE(ts->has_output(LogOutput::Stdout)); - EXPECT_FALSE(ts->has_output(LogOutput::Stderr)); + EXPECT_TRUE(ts->has_output(&StdoutLog)); + EXPECT_FALSE(ts->has_output(&StderrLog)); } } TEST(LogTagSet, has_output) { LogTagSet& ts = LogTagSetMapping::tagset(); - ts.set_output_level(LogOutput::Stderr, LogLevel::Trace); - EXPECT_TRUE(ts.has_output(LogOutput::Stderr)); + ts.set_output_level(&StderrLog, LogLevel::Trace); + EXPECT_TRUE(ts.has_output(&StderrLog)); EXPECT_FALSE(ts.has_output(NULL)); - ts.set_output_level(LogOutput::Stderr, LogLevel::Off); - EXPECT_FALSE(ts.has_output(LogOutput::Stderr)); + ts.set_output_level(&StderrLog, LogLevel::Off); + EXPECT_FALSE(ts.has_output(&StderrLog)); } TEST(LogTagSet, ntags) { @@ -61,18 +62,18 @@ TEST(LogTagSet, ntags) { TEST(LogTagSet, is_level) { LogTagSet& ts = LogTagSetMapping::tagset(); // Set info level on stdout and verify that is_level() reports correctly - ts.set_output_level(LogOutput::Stdout, LogLevel::Info); + ts.set_output_level(&StdoutLog, LogLevel::Info); EXPECT_TRUE(ts.is_level(LogLevel::Error)); EXPECT_TRUE(ts.is_level(LogLevel::Warning)); EXPECT_TRUE(ts.is_level(LogLevel::Info)); EXPECT_FALSE(ts.is_level(LogLevel::Debug)); EXPECT_FALSE(ts.is_level(LogLevel::Trace)); - ts.set_output_level(LogOutput::Stdout, LogLevel::Default); + ts.set_output_level(&StdoutLog, LogLevel::Default); EXPECT_TRUE(ts.is_level(LogLevel::Default)); } TEST(LogTagSet, level_for) { - LogOutput* output = LogOutput::Stdout; + LogOutput* output = &StdoutLog; LogTagSet& ts = LogTagSetMapping::tagset(); for (uint i = 0; i < 
LogLevel::Count; i++) { LogLevelType level = static_cast(i); diff --git a/hotspot/test/native/memory/test_guardedMemory.cpp b/hotspot/test/native/memory/test_guardedMemory.cpp new file mode 100644 index 00000000000..d48b624b341 --- /dev/null +++ b/hotspot/test/native/memory/test_guardedMemory.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#include "precompiled.hpp" +#include "memory/allocation.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/guardedMemory.hpp" +#include "runtime/os.hpp" +#include "unittest.hpp" + +static void guarded_memory_test_check(void* p, size_t sz, void* tag) { + ASSERT_TRUE(p != NULL) << "NULL pointer given to check"; + u_char* c = (u_char*) p; + GuardedMemory guarded(c); + EXPECT_EQ(guarded.get_tag(), tag) << "Tag is not the same as supplied"; + EXPECT_EQ(guarded.get_user_ptr(), c) << "User pointer is not the same as supplied"; + EXPECT_EQ(guarded.get_user_size(), sz) << "User size is not the same as supplied"; + EXPECT_TRUE(guarded.verify_guards()) << "Guard broken"; +} + +class GuardedMemoryTest { + public: + static size_t get_guard_header_size() { + return sizeof (GuardedMemory::GuardHeader); + } + static size_t get_guard_size() { + return sizeof (GuardedMemory::Guard); + } +}; + +// Test GuardedMemory size +TEST(GuardedMemory, size) { + size_t total_sz = GuardedMemory::get_total_size(1); + ASSERT_GT(total_sz, (size_t) 1) << "Unexpected size"; + ASSERT_GE(total_sz, GuardedMemoryTest::get_guard_header_size() + 1 + + GuardedMemoryTest::get_guard_size()) << "Unexpected size"; +} + +// Test the basic characteristics +TEST(GuardedMemory, basic) { + u_char* basep = + (u_char*) os::malloc(GuardedMemory::get_total_size(1), mtInternal); + GuardedMemory guarded(basep, 1, (void*) 0xf000f000); + + EXPECT_EQ(badResourceValue, *basep) + << "Expected guard in the form of badResourceValue"; + + u_char* userp = guarded.get_user_ptr(); + EXPECT_EQ(uninitBlockPad, *userp) + << "Expected uninitialized data in the form of uninitBlockPad"; + guarded_memory_test_check(userp, 1, (void*) 0xf000f000); + + void* freep = guarded.release_for_freeing(); + EXPECT_EQ((u_char*) freep, basep) << "Expected the same pointer guard was "; + EXPECT_EQ(freeBlockPad, *userp) << "Expected user data to be free block padded"; + EXPECT_FALSE(guarded.verify_guards()); + os::free(freep); +} + 
+// Test a number of odd sizes +TEST(GuardedMemory, odd_sizes) { + u_char* basep = + (u_char*) os::malloc(GuardedMemory::get_total_size(1), mtInternal); + GuardedMemory guarded(basep, 1, (void*) 0xf000f000); + + size_t sz = 0; + do { + void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal); + void* up = guarded.wrap_with_guards(p, sz, (void*) 1); + memset(up, 0, sz); + guarded_memory_test_check(up, sz, (void*) 1); + if (HasFatalFailure()) { + return; + } + + os::free(guarded.release_for_freeing()); + sz = (sz << 4) + 1; + } while (sz < (256 * 1024)); +} + +// Test buffer overrun into head... +TEST(GuardedMemory, buffer_overrun_head) { + u_char* basep = + (u_char*) os::malloc(GuardedMemory::get_total_size(1), mtInternal); + GuardedMemory guarded(basep, 1, (void*) 0xf000f000); + + guarded.wrap_with_guards(basep, 1); + *basep = 0; + EXPECT_FALSE(guarded.verify_guards()); + os::free(basep); +} + +// Test buffer overrun into tail with a number of odd sizes +TEST(GuardedMemory, buffer_overrun_tail) { + u_char* basep = + (u_char*) os::malloc(GuardedMemory::get_total_size(1), mtInternal); + GuardedMemory guarded(basep, 1, (void*) 0xf000f000); + + size_t sz = 1; + do { + void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal); + void* up = guarded.wrap_with_guards(p, sz, (void*) 1); + memset(up, 0, sz + 1); // Buffer-overwrite (within guard) + EXPECT_FALSE(guarded.verify_guards()) << "Guard was not broken as expected"; + os::free(guarded.release_for_freeing()); + sz = (sz << 4) + 1; + } while (sz < (256 * 1024)); +} + +// Test wrap_copy/wrap_free +TEST(GuardedMemory, wrap) { + EXPECT_TRUE(GuardedMemory::free_copy(NULL)) << "Expected free NULL to be OK"; + + const char* str = "Check my bounds out"; + size_t str_sz = strlen(str) + 1; + char* str_copy = (char*) GuardedMemory::wrap_copy(str, str_sz); + guarded_memory_test_check(str_copy, str_sz, NULL); + if (HasFatalFailure()) { + return; + } + EXPECT_EQ(0, strcmp(str, str_copy)) << "Not identical 
copy"; + EXPECT_TRUE(GuardedMemory::free_copy(str_copy)) << "Free copy failed to verify"; + + void* no_data = NULL; + void* no_data_copy = GuardedMemory::wrap_copy(no_data, 0); + EXPECT_TRUE(GuardedMemory::free_copy(no_data_copy)) + << "Expected valid guards even for no data copy"; +} diff --git a/hotspot/test/native/memory/test_metachunk.cpp b/hotspot/test/native/memory/test_metachunk.cpp new file mode 100644 index 00000000000..e99bf42a121 --- /dev/null +++ b/hotspot/test/native/memory/test_metachunk.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#include "precompiled.hpp" +#include "memory/allocation.hpp" +#include "memory/metachunk.hpp" +#include "unittest.hpp" +#include "utilities/copy.hpp" +#include "utilities/debug.hpp" + +class MetachunkTest { + public: + static MetaWord* initial_top(Metachunk* metachunk) { + return metachunk->initial_top(); + } + static MetaWord* top(Metachunk* metachunk) { + return metachunk->top(); + } + +}; + +TEST(Metachunk, basic) { + size_t size = 2 * 1024 * 1024; + void* memory = malloc(size); + ASSERT_TRUE(NULL != memory) << "Failed to malloc 2MB"; + + Metachunk* metachunk = ::new (memory) Metachunk(size / BytesPerWord, NULL); + + EXPECT_EQ((MetaWord*) metachunk, metachunk->bottom()); + EXPECT_EQ((uintptr_t*) metachunk + metachunk->size(), metachunk->end()); + + // Check sizes + EXPECT_EQ(metachunk->size(), metachunk->word_size()); + EXPECT_EQ(pointer_delta(metachunk->end(), metachunk->bottom(), + sizeof (MetaWord*)), + metachunk->word_size()); + + // Check usage + EXPECT_EQ(metachunk->used_word_size(), metachunk->overhead()); + EXPECT_EQ(metachunk->word_size() - metachunk->used_word_size(), + metachunk->free_word_size()); + EXPECT_EQ(MetachunkTest::top(metachunk), MetachunkTest::initial_top(metachunk)); + EXPECT_TRUE(metachunk->is_empty()); + + // Allocate + size_t alloc_size = 64; // Words + EXPECT_TRUE(is_size_aligned(alloc_size, Metachunk::object_alignment())); + + MetaWord* mem = metachunk->allocate(alloc_size); + + // Check post alloc + EXPECT_EQ(MetachunkTest::initial_top(metachunk), mem); + EXPECT_EQ(MetachunkTest::top(metachunk), mem + alloc_size); + EXPECT_EQ(metachunk->overhead() + alloc_size, metachunk->used_word_size()); + EXPECT_EQ(metachunk->word_size() - metachunk->used_word_size(), + metachunk->free_word_size()); + EXPECT_FALSE(metachunk->is_empty()); + + // Clear chunk + metachunk->reset_empty(); + + // Check post clear + EXPECT_EQ(metachunk->used_word_size(), metachunk->overhead()); + EXPECT_EQ(metachunk->word_size() - metachunk->used_word_size(), + 
metachunk->free_word_size()); + EXPECT_EQ(MetachunkTest::top(metachunk), MetachunkTest::initial_top(metachunk)); + EXPECT_TRUE(metachunk->is_empty()); + + free(memory); +} diff --git a/hotspot/test/native/runtime/test_os.cpp b/hotspot/test/native/runtime/test_os.cpp index 94180b5e189..ed1b3b7a0cb 100644 --- a/hotspot/test/native/runtime/test_os.cpp +++ b/hotspot/test/native/runtime/test_os.cpp @@ -25,6 +25,15 @@ #include "runtime/os.hpp" #include "unittest.hpp" +static size_t small_page_size() { + return os::vm_page_size(); +} + +static size_t large_page_size() { + const size_t large_page_size_example = 4 * M; + return os::page_size_for_region_aligned(large_page_size_example, 1); +} + TEST_VM(os, page_size_for_region) { size_t large_page_example = 4 * M; size_t large_page = os::page_size_for_region_aligned(large_page_example, 1); @@ -37,6 +46,68 @@ TEST_VM(os, page_size_for_region) { } } +TEST_VM(os, page_size_for_region_aligned) { + if (UseLargePages) { + const size_t small_page = small_page_size(); + const size_t large_page = large_page_size(); + + if (large_page > small_page) { + size_t num_small_pages_in_large = large_page / small_page; + size_t page = os::page_size_for_region_aligned(large_page, num_small_pages_in_large); + + ASSERT_EQ(page, small_page); + } + } +} + +TEST_VM(os, page_size_for_region_alignment) { + if (UseLargePages) { + const size_t small_page = small_page_size(); + const size_t large_page = large_page_size(); + if (large_page > small_page) { + const size_t unaligned_region = large_page + 17; + size_t page = os::page_size_for_region_aligned(unaligned_region, 1); + ASSERT_EQ(page, small_page); + + const size_t num_pages = 5; + const size_t aligned_region = large_page * num_pages; + page = os::page_size_for_region_aligned(aligned_region, num_pages); + ASSERT_EQ(page, large_page); + } + } +} + +TEST_VM(os, page_size_for_region_unaligned) { + if (UseLargePages) { + // Given exact page size, should return that page size. 
+ for (size_t i = 0; os::_page_sizes[i] != 0; i++) { + size_t expected = os::_page_sizes[i]; + size_t actual = os::page_size_for_region_unaligned(expected, 1); + ASSERT_EQ(expected, actual); + } + + // Given slightly larger size than a page size, return the page size. + for (size_t i = 0; os::_page_sizes[i] != 0; i++) { + size_t expected = os::_page_sizes[i]; + size_t actual = os::page_size_for_region_unaligned(expected + 17, 1); + ASSERT_EQ(expected, actual); + } + + // Given a slightly smaller size than a page size, + // return the next smaller page size. + if (os::_page_sizes[1] > os::_page_sizes[0]) { + size_t expected = os::_page_sizes[0]; + size_t actual = os::page_size_for_region_unaligned(os::_page_sizes[1] - 17, 1); + ASSERT_EQ(actual, expected); + } + + // Return small page size for values less than a small page. + size_t small_page = small_page_size(); + size_t actual = os::page_size_for_region_unaligned(small_page - 17, 1); + ASSERT_EQ(small_page, actual); + } +} + #ifdef ASSERT TEST_VM_ASSERT_MSG(os, page_size_for_region_with_zero_min_pages, "sanity") { size_t region_size = 16 * os::vm_page_size(); diff --git a/hotspot/test/native/utilities/test_quicksort.cpp b/hotspot/test/native/utilities/test_quicksort.cpp index 5ff4d0aaf44..2c770cd1869 100644 --- a/hotspot/test/native/utilities/test_quicksort.cpp +++ b/hotspot/test/native/utilities/test_quicksort.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -19,29 +19,186 @@ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. 
+ * */ #include "precompiled.hpp" -#include "prims/jvm.h" +#include "memory/allocation.inline.hpp" +#include "runtime/os.hpp" #include "utilities/quickSort.hpp" #include "unittest.hpp" -static int int_comparator(int a, int b) { +static int test_comparator(int a, int b) { if (a == b) { return 0; - } else if (a < b) { + } + if (a < b) { return -1; } - - // a > b return 1; } -TEST(utilities, quicksort) { - int test_array[] = {3,2,1}; - QuickSort::sort(test_array, 3, int_comparator, false); - - ASSERT_EQ(1, test_array[0]); - ASSERT_EQ(2, test_array[1]); - ASSERT_EQ(3, test_array[2]); +static bool compare_arrays(int* actual, int* expected, int length) { + for (int i = 0; i < length; i++) { + if (actual[i] != expected[i]) { + return false; + } + } + return true; +} + +template +static bool sort_and_compare(int* arrayToSort, int* expectedResult, int length, C comparator, bool idempotent = false) { + QuickSort::sort(arrayToSort, length, comparator, idempotent); + return compare_arrays(arrayToSort, expectedResult, length); +} + +static int test_even_odd_comparator(int a, int b) { + bool a_is_odd = ((a % 2) == 1); + bool b_is_odd = ((b % 2) == 1); + if (a_is_odd == b_is_odd) { + return 0; + } + if (a_is_odd) { + return -1; + } + return 1; +} + +extern "C" { + static int test_stdlib_comparator(const void* a, const void* b) { + int ai = *(int*)a; + int bi = *(int*)b; + if (ai == bi) { + return 0; + } + if (ai < bi) { + return -1; + } + return 1; + } +} + +TEST(QuickSort, quicksort) { + { + int* test_array = NULL; + int* expected_array = NULL; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 0, test_comparator)); + } + { + int test_array[] = {3}; + int expected_array[] = {3}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 1, test_comparator)); + } + { + int test_array[] = {3,2}; + int expected_array[] = {2,3}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 2, test_comparator)); + } + { + int test_array[] = {3,2,1}; + int expected_array[] = 
{1,2,3}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 3, test_comparator)); + } + { + int test_array[] = {4,3,2,1}; + int expected_array[] = {1,2,3,4}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 4, test_comparator)); + } + { + int test_array[] = {7,1,5,3,6,9,8,2,4,0}; + int expected_array[] = {0,1,2,3,4,5,6,7,8,9}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 10, test_comparator)); + } + { + int test_array[] = {4,4,1,4}; + int expected_array[] = {1,4,4,4}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 4, test_comparator)); + } + { + int test_array[] = {0,1,2,3,4,5,6,7,8,9}; + int expected_array[] = {0,1,2,3,4,5,6,7,8,9}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 10, test_comparator)); + } + { + // one of the random arrays that found an issue in the partition method. + int test_array[] = {76,46,81,8,64,56,75,11,51,55,11,71,59,27,9,64,69,75,21,25,39,40,44,32,7,8,40,41,24,78,24,74,9,65,28,6,40,31,22,13,27,82}; + int expected_array[] = {6,7,8,8,9,9,11,11,13,21,22,24,24,25,27,27,28,31,32,39,40,40,40,41,44,46,51,55,56,59,64,64,65,69,71,74,75,75,76,78,81,82}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 42, test_comparator)); + } + { + int test_array[] = {2,8,1,4}; + int expected_array[] = {1,4,2,8}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 4, test_even_odd_comparator)); + } +} + +TEST(QuickSort, idempotent) { + { + // An array of lenght 3 is only sorted by find_pivot. Make sure that it is idempotent. 
+ int test_array[] = {1, 4, 8}; + int expected_array[] = {1, 4, 8}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 3, test_even_odd_comparator, true)); + } + { + int test_array[] = {1, 7, 9, 4, 8, 2}; + int expected_array[] = {1, 7, 9, 4, 8, 2}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true)); + } + { + int test_array[] = {1, 9, 7, 4, 2, 8}; + int expected_array[] = {1, 9, 7, 4, 2, 8}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true)); + } + { + int test_array[] = {7, 9, 1, 2, 8, 4}; + int expected_array[] = {7, 9, 1, 2, 8, 4}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true)); + } + { + int test_array[] = {7, 1, 9, 2, 4, 8}; + int expected_array[] = {7, 1, 9, 2, 4, 8}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true)); + } + { + int test_array[] = {9, 1, 7, 4, 8, 2}; + int expected_array[] = {9, 1, 7, 4, 8, 2}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true)); + } + { + int test_array[] = {9, 7, 1, 4, 2, 8}; + int expected_array[] = {9, 7, 1, 4, 2, 8}; + EXPECT_TRUE(sort_and_compare(test_array, expected_array, 6, test_even_odd_comparator, true)); + } +} + +TEST(QuickSort, random) { + for (int i = 0; i < 1000; i++) { + int length = os::random() % 100; + int* test_array = NEW_C_HEAP_ARRAY(int, length, mtInternal); + int* expected_array = NEW_C_HEAP_ARRAY(int, length, mtInternal); + for (int j = 0; j < length; j++) { + // Choose random values, but get a chance of getting duplicates + test_array[j] = os::random() % (length * 2); + expected_array[j] = test_array[j]; + } + + // Compare sorting to stdlib::qsort() + qsort(expected_array, length, sizeof(int), test_stdlib_comparator); + EXPECT_TRUE(sort_and_compare(test_array, expected_array, length, test_comparator)); + + // Make sure sorting is idempotent. 
+ // Both test_array and expected_array are sorted by the test_comparator. + // Now sort them once with the test_even_odd_comparator. Then sort the + // test_array one more time with test_even_odd_comparator and verify that + // it is idempotent. + QuickSort::sort(expected_array, length, test_even_odd_comparator, true); + QuickSort::sort(test_array, length, test_even_odd_comparator, true); + EXPECT_TRUE(compare_arrays(test_array, expected_array, length)); + QuickSort::sort(test_array, length, test_even_odd_comparator, true); + EXPECT_TRUE(compare_arrays(test_array, expected_array, length)); + + FREE_C_HEAP_ARRAY(int, test_array); + FREE_C_HEAP_ARRAY(int, expected_array); + } } diff --git a/hotspot/test/runtime/CompactStrings/TestMethodNames.java b/hotspot/test/runtime/CompactStrings/TestMethodNames.java new file mode 100644 index 00000000000..f9d4ce5ef0e --- /dev/null +++ b/hotspot/test/runtime/CompactStrings/TestMethodNames.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import javax.script.*; +import java.util.function.*; + +/* + * @test + * @bug 8164612 + * @summary Test method names containing Latin-1 supplement characters. + * @run main/othervm -XX:+CompactStrings TestMethodNames + * @run main/othervm -XX:-CompactStrings TestMethodNames + */ +public class TestMethodNames { + public static void main(String[] args) throws Exception { + ScriptEngineManager m = new ScriptEngineManager(); + ScriptEngine e = m.getEngineByName("nashorn"); + + e.eval("({get \"\0\"(){}})[\"\0\"]"); + e.eval("({get \"\\x80\"(){}})[\"\\x80\"]"); + e.eval("({get \"\\xff\"(){}})[\"\\xff\"]"); + } +} diff --git a/hotspot/test/runtime/ConstantPool/TestMethodHandleConstant.java b/hotspot/test/runtime/ConstantPool/TestMethodHandleConstant.java index 0ed640992be..565f05525f6 100644 --- a/hotspot/test/runtime/ConstantPool/TestMethodHandleConstant.java +++ b/hotspot/test/runtime/ConstantPool/TestMethodHandleConstant.java @@ -23,7 +23,7 @@ /* * @test - * @bug 8159470 + * @bug 8159470 8166974 * @summary Test that MethodHandle constants are checked * @modules java.base/jdk.internal.misc * @compile WithConfiguration.jcod @@ -33,12 +33,13 @@ public class TestMethodHandleConstant { public static void main(String[] args) { try { - // This interface has bad constant pool entry for MethodHandle -> Method - String URI_DEFAULT - = WithConfiguration.autoDetect().getLocation(); - throw new RuntimeException("FAILED, ICCE not thrown"); - } catch (BootstrapMethodError icce) { - System.out.println("PASSED, expecting ICCE" + icce.getMessage()); + // This interface has bad constant pool entry for MethodHandle -> Method + String URI_DEFAULT + = WithConfiguration.autoDetect().getLocation(); + throw new RuntimeException("FAILED, IncompatibleClassChangeError not thrown"); + } + catch 
(IncompatibleClassChangeError icce) { + System.out.println("PASSED, expecting IncompatibleClassChangeError" + icce.getMessage()); } } } diff --git a/hotspot/test/runtime/SharedArchiveFile/SASymbolTableTest.java b/hotspot/test/runtime/SharedArchiveFile/SASymbolTableTest.java index 14603a5f1c6..e8f36d2b58c 100644 --- a/hotspot/test/runtime/SharedArchiveFile/SASymbolTableTest.java +++ b/hotspot/test/runtime/SharedArchiveFile/SASymbolTableTest.java @@ -24,9 +24,6 @@ /* * @test SASymbolTableTest * @summary Walk symbol table using SA, with and without CDS. - * Started failing on 2016.06.24 due to 8160376 on MacOS X so quarantine - * it on that platform: - * @requires os.family != "mac" * @library /test/lib * @modules java.base/jdk.internal.misc * jdk.hotspot.agent/sun.jvm.hotspot.oops diff --git a/hotspot/test/runtime/invokedynamic/BootstrapMethodErrorTest.java b/hotspot/test/runtime/invokedynamic/BootstrapMethodErrorTest.java index 5581771852a..d1a270f6229 100644 --- a/hotspot/test/runtime/invokedynamic/BootstrapMethodErrorTest.java +++ b/hotspot/test/runtime/invokedynamic/BootstrapMethodErrorTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -23,93 +23,296 @@ /* * @test - * @bug 8051045 - * @summary Test that exceptions from invokedynamic are wrapped in BootstrapMethodError + * @bug 8051045 8166974 + * @summary Test exceptions from invokedynamic and the bootstrap method * @modules java.base/jdk.internal.org.objectweb.asm * @run main BootstrapMethodErrorTest */ -import java.lang.reflect.Method; -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import static java.lang.invoke.MethodHandles.*; -import static java.lang.invoke.MethodType.*; - import jdk.internal.org.objectweb.asm.ClassWriter; import jdk.internal.org.objectweb.asm.Handle; import jdk.internal.org.objectweb.asm.MethodVisitor; import jdk.internal.org.objectweb.asm.Opcodes; -public class BootstrapMethodErrorTest extends ClassLoader implements Opcodes { +import java.lang.invoke.CallSite; +import java.lang.invoke.ConstantCallSite; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodType; +import java.lang.invoke.WrongMethodTypeException; +import java.lang.reflect.InvocationTargetException; +import java.util.List; - @Override - public Class findClass(String name) throws ClassNotFoundException { - byte[] b; - try { - b = loadClassData(name); - } catch (Throwable th) { - throw new ClassNotFoundException("Loading error", th); +public class BootstrapMethodErrorTest { + + static abstract class IndyClassloader extends ClassLoader implements Opcodes { + + public IndyClassloader() { + super(BootstrapMethodErrorTest.class.getClassLoader()); + } + + @Override + public Class findClass(String name) throws ClassNotFoundException { + byte[] b; + try { + b = loadClassData(name); + } + catch (Throwable th) { + throw new ClassNotFoundException("Loading error", th); + } + return defineClass(name, b, 0, b.length); + } + + static final String BOOTSTRAP_METHOD_CLASS_NAME = "C"; + + static final String 
BOOTSTRAP_METHOD_NAME = "bsm"; + + static final String INDY_CALLER_CLASS_NAME = "Exec"; + + static final String BOOTSTRAP_METHOD_DESC = MethodType.methodType( + Object.class, MethodHandles.Lookup.class, String.class, MethodType.class). + toMethodDescriptorString(); + + private byte[] loadClassData(String name) throws Exception { + ClassWriter cw = new ClassWriter( + ClassWriter.COMPUTE_FRAMES | ClassWriter.COMPUTE_MAXS); + if (name.equals(BOOTSTRAP_METHOD_CLASS_NAME)) { + defineIndyBootstrapMethodClass(cw); + return cw.toByteArray(); + } + else if (name.equals("Exec")) { + defineIndyCallingClass(cw); + return cw.toByteArray(); + } + return null; + } + + void defineIndyCallingClass(ClassWriter cw) { + cw.visit(52, ACC_SUPER | ACC_PUBLIC, INDY_CALLER_CLASS_NAME, null, "java/lang/Object", null); + MethodVisitor mv = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, "invoke", "()V", null, null); + mv.visitCode(); + Handle h = new Handle(H_INVOKESTATIC, + BOOTSTRAP_METHOD_CLASS_NAME, BOOTSTRAP_METHOD_NAME, + BOOTSTRAP_METHOD_DESC, false); + mv.visitInvokeDynamicInsn(BOOTSTRAP_METHOD_CLASS_NAME, "()V", h); + mv.visitInsn(RETURN); + mv.visitMaxs(0, 0); + mv.visitEnd(); + cw.visitEnd(); + } + + void defineIndyBootstrapMethodClass(ClassWriter cw) { + cw.visit(52, ACC_SUPER | ACC_PUBLIC, + BOOTSTRAP_METHOD_CLASS_NAME, null, "java/lang/Object", null); + MethodVisitor mv = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, + BOOTSTRAP_METHOD_NAME, BOOTSTRAP_METHOD_DESC, null, null); + mv.visitCode(); + defineIndyBootstrapMethodBody(mv); + mv.visitMaxs(0, 0); + mv.visitEnd(); + } + + void defineIndyBootstrapMethodBody(MethodVisitor mv) { + mv.visitInsn(ACONST_NULL); + mv.visitInsn(ARETURN); + } + + void invoke() throws Exception { + Class.forName(BOOTSTRAP_METHOD_CLASS_NAME, true, this); + Class exec = Class.forName(INDY_CALLER_CLASS_NAME, true, this); + exec.getMethod("invoke").invoke(null); + } + + void test() throws Exception { + Class.forName(BOOTSTRAP_METHOD_CLASS_NAME, true, this); + Class 
exec = Class.forName(INDY_CALLER_CLASS_NAME, true, this); + try { + exec.getMethod("invoke").invoke(null); + throw new RuntimeException("Expected InvocationTargetException but no exception at all was thrown"); + } catch (InvocationTargetException e) { + Throwable t = e.getCause(); + for (Class etc : expectedThrowableClasses()) { + if (!etc.isInstance(t)) { + throw new RuntimeException( + "Expected " + etc.getName() + " but got another exception: " + + t.getClass().getName(), + t); + } + t = t.getCause(); + } + } + } + + abstract List> expectedThrowableClasses(); } - return defineClass(name, b, 0, b.length); - } - private byte[] loadClassData(String name) throws Exception { - ClassWriter cw = new ClassWriter(0); - MethodVisitor mv; + // Methods called by a bootstrap method - if (name.equals("C")) { - cw.visit(52, ACC_SUPER | ACC_PUBLIC, "C", null, "java/lang/Object", null); - { - mv = cw.visitMethod(ACC_PRIVATE | ACC_STATIC, "m", "()V", null, null); - mv.visitCode(); - mv.visitInsn(RETURN); - mv.visitMaxs(0, 1); - mv.visitEnd(); - } - cw.visitEnd(); - return cw.toByteArray(); - } else if (name.equals("Exec")) { - cw.visit(52, ACC_SUPER | ACC_PUBLIC, "Exec", null, "java/lang/Object", null); - { - mv = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, "invokeRef", "()V", null, null); - mv.visitCode(); - Handle h = new Handle(H_INVOKESTATIC, "C", "m", "()V"); - mv.visitInvokeDynamicInsn("C", "()V", h); - mv.visitInsn(RETURN); - mv.visitMaxs(0, 0); - mv.visitEnd(); - } - cw.visitEnd(); - return cw.toByteArray(); + public static CallSite getCallSite() { + try { + MethodHandle mh = MethodHandles.lookup().findStatic( + BootstrapMethodErrorTest.class, + "target", + MethodType.methodType(Object.class, Object.class)); + return new ConstantCallSite(mh); + } catch (Exception e) { + throw new RuntimeException(e); + } } - return null; - } - - public static void main(String[] args) throws ClassNotFoundException, IllegalAccessException, NoSuchMethodException { - new 
BootstrapMethodErrorTest().test(); - } - - public void test() throws ClassNotFoundException, IllegalAccessException, NoSuchMethodException { - Class.forName("C", true, this); - Class exec = Class.forName("Exec", true, this); - - try { - exec.getMethod("invokeRef").invoke(null); - } catch (Throwable e) { - Throwable c = e.getCause(); - if (c == null) { - throw new RuntimeException( - "Expected BootstrapMethodError wrapped in an InvocationTargetException but it wasn't wrapped", e); - } else if (c instanceof BootstrapMethodError) { - // Only way to pass test, all else should throw - return; - } else { - throw new RuntimeException( - "Expected BootstrapMethodError but got another Error: " - + c.getClass().getName(), - c); - } + public static Object target(Object o) { + return null; + } + + static class TestThrowable extends Throwable {} + public static void throwsTestThrowable() throws Throwable { + throw new TestThrowable(); + } + + static class TestError extends Error {} + public static void throwsTestError() { + throw new TestError(); + } + + static class TestRuntimeException extends RuntimeException {} + public static void throwsTestRuntimeException() { + throw new TestRuntimeException(); + } + + static class TestCheckedException extends Exception {} + public static void throwsTestCheckedException() throws TestCheckedException { + throw new TestCheckedException(); + } + + + // Test classes + + static class InaccessibleBootstrapMethod extends IndyClassloader { + + void defineIndyBootstrapMethodClass(ClassWriter cw) { + cw.visit(52, ACC_SUPER | ACC_PUBLIC, + BOOTSTRAP_METHOD_CLASS_NAME, null, "java/lang/Object", null); + // Bootstrap method is declared to be private + MethodVisitor mv = cw.visitMethod(ACC_PRIVATE | ACC_STATIC, + BOOTSTRAP_METHOD_NAME, BOOTSTRAP_METHOD_DESC, null, null); + mv.visitCode(); + defineIndyBootstrapMethodBody(mv); + mv.visitMaxs(0, 0); + mv.visitEnd(); + } + + @Override + List> expectedThrowableClasses() { + return 
List.of(IllegalAccessError.class); + } + } + + static class BootstrapMethodDoesNotReturnCallSite extends IndyClassloader { + + void defineIndyBootstrapMethodBody(MethodVisitor mv) { + // return null from the bootstrap method, + // which cannot be cast to CallSite + mv.visitInsn(ACONST_NULL); + mv.visitInsn(ARETURN); + } + + @Override + List> expectedThrowableClasses() { + return List.of(BootstrapMethodError.class, ClassCastException.class); + } + } + + static class BootstrapMethodCallSiteHasWrongTarget extends IndyClassloader { + + @Override + void defineIndyBootstrapMethodBody(MethodVisitor mv) { + // Invoke the method BootstrapMethodErrorTest.getCallSite to obtain + // a CallSite instance whose target is different from that of + // the indy call site + mv.visitMethodInsn(INVOKESTATIC, "BootstrapMethodErrorTest", + "getCallSite", "()Ljava/lang/invoke/CallSite;", false); + mv.visitInsn(ARETURN); + } + + @Override + List> expectedThrowableClasses() { + return List.of(BootstrapMethodError.class, WrongMethodTypeException.class); + } + } + + abstract static class BootstrapMethodThrows extends IndyClassloader { + final String methodName; + + public BootstrapMethodThrows(Class t) { + this.methodName = "throws" + t.getSimpleName(); + } + + @Override + void defineIndyBootstrapMethodBody(MethodVisitor mv) { + // Invoke the method whose name is methodName which will throw + // an exception + mv.visitMethodInsn(INVOKESTATIC, "BootstrapMethodErrorTest", + methodName, "()V", false); + mv.visitInsn(ACONST_NULL); + mv.visitInsn(ARETURN); + } + } + + static class BootstrapMethodThrowsThrowable extends BootstrapMethodThrows { + + public BootstrapMethodThrowsThrowable() { + super(TestThrowable.class); + } + + @Override + List> expectedThrowableClasses() { + return List.of(BootstrapMethodError.class, TestThrowable.class); + } + } + + static class BootstrapMethodThrowsError extends BootstrapMethodThrows { + + public BootstrapMethodThrowsError() { + super(TestError.class); + } + + 
@Override + List> expectedThrowableClasses() { + return List.of(TestError.class); + } + } + + static class BootstrapMethodThrowsRuntimeException extends BootstrapMethodThrows { + + public BootstrapMethodThrowsRuntimeException() { + super(TestRuntimeException.class); + } + + @Override + List> expectedThrowableClasses() { + return List.of(BootstrapMethodError.class, TestRuntimeException.class); + } + } + + static class BootstrapMethodThrowsCheckedException extends BootstrapMethodThrows { + + public BootstrapMethodThrowsCheckedException() { + super(TestCheckedException.class); + } + + @Override + List> expectedThrowableClasses() { + return List.of(BootstrapMethodError.class, TestCheckedException.class); + } + } + + + public static void main(String[] args) throws Exception { + new InaccessibleBootstrapMethod().test(); + new BootstrapMethodDoesNotReturnCallSite().test(); + new BootstrapMethodCallSiteHasWrongTarget().test(); + new BootstrapMethodThrowsThrowable().test(); + new BootstrapMethodThrowsError().test(); + new BootstrapMethodThrowsRuntimeException().test(); + new BootstrapMethodThrowsCheckedException().test(); } - throw new RuntimeException("Expected BootstrapMethodError but no Error at all was thrown"); - } } diff --git a/hotspot/test/runtime/jni/CalleeSavedRegisters/FPRegs.java b/hotspot/test/runtime/jni/CalleeSavedRegisters/FPRegs.java new file mode 100644 index 00000000000..1a90990244e --- /dev/null +++ b/hotspot/test/runtime/jni/CalleeSavedRegisters/FPRegs.java @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +/* + * @test + * @bug 8067744 + * @library /test/lib + * @modules java.base/jdk.internal.misc + * @run main/native FPRegs + */ + +import jdk.test.lib.Platform; +import jdk.test.lib.Utils; +import jdk.test.lib.process.ProcessTools; +import jdk.test.lib.process.OutputAnalyzer; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Optional; + +public class FPRegs { + public static void main(String[] args) throws IOException { + Path launcher = Paths.get(System.getProperty("test.nativepath"), "FPRegs" + (Platform.isWindows() ? ".exe" : "")); + System.out.println("Launcher = " + launcher + (Files.exists(launcher) ? 
" (exists)" : " (not exists)")); + Path jvmLib = findJVM(); + ProcessBuilder pb = new ProcessBuilder(launcher.toString(), jvmLib.toString()); + // bin as working directory to let Windows load dll + pb.directory(jvmLib.getParent().getParent().toFile()); + OutputAnalyzer outputf = new OutputAnalyzer(pb.start()); + outputf.shouldHaveExitValue(0); + } + + static Path findJVM() throws IOException { + String root = Utils.TEST_JDK; + String lib = System.mapLibraryName("jvm"); + System.out.println("Root = " + root); + System.out.println("Library = " + lib); + + Optional jvmLib = Files.find(new File(root).toPath(), 4, (p, attr) -> p.toFile().getName().equals(lib)).findFirst(); + Path p = null; + if (jvmLib.isPresent()) { + p = jvmLib.get().toRealPath(); + System.out.println("JVM = " + p); + } else { + System.out.println("TESTBUG: JVM not found in "); + Files.walk(new File(root).toPath(), 4).map(Path::toString).forEach(System.out::println); + } + return p; + } +} + diff --git a/hotspot/test/runtime/jni/CalleeSavedRegisters/exeFPRegs.c b/hotspot/test/runtime/jni/CalleeSavedRegisters/exeFPRegs.c new file mode 100644 index 00000000000..3b67abb4f6a --- /dev/null +++ b/hotspot/test/runtime/jni/CalleeSavedRegisters/exeFPRegs.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ +#include +#include + +#ifdef WINDOWS +#include +#else +#include +#endif // WINDOWS + +#ifdef WINDOWS + HMODULE handle; +#else + void* handle; +#endif // WINDOWS + +jint(JNICALL *jni_create_java_vm)(JavaVM **, JNIEnv **, void *) = NULL; + +// method to perform dlclose on an open dynamic library handle +void closeHandle() { +#ifdef WINDOWS + if (!FreeLibrary(handle)) { + fprintf(stderr, "Error occurred while closing handle: 0x%02X\n", GetLastError()); + } +#else + if (dlclose(handle) != 0) { + fprintf(stderr, "Error occurred while closing handle: %s\n", dlerror()); + } +#endif // WINDOWS +} + +void fail(int code) { + if (handle) { + closeHandle(); + } + exit(code); +} + + +// method to load the dynamic library libjvm +int loadJVM(const char* path) { +#ifdef WINDOWS + UINT errorMode = GetErrorMode(); + SetErrorMode(errorMode | SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); + handle = LoadLibraryA(path); +#else + handle = dlopen(path, RTLD_LAZY); +#endif // WINDOWS + + if (handle) { + // find the address of function +#ifdef WINDOWS + *(void **) (&jni_create_java_vm) = GetProcAddress(handle, "JNI_CreateJavaVM"); +#else + *(void **) (&jni_create_java_vm) = dlsym(handle, "JNI_CreateJavaVM"); +#endif // WINDOWS + + if (jni_create_java_vm == NULL) { + fprintf(stderr, "ERROR: No JNI_CreateJavaVM found: '%s'\n", path); + return -1; + } + } else { +#ifdef WINDOWS + fprintf(stderr, "ERROR: Can't load JVM library: 0x%02X\n", GetLastError()); +#else + fprintf(stderr, "ERROR: Can't load JVM library: %s\n", dlerror()); +#endif // WINDOWS + return -1; + } + return 0; +} + +long long 
unsigned int d2l(double d) { + union { + double d; + long long unsigned int llu; + } dl; + + dl.d = d; + return dl.llu; +} + +#define print_reg(r) printf("%s = %f (0x%llX)\n", #r, r, d2l(r)); + +int main(int argc, const char** argv) { + JavaVM* jvm; + JNIEnv* env; + JavaVMInitArgs vm_args; + + // values to trick constant folding + long long unsigned int vd[32]; + int i; + int bad_cnt = 0; + + // values occupy fp registers + // note: suitable code shape is produced only on Windows, + // and even then registers are corrupted not on every machine + register double d00; + register double d01; + register double d02; + register double d03; + register double d04; + register double d05; + register double d06; + register double d07; + register double d08; + register double d09; + register double d10; + register double d11; + register double d12; + register double d13; + register double d14; + register double d15; + + if (argc != 2) { + printf("Usage: FPRegs "); + fail(2); + } + printf("jvm_path = %s\n", argv[1]); + + if (loadJVM(argv[1]) < 0) { + fail(3); + } + + vm_args.version = JNI_VERSION_1_8; + vm_args.ignoreUnrecognized = JNI_FALSE; + vm_args.options = NULL; + vm_args.nOptions = 0; + + for(i = 0; i < 16; i++) { + vd[i] = d2l(100 + i); + } + + d00 = 100.0; + d01 = 101.0; + d02 = 102.0; + d03 = 103.0; + d04 = 104.0; + d05 = 105.0; + d06 = 106.0; + d07 = 107.0; + d08 = 108.0; + d09 = 109.0; + d10 = 110.0; + d11 = 111.0; + d12 = 112.0; + d13 = 113.0; + d14 = 114.0; + d15 = 115.0; + + printf("BEFORE:\n"); + print_reg(d00); + print_reg(d01); + print_reg(d02); + print_reg(d03); + print_reg(d04); + print_reg(d05); + print_reg(d06); + print_reg(d07); + print_reg(d08); + print_reg(d09); + print_reg(d10); + print_reg(d11); + print_reg(d12); + print_reg(d13); + print_reg(d14); + print_reg(d15); + + if (jni_create_java_vm(&jvm, &env, &vm_args) < 0 ) { + fprintf(stderr, "ERROR: Can't create JavaVM\n"); + fail(4); + } + + if (d2l(d00) != vd[0]) bad_cnt++; + if (d2l(d01) != vd[1]) 
bad_cnt++; + if (d2l(d02) != vd[2]) bad_cnt++; + if (d2l(d03) != vd[3]) bad_cnt++; + if (d2l(d04) != vd[4]) bad_cnt++; + if (d2l(d05) != vd[5]) bad_cnt++; + if (d2l(d06) != vd[6]) bad_cnt++; + if (d2l(d07) != vd[7]) bad_cnt++; + if (d2l(d08) != vd[8]) bad_cnt++; + if (d2l(d09) != vd[9]) bad_cnt++; + if (d2l(d10) != vd[10]) bad_cnt++; + if (d2l(d11) != vd[11]) bad_cnt++; + if (d2l(d12) != vd[12]) bad_cnt++; + if (d2l(d13) != vd[13]) bad_cnt++; + if (d2l(d14) != vd[14]) bad_cnt++; + if (d2l(d15) != vd[15]) bad_cnt++; + + printf("AFTER:\n"); + print_reg(d00); + print_reg(d01); + print_reg(d02); + print_reg(d03); + print_reg(d04); + print_reg(d05); + print_reg(d06); + print_reg(d07); + print_reg(d08); + print_reg(d09); + print_reg(d10); + print_reg(d11); + print_reg(d12); + print_reg(d13); + print_reg(d14); + print_reg(d15); + + printf("%d registers corrupted\n", bad_cnt); + if (bad_cnt > 0) { + printf("TEST FAILED"); + fail(1); + } + + printf("TEST PASSED"); + closeHandle(); + return 0; +} + diff --git a/hotspot/test/runtime/lambda-features/InterfaceInitializationStates.java b/hotspot/test/runtime/lambda-features/InterfaceInitializationStates.java index e43bb808160..47645af8ff4 100644 --- a/hotspot/test/runtime/lambda-features/InterfaceInitializationStates.java +++ b/hotspot/test/runtime/lambda-features/InterfaceInitializationStates.java @@ -88,9 +88,7 @@ public class InterfaceInitializationStates { // Iunlinked is testing initialization like interface I, except interface I is linked when // ClassLIM is linked. // Iunlinked is not linked already when K gets an initialization error. Linking Iunlinked - // should succeed and not get NoClassDefFoundError because it does not depend on the - // initialization state of K for linking. There's bug now where it gets this error. - // See: https://bugs.openjdk.java.net/browse/JDK-8166203. + // should succeed because it does not depend on the initialization state of K for linking. 
interface Iunlinked extends K { boolean v = InterfaceInitializationStates.out(Iunlinked.class); } @@ -157,15 +155,9 @@ public class InterfaceInitializationStates { System.out.println("ExceptionInInitializerError as expected"); } - // Initialize Iunlinked. This should not get NoClassDefFoundError because K + // Initialize Iunlinked. No exception should be thrown even if K // (its super interface) is in initialization_error state. - // This is a bug. It does now. - try { - boolean bb = Iunlinked.v; - throw new RuntimeException("FAIL exception not thrown for Iunlinked initialization"); - } catch(NoClassDefFoundError e) { - System.out.println("NoClassDefFoundError thrown because of bug"); - } + boolean bb = Iunlinked.v; // This should be okay boolean value = Iparams.v; @@ -182,7 +174,7 @@ public class InterfaceInitializationStates { // Check expected class initialization order List> expectedCInitOrder = Arrays.asList(L.class, K.class, M.class, ClassM.class, - I.class, Iparams.class, + I.class, Iunlinked.class, Iparams.class, ClassIparams.class); if (!cInitOrder.equals(expectedCInitOrder)) { throw new RuntimeException( diff --git a/hotspot/test/serviceability/jdwp/AllModulesCommandTest.java b/hotspot/test/serviceability/jdwp/AllModulesCommandTest.java index 33bb583c59d..16ae4a1d615 100644 --- a/hotspot/test/serviceability/jdwp/AllModulesCommandTest.java +++ b/hotspot/test/serviceability/jdwp/AllModulesCommandTest.java @@ -30,8 +30,10 @@ import static jdk.test.lib.Asserts.assertTrue; /** * @test - * @summary Tests AllModules JDWP command + * @summary Tests the modules-related JDWP commands * @library /test/lib + * @ignore 8168478 + * @modules jdk.jdwp.agent * @modules java.base/jdk.internal.misc * @compile AllModulesCommandTestDebuggee.java * @run main/othervm AllModulesCommandTest @@ -87,11 +89,16 @@ public class AllModulesCommandTest implements DebuggeeLauncher.Listener { assertReply(reply); for (int i = 0; i < reply.getModulesCount(); ++i) { long modId = 
reply.getModuleId(i); - // For each module reported by JDWP get its name using the JDWP NAME command - getModuleName(modId); + // For each module reported by JDWP get its name using the JDWP NAME command + // and store the reply + String modName = getModuleName(modId); + System.out.println("i=" + i + ", modId=" + modId + ", modName=" + modName); + if (modName != null) { // JDWP reports unnamed modules, ignore them + jdwpModuleNames.add(modName); + } // Assert the JDWP CANREAD and CLASSLOADER commands - assertCanRead(modId); - assertClassLoader(modId); + assertCanRead(modId, modName); + assertClassLoader(modId, modName); } System.out.println("Module names reported by JDWP: " + Arrays.toString(jdwpModuleNames.toArray())); @@ -114,14 +121,10 @@ public class AllModulesCommandTest implements DebuggeeLauncher.Listener { } } - private void getModuleName(long modId) throws IOException { - // Send out the JDWP NAME command and store the reply + private String getModuleName(long modId) throws IOException { JdwpModNameReply reply = new JdwpModNameCmd(modId).send(channel); assertReply(reply); - String modName = reply.getModuleName(); - if (modName != null) { // JDWP reports unnamed modules, ignore them - jdwpModuleNames.add(modName); - } + return reply.getModuleName(); } private void assertReply(JdwpReply reply) { @@ -131,19 +134,47 @@ public class AllModulesCommandTest implements DebuggeeLauncher.Listener { } } - private void assertCanRead(long modId) throws IOException { + private void assertCanRead(long modId, String modName) throws IOException { // Simple assert for the CANREAD command JdwpCanReadReply reply = new JdwpCanReadCmd(modId, modId).send(channel); assertReply(reply); - assertTrue(reply.canRead(), "canRead() reports false for reading from the same module"); + assertTrue(reply.canRead(), "canRead() reports false for reading from the same module '" + modName + "', moduleId=" + modId); } - private void assertClassLoader(long modId) throws IOException { - // Simple 
assert for the CLASSLOADER command + private void assertClassLoader(long modId, String modName) throws IOException { + // Verify that the module classloader id is valid JdwpClassLoaderReply reply = new JdwpClassLoaderCmd(modId).send(channel); assertReply(reply); - long clId = reply.getClassLoaderId(); - assertTrue(clId >= 0, "bad classloader refId " + clId + " for module id " + modId); + long moduleClassLoader = reply.getClassLoaderId(); + assertTrue(moduleClassLoader >= 0, "bad classloader refId " + moduleClassLoader + " for module '" + modName + "', moduleId=" + modId); + + String clsModName = getModuleName(modId); + if ("java.base".equals(clsModName)) { + // For the java.base module, because there will be some loaded classes, we can verify + // that some of the loaded classes do report the java.base module as the module they belong to + assertGetModule(moduleClassLoader, modId); + } + } + + private void assertGetModule(long moduleClassLoader, long modId) throws IOException { + // Get all the visible classes for the module classloader + JdwpVisibleClassesReply visibleClasses = new JdwpVisibleClassesCmd(moduleClassLoader).send(channel); + assertReply(visibleClasses); + + boolean moduleFound = false; + for (long clsId : visibleClasses.getVisibleClasses()) { + // For each visible class get the module the class belongs to + JdwpModuleReply modReply = new JdwpModuleCmd(clsId).send(channel); + assertReply(modReply); + long clsModId = modReply.getModuleId(); + + // At least one of the visible classes should belong to our module + if (modId == clsModId) { + moduleFound = true; + break; + } + } + assertTrue(moduleFound, "None of the visible classes for the classloader of the module " + getModuleName(modId) + " reports the module as its own"); } } diff --git a/hotspot/test/serviceability/jdwp/JdwpCanReadReply.java b/hotspot/test/serviceability/jdwp/JdwpCanReadReply.java index f838baf89b6..438f7410c8d 100644 --- a/hotspot/test/serviceability/jdwp/JdwpCanReadReply.java +++ 
b/hotspot/test/serviceability/jdwp/JdwpCanReadReply.java @@ -31,7 +31,7 @@ public class JdwpCanReadReply extends JdwpReply { private boolean canRead; protected void parseData(DataInputStream ds) throws IOException { - canRead = ds.read() == 1; + canRead = (ds.read() != 0); } public boolean canRead() { diff --git a/hotspot/test/serviceability/jdwp/JdwpCmd.java b/hotspot/test/serviceability/jdwp/JdwpCmd.java index fe7f28707a8..05dbb6efb7f 100644 --- a/hotspot/test/serviceability/jdwp/JdwpCmd.java +++ b/hotspot/test/serviceability/jdwp/JdwpCmd.java @@ -70,7 +70,6 @@ public abstract class JdwpCmd { } public final T send(JdwpChannel channel) throws IOException { - System.err.println("Sending command: " + this); channel.write(data.array(), HEADER_LEN + getDataLength()); if (reply != null) { reply.initFromStream(channel.getInputStream()); diff --git a/hotspot/test/serviceability/jdwp/JdwpModuleCmd.java b/hotspot/test/serviceability/jdwp/JdwpModuleCmd.java new file mode 100644 index 00000000000..a9ed54419fa --- /dev/null +++ b/hotspot/test/serviceability/jdwp/JdwpModuleCmd.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * The JDWP MODULE command + */ +public class JdwpModuleCmd extends JdwpCmd { + + public JdwpModuleCmd(long refId) { + super(19, 2, JdwpModuleReply.class, refLen()); + putRefId(refId); + } + +} diff --git a/hotspot/test/serviceability/jdwp/JdwpModuleReply.java b/hotspot/test/serviceability/jdwp/JdwpModuleReply.java new file mode 100644 index 00000000000..19baa7a4dde --- /dev/null +++ b/hotspot/test/serviceability/jdwp/JdwpModuleReply.java @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +import java.io.DataInputStream; +import java.io.IOException; + +/** + * The reply to the JDWP MODULE command + */ +public class JdwpModuleReply extends JdwpReply { + + private long moduleId; + + protected void parseData(DataInputStream ds) throws IOException { + moduleId = readRefId(ds); + } + + public long getModuleId() { + return moduleId; + } + +} diff --git a/hotspot/test/serviceability/jdwp/JdwpVisibleClassesCmd.java b/hotspot/test/serviceability/jdwp/JdwpVisibleClassesCmd.java new file mode 100644 index 00000000000..daab8a11d6a --- /dev/null +++ b/hotspot/test/serviceability/jdwp/JdwpVisibleClassesCmd.java @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/** + * The JDWP VISIBLE CLASSES command + */ +public class JdwpVisibleClassesCmd extends JdwpCmd { + + public JdwpVisibleClassesCmd(long classLoaderId) { + super(1, 14, JdwpVisibleClassesReply.class, refLen()); + putRefId(classLoaderId); + } + +} diff --git a/hotspot/test/serviceability/jdwp/JdwpVisibleClassesReply.java b/hotspot/test/serviceability/jdwp/JdwpVisibleClassesReply.java new file mode 100644 index 00000000000..5381c43c51a --- /dev/null +++ b/hotspot/test/serviceability/jdwp/JdwpVisibleClassesReply.java @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +import java.io.DataInputStream; +import java.io.IOException; +import java.util.Arrays; + +/** + * The reply to the JDWP VISIBLE CLASSES command + */ +public class JdwpVisibleClassesReply extends JdwpReply { + + private long[] visibleClasses; + + protected void parseData(DataInputStream ds) throws IOException { + int numOfClasses = ds.readInt(); + visibleClasses = new long[numOfClasses]; + for (int i = 0; i < numOfClasses; ++i) { + byte type = ds.readByte(); + long refId = readRefId(ds); + visibleClasses[i] = refId; + } + } + + public long[] getVisibleClasses() { + return Arrays.copyOf(visibleClasses, visibleClasses.length); + } + +} diff --git a/hotspot/test/serviceability/sa/LingeredAppWithInvokeDynamic.java b/hotspot/test/serviceability/sa/LingeredAppWithInvokeDynamic.java new file mode 100644 index 00000000000..4d1c2cc393e --- /dev/null +++ b/hotspot/test/serviceability/sa/LingeredAppWithInvokeDynamic.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import jdk.test.lib.apps.LingeredApp; + +interface TestComparator { + public boolean compare(int a1, int a2); +} + +public class LingeredAppWithInvokeDynamic extends LingeredApp { + public static void main(String args[]) { + Runnable r1 = () -> System.out.println("Hello"); + Runnable r2 = () -> System.out.println("Hello Hello"); + r1.run(); + r2.run(); + TestComparator testComparator = (int a1, int a2) -> {return (a1 > a2);}; + boolean result = testComparator.compare(2, 5); + System.out.println(result); + LingeredApp.main(args); + } + } diff --git a/hotspot/test/serviceability/sa/TestCpoolForInvokeDynamic.java b/hotspot/test/serviceability/sa/TestCpoolForInvokeDynamic.java new file mode 100644 index 00000000000..e8f1fd86703 --- /dev/null +++ b/hotspot/test/serviceability/sa/TestCpoolForInvokeDynamic.java @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import java.util.ArrayList; +import java.util.List; + +import sun.jvm.hotspot.HotSpotAgent; +import sun.jvm.hotspot.utilities.SystemDictionaryHelper; +import sun.jvm.hotspot.oops.InstanceKlass; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.Method; +import sun.jvm.hotspot.utilities.MethodArray; +import sun.jvm.hotspot.ui.classbrowser.HTMLGenerator; + +import jdk.test.lib.apps.LingeredApp; +import jdk.test.lib.JDKToolLauncher; +import jdk.test.lib.JDKToolFinder; +import jdk.test.lib.Platform; +import jdk.test.lib.process.ProcessTools; +import jdk.test.lib.process.OutputAnalyzer; +import jdk.test.lib.Utils; +import jdk.test.lib.Asserts; + +/* + * @test + * @library /test/lib + * @requires os.family != "mac" + * @modules java.base/jdk.internal.misc + * jdk.hotspot.agent/sun.jvm.hotspot + * jdk.hotspot.agent/sun.jvm.hotspot.utilities + * jdk.hotspot.agent/sun.jvm.hotspot.oops + * jdk.hotspot.agent/sun.jvm.hotspot.debugger + * jdk.hotspot.agent/sun.jvm.hotspot.ui.classbrowser + * @ignore 8169232 + * @run main/othervm TestCpoolForInvokeDynamic + */ + +public class TestCpoolForInvokeDynamic { + + private static LingeredAppWithInvokeDynamic theApp = null; + + private static void printBytecodes(String pid, + String[] instanceKlassNames) { + HotSpotAgent agent = new HotSpotAgent(); + try { + agent.attach(Integer.parseInt(pid)); + } + catch (DebuggerException e) { + System.out.println(e.getMessage()); + System.err.println("Unable to connect to process ID: " + pid); + + agent.detach(); + e.printStackTrace(); + } + + for (String instanceKlassName : instanceKlassNames) { + InstanceKlass iKlass = SystemDictionaryHelper.findInstanceKlass(instanceKlassName); + MethodArray methods = iKlass.getMethods(); + for (int i = 0; i < methods.length(); i++) { + Method m = methods.at(i); + 
System.out.println("Method: " + m.getName().asString() + + " in instance klass: " + instanceKlassName); + HTMLGenerator gen = new HTMLGenerator(false); + System.out.println(gen.genHTML(m)); + } + } + agent.detach(); + } + + private static void createAnotherToAttach( + String[] instanceKlassNames, + long lingeredAppPid) throws Exception { + + String[] toolArgs = { + "--add-modules=jdk.hotspot.agent", + "--add-exports=jdk.hotspot.agent/sun.jvm.hotspot=ALL-UNNAMED", + "--add-exports=jdk.hotspot.agent/sun.jvm.hotspot.utilities=ALL-UNNAMED", + "--add-exports=jdk.hotspot.agent/sun.jvm.hotspot.oops=ALL-UNNAMED", + "--add-exports=jdk.hotspot.agent/sun.jvm.hotspot.debugger=ALL-UNNAMED", + "--add-exports=jdk.hotspot.agent/sun.jvm.hotspot.ui.classbrowser=ALL-UNNAMED", + "TestCpoolForInvokeDynamic", + Long.toString(lingeredAppPid) + }; + + // Start a new process to attach to the lingered app + ProcessBuilder processBuilder = ProcessTools.createJavaProcessBuilder(toolArgs); + OutputAnalyzer SAOutput = ProcessTools.executeProcess(processBuilder); + SAOutput.shouldHaveExitValue(0); + System.out.println(SAOutput.getOutput()); + + SAOutput.shouldContain("invokedynamic"); + SAOutput.shouldContain("Name and Type"); + SAOutput.shouldContain("run:()Ljava.lang.Runnable"); + SAOutput.shouldContain("compare:()LTestComparator"); + SAOutput.shouldNotContain("Corrupted constant pool"); + } + + public static void main (String... 
args) throws Exception { + + String[] instanceKlassNames = new String[] { + "LingeredAppWithInvokeDynamic" + }; + + if (!Platform.shouldSAAttach()) { + System.out.println( + "SA attach not expected to work - test skipped."); + return; + } + + if (args == null || args.length == 0) { + try { + List vmArgs = new ArrayList(); + vmArgs.add("-XX:+UsePerfData"); + vmArgs.addAll(Utils.getVmOptions()); + + theApp = new LingeredAppWithInvokeDynamic(); + LingeredApp.startApp(vmArgs, theApp); + createAnotherToAttach(instanceKlassNames, + theApp.getPid()); + } finally { + LingeredApp.stopApp(theApp); + } + } else { + printBytecodes(args[0], instanceKlassNames); + } + } +} diff --git a/hotspot/test/serviceability/sa/TestInstanceKlassSize.java b/hotspot/test/serviceability/sa/TestInstanceKlassSize.java index dd8deda1612..4af527eaa46 100644 --- a/hotspot/test/serviceability/sa/TestInstanceKlassSize.java +++ b/hotspot/test/serviceability/sa/TestInstanceKlassSize.java @@ -98,7 +98,6 @@ public class TestInstanceKlassSize { OutputAnalyzer output = null; try { List vmArgs = new ArrayList(); - vmArgs.add("-XX:+UnlockDiagnosticVMOptions"); vmArgs.add("-XX:+UsePerfData"); vmArgs.addAll(Utils.getVmOptions()); app = LingeredApp.startApp(vmArgs); diff --git a/hotspot/test/serviceability/sa/TestInstanceKlassSizeForInterface.java b/hotspot/test/serviceability/sa/TestInstanceKlassSizeForInterface.java index 0bd8bc38b23..f6a9c3f0bf8 100644 --- a/hotspot/test/serviceability/sa/TestInstanceKlassSizeForInterface.java +++ b/hotspot/test/serviceability/sa/TestInstanceKlassSizeForInterface.java @@ -45,7 +45,6 @@ import jdk.test.lib.Asserts; * --add-exports=jdk.hotspot.agent/sun.jvm.hotspot.debugger=ALL-UNNAMED * TestInstanceKlassSizeForInterface.java * @run main/othervm - * -XX:+UnlockDiagnosticVMOptions * --add-modules=jdk.hotspot.agent * --add-exports=jdk.hotspot.agent/sun.jvm.hotspot=ALL-UNNAMED * --add-exports=jdk.hotspot.agent/sun.jvm.hotspot.utilities=ALL-UNNAMED diff --git 
a/hotspot/test/serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java b/hotspot/test/serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java index 3469bdf6315..d2e64f75d34 100644 --- a/hotspot/test/serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java +++ b/hotspot/test/serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java @@ -42,9 +42,6 @@ import jdk.test.lib.process.ProcessTools; * @bug 6313383 * @key regression * @summary Regression test for hprof export issue due to large heaps (>2G) - * Started failing on 2016.06.24 due to 8160376 on MacOS X so quarantine - * it on that platform: - * @requires os.family != "mac" * @library /test/lib * @modules java.base/jdk.internal.misc * java.compiler diff --git a/hotspot/test/serviceability/tmtools/jstat/GcCauseTest02.java b/hotspot/test/serviceability/tmtools/jstat/GcCauseTest02.java index 0e886ffe1b1..56ef9387cf8 100644 --- a/hotspot/test/serviceability/tmtools/jstat/GcCauseTest02.java +++ b/hotspot/test/serviceability/tmtools/jstat/GcCauseTest02.java @@ -30,6 +30,7 @@ * @modules java.base/jdk.internal.misc * @library /test/lib * @library ../share + * @ignore 8168396 * @run main/othervm -XX:+UsePerfData -Xmx128M -XX:MaxMetaspaceSize=128M GcCauseTest02 */ import utils.*; diff --git a/hotspot/test/serviceability/tmtools/jstat/GcTest02.java b/hotspot/test/serviceability/tmtools/jstat/GcTest02.java index 0c022358edc..c46b4ad5b5d 100644 --- a/hotspot/test/serviceability/tmtools/jstat/GcTest02.java +++ b/hotspot/test/serviceability/tmtools/jstat/GcTest02.java @@ -31,7 +31,7 @@ import utils.*; * @modules java.base/jdk.internal.misc * @library /test/lib * @library ../share - * @ignore 8155570 + * @ignore 8168396 * @run main/othervm -XX:+UsePerfData -Xmx128M -XX:MaxMetaspaceSize=128M GcTest02 */ diff --git a/hotspot/test/serviceability/tmtools/jstat/utils/GcProvokerImpl.java b/hotspot/test/serviceability/tmtools/jstat/utils/GcProvokerImpl.java index f04cdf6a375..565d86cf377 100644 --- 
a/hotspot/test/serviceability/tmtools/jstat/utils/GcProvokerImpl.java +++ b/hotspot/test/serviceability/tmtools/jstat/utils/GcProvokerImpl.java @@ -50,7 +50,7 @@ public class GcProvokerImpl implements GcProvoker { used += memoryChunk; } catch (OutOfMemoryError e) { list = null; - throw new RuntimeException("Unexpected OOME while eating " + targetUsage + " of heap memory."); + throw new RuntimeException("Unexpected OOME '" + e.getMessage() + "' while eating " + targetUsage + " of heap memory."); } } return list; @@ -73,8 +73,10 @@ public class GcProvokerImpl implements GcProvoker { @Override public void eatMetaspaceAndHeap(float targetMemoryUsagePercent) { - eatenMemory = eatHeapMemory(targetMemoryUsagePercent); + // Metaspace should be filled before Java Heap to prevent unexpected OOME + // in the Java Heap while filling Metaspace eatenMetaspace = eatMetaspace(targetMemoryUsagePercent); + eatenMemory = eatHeapMemory(targetMemoryUsagePercent); } private static List eatMetaspace(float targetUsage) { @@ -97,7 +99,7 @@ public class GcProvokerImpl implements GcProvoker { list.add(gp.create(0)); } catch (OutOfMemoryError oome) { list = null; - throw new RuntimeException("Unexpected OOME while eating " + targetUsage + " of Metaspace."); + throw new RuntimeException("Unexpected OOME '" + oome.getMessage() + "' while eating " + targetUsage + " of Metaspace."); } MemoryUsage memoryUsage = metaspacePool.getUsage(); currentUsage = (((float) memoryUsage.getUsed()) / memoryUsage.getMax());