Commit 947fc09db0 by Jesper Wilhelmsson, 2017-10-14 00:19:29 +02:00
344 changed files with 10893 additions and 3764 deletions

View File

@ -113,6 +113,7 @@ PLATFORM_MODULES += \
jdk.dynalink \
jdk.httpserver \
jdk.incubator.httpclient \
jdk.internal.vm.compiler.management \
jdk.jsobject \
jdk.localedata \
jdk.naming.dns \
@ -215,6 +216,7 @@ endif
ifeq ($(INCLUDE_GRAAL), false)
MODULES_FILTER += jdk.internal.vm.compiler
MODULES_FILTER += jdk.internal.vm.compiler.management
endif
################################################################################

View File

@ -1063,7 +1063,7 @@ var getJibProfilesDependencies = function (input, common) {
jtreg: {
server: "javare",
revision: "4.2",
build_number: "b08",
build_number: "b09",
checksum_file: "MD5_VALUES",
file: "jtreg_bin-4.2.zip",
environment_name: "JT_HOME",

View File

@ -54,15 +54,4 @@ $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java:
GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java
$(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat: \
$(TOPDIR)/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat \
$(VARDEPS_FILE) $(BUILD_TOOLS_JDK)
$(MKDIR) -p $(@D)
$(RM) $@ $@.tmp
$(TOOL_GENCLASSLOADERMAP) -boot $(BOOT_MODULES_LIST) \
-platform $(PLATFORM_MODULES_LIST) -o $@.tmp $<
$(MV) $@.tmp $@
GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat
################################################################################

View File

@ -47,6 +47,9 @@ endif
ifeq ($(call check-jvm-feature, zero), true)
JVM_CFLAGS_FEATURES += -DZERO -DCC_INTERP -DZERO_LIBARCH='"$(OPENJDK_TARGET_CPU_LEGACY_LIB)"' $(LIBFFI_CFLAGS)
JVM_LIBS_FEATURES += $(LIBFFI_LIBS)
ifeq ($(OPENJDK_TARGET_CPU), sparcv9)
BUILD_LIBJVM_EXTRA_FILES := $(TOPDIR)/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp
endif
endif
ifeq ($(call check-jvm-feature, shark), true)

View File

@ -77,30 +77,22 @@ public class GenModuleLoaderMap {
throw new IllegalArgumentException(source + " does not exist");
}
boolean needsQuotes = outfile.toString().contains(".java.tmp");
try (BufferedWriter bw = Files.newBufferedWriter(outfile, StandardCharsets.UTF_8);
PrintWriter writer = new PrintWriter(bw)) {
for (String line : Files.readAllLines(source)) {
if (line.contains("@@BOOT_MODULE_NAMES@@")) {
line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules, needsQuotes);
line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules);
} else if (line.contains("@@PLATFORM_MODULE_NAMES@@")) {
line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules, needsQuotes);
line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules);
}
writer.println(line);
}
}
}
private static String patch(String s, String tag, Stream<String> stream, boolean needsQuotes) {
String mns = null;
if (needsQuotes) {
mns = stream.sorted()
.collect(Collectors.joining("\",\n \""));
} else {
mns = stream.sorted()
.collect(Collectors.joining("\n"));
}
private static String patch(String s, String tag, Stream<String> stream) {
String mns = stream.sorted()
.collect(Collectors.joining("\",\n \""));
return s.replace(tag, mns);
}
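
The change above drops the needsQuotes flag, so patch() now always emits a quoted, sorted module list. A minimal standalone sketch of what the simplified helper produces (not part of the commit; the demo class, module names, and indentation are made up for illustration):

import java.util.stream.Collectors;
import java.util.stream.Stream;

public class PatchDemo {                                   // hypothetical demo class
    public static void main(String[] args) {
        // Made-up module names; the real lists come from the build system.
        Stream<String> bootModules = Stream.of("java.logging", "java.base");
        // Same transformation as patch(): sort, then join with quote/comma/newline.
        String mns = bootModules.sorted()
                .collect(Collectors.joining("\",\n    \""));
        String line = "    \"@@BOOT_MODULE_NAMES@@\",";    // template line (made up)
        System.out.println(line.replace("@@BOOT_MODULE_NAMES@@", mns));
        // prints:
        //     "java.base",
        //     "java.logging",
    }
}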

View File

@ -59,6 +59,7 @@ BUILD_HOTSPOT_JTREG_NATIVE_SRC += \
$(TOPDIR)/test/hotspot/jtreg/runtime/SameObject \
$(TOPDIR)/test/hotspot/jtreg/runtime/BoolReturn \
$(TOPDIR)/test/hotspot/jtreg/runtime/noClassDefFoundMsg \
$(TOPDIR)/test/hotspot/jtreg/runtime/RedefineTests \
$(TOPDIR)/test/hotspot/jtreg/compiler/floatingpoint/ \
$(TOPDIR)/test/hotspot/jtreg/compiler/calls \
$(TOPDIR)/test/hotspot/jtreg/serviceability/jvmti/GetOwnedMonitorInfo \
@ -103,6 +104,7 @@ ifeq ($(TOOLCHAIN_TYPE), solstudio)
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAClassLoadPrepare := -lc
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAThreadStart := -lc
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libAllowedFunctions := -lc
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libRedefineDoubleDelete := -lc
endif
ifeq ($(OPENJDK_TARGET_OS), linux)

View File

@ -70,7 +70,7 @@
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="cpu/vm/templateTable_x86_32.cpp|cpu/vm/templateInterpreter_x86_32.cpp|cpu/vm/stubRoutines_x86_32.cpp|cpu/vm/stubGenerator_x86_32.cpp|cpu/vm/sharedRuntime_x86_32.cpp|cpu/vm/jniFastGetField_x86_32.cpp|cpu/vm/interpreterRT_x86_32.cpp|cpu/vm/interpreter_x86_32.cpp|cpu/vm/interp_masm_x86_32.cpp|cpu/vm/vtableStubs_x86_32.cpp" flags="VALUE_WORKSPACE_PATH" kind="sourcePath" name=""/>
<entry excluding="cpu/x86/templateTable_x86_32.cpp|cpu/x86/templateInterpreter_x86_32.cpp|cpu/x86/stubRoutines_x86_32.cpp|cpu/x86/stubGenerator_x86_32.cpp|cpu/x86/sharedRuntime_x86_32.cpp|cpu/x86/jniFastGetField_x86_32.cpp|cpu/x86/interpreterRT_x86_32.cpp|cpu/x86/interpreter_x86_32.cpp|cpu/x86/interp_masm_x86_32.cpp|cpu/x86/vtableStubs_x86_32.cpp" flags="VALUE_WORKSPACE_PATH" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>

View File

@ -256,14 +256,10 @@ class HotSpotProject(mx.NativeProject):
"""
roots = [
'ASSEMBLY_EXCEPTION',
'LICENSE',
'README',
'THIRD_PARTY_README',
'agent',
'make',
'src',
'test'
'cpu',
'os',
'os_cpu',
'share'
]
for jvmVariant in _jdkJvmVariants:
@ -605,6 +601,16 @@ def _get_openjdk_cpu():
def _get_openjdk_os_cpu():
return _get_openjdk_os() + '-' + _get_openjdk_cpu()
def _get_jdk_dir():
suiteParentDir = dirname(_suite.dir)
# suiteParentDir is now something like: /some_prefix/jdk10-hs/open/src
pathComponents = suiteParentDir.split(os.sep)
for i in range(0, len(pathComponents)):
if pathComponents[i] in ["open", "src"]:
del pathComponents[i:]
break
return os.path.join(os.sep, *pathComponents)
def _get_jdk_build_dir(debugLevel=None):
"""
Gets the directory into which the JDK is built. This directory contains
@ -613,7 +619,7 @@ def _get_jdk_build_dir(debugLevel=None):
if debugLevel is None:
debugLevel = _vm.debugLevel
name = '{}-{}-{}-{}'.format(_get_openjdk_os_cpu(), 'normal', _vm.jvmVariant, debugLevel)
return join(dirname(_suite.dir), 'build', name)
return join(_get_jdk_dir(), 'build', name)
_jvmci_bootclasspath_prepends = []

View File

@ -24,9 +24,7 @@ suite = {
"defaultLicense" : "GPLv2-CPE",
# This puts mx/ as a sibling of the JDK build configuration directories
# (e.g., macosx-x86_64-normal-server-release).
"outputRoot" : "../build/mx/hotspot",
"outputRoot" : "../../build/mx/hotspot",
# ------------- Libraries -------------
@ -43,7 +41,7 @@ suite = {
# ------------- JVMCI:Service -------------
"jdk.vm.ci.services" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"javaCompliance" : "9",
"workingSets" : "API,JVMCI",
@ -52,7 +50,7 @@ suite = {
# ------------- JVMCI:API -------------
"jdk.vm.ci.common" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"checkstyle" : "jdk.vm.ci.services",
"javaCompliance" : "9",
@ -60,7 +58,7 @@ suite = {
},
"jdk.vm.ci.meta" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"checkstyle" : "jdk.vm.ci.services",
"javaCompliance" : "9",
@ -68,7 +66,7 @@ suite = {
},
"jdk.vm.ci.code" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.meta"],
"checkstyle" : "jdk.vm.ci.services",
@ -77,7 +75,7 @@ suite = {
},
"jdk.vm.ci.code.test" : {
"subDir" : "test/compiler/jvmci",
"subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:JUNIT",
@ -92,7 +90,7 @@ suite = {
},
"jdk.vm.ci.runtime" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.code",
@ -104,7 +102,7 @@ suite = {
},
"jdk.vm.ci.runtime.test" : {
"subDir" : "test/compiler/jvmci",
"subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:JUNIT",
@ -119,7 +117,7 @@ suite = {
# ------------- JVMCI:HotSpot -------------
"jdk.vm.ci.aarch64" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.code"],
"checkstyle" : "jdk.vm.ci.services",
@ -128,7 +126,7 @@ suite = {
},
"jdk.vm.ci.amd64" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.code"],
"checkstyle" : "jdk.vm.ci.services",
@ -137,7 +135,7 @@ suite = {
},
"jdk.vm.ci.sparc" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.code"],
"checkstyle" : "jdk.vm.ci.services",
@ -146,7 +144,7 @@ suite = {
},
"jdk.vm.ci.hotspot" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.common",
@ -163,7 +161,7 @@ suite = {
},
"jdk.vm.ci.hotspot.test" : {
"subDir" : "test/compiler/jvmci",
"subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"TESTNG",
@ -175,7 +173,7 @@ suite = {
},
"jdk.vm.ci.hotspot.aarch64" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.aarch64",
@ -187,7 +185,7 @@ suite = {
},
"jdk.vm.ci.hotspot.amd64" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.amd64",
@ -199,7 +197,7 @@ suite = {
},
"jdk.vm.ci.hotspot.sparc" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.sparc",
@ -221,12 +219,12 @@ suite = {
# ------------- Distributions -------------
"JVMCI_SERVICES" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"dependencies" : ["jdk.vm.ci.services"],
},
"JVMCI_API" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"dependencies" : [
"jdk.vm.ci.runtime",
"jdk.vm.ci.common",
@ -240,7 +238,7 @@ suite = {
},
"JVMCI_HOTSPOT" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"dependencies" : [
"jdk.vm.ci.hotspot.aarch64",
"jdk.vm.ci.hotspot.amd64",
@ -253,7 +251,7 @@ suite = {
},
"JVMCI_TEST" : {
"subDir" : "test/compiler/jvmci",
"subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"dependencies" : [
"jdk.vm.ci.runtime.test",
],

View File

@ -2840,6 +2840,44 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
bind(L_done);
}
// Code for BigInteger::mulAdd intrinsic
// out = r0
// in = r1
// offset = r2 (already out.length-offset)
// len = r3
// k = r4
//
// pseudo code from java implementation:
// carry = 0;
// offset = out.length-offset - 1;
// for (int j=len-1; j >= 0; j--) {
// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
// out[offset--] = (int)product;
// carry = product >>> 32;
// }
// return (int)carry;
void MacroAssembler::mul_add(Register out, Register in, Register offset,
Register len, Register k) {
Label LOOP, END;
// pre-loop
cmp(len, zr); // cmp, not cbz/cbnz: to use condition twice => fewer branches
csel(out, zr, out, Assembler::EQ);
br(Assembler::EQ, END);
add(in, in, len, LSL, 2); // in[j+1] address
add(offset, out, offset, LSL, 2); // out[offset + 1] address
mov(out, zr); // used to keep carry now
BIND(LOOP);
ldrw(rscratch1, Address(pre(in, -4)));
madd(rscratch1, rscratch1, k, out);
ldrw(rscratch2, Address(pre(offset, -4)));
add(rscratch1, rscratch1, rscratch2);
strw(rscratch1, Address(offset));
lsr(out, rscratch1, 32);
subs(len, len, 1);
br(Assembler::NE, LOOP);
BIND(END);
}
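
For reference, a hedged Java-level sketch of the loop that the mul_add macro above implements, written out from the pseudo code comment; this is an illustration, not the JDK's BigInteger source:

// Multiply in[0..len-1] by k, accumulate into out starting at index
// (out.length - offset - 1) and walking down, and return the final carry,
// mirroring the assembly above.
static int mulAdd(int[] out, int[] in, int offset, int len, int k) {
    final long LONG_MASK = 0xffffffffL;
    long kLong = k & LONG_MASK;
    long carry = 0;
    offset = out.length - offset - 1;
    for (int j = len - 1; j >= 0; j--) {
        long product = (in[j] & LONG_MASK) * kLong
                     + (out[offset] & LONG_MASK) + carry;
        out[offset--] = (int) product;
        carry = product >>> 32;
    }
    return (int) carry;
}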
/**
* Emits code to update CRC-32 with a byte value according to constants in table
*
@ -3291,6 +3329,7 @@ void MacroAssembler::load_mirror(Register dst, Register method) {
ldr(dst, Address(dst, ConstMethod::constants_offset()));
ldr(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
ldr(dst, Address(dst, mirror_offset));
resolve_oop_handle(dst);
}
void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {

View File

@ -1265,6 +1265,7 @@ public:
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
Register zlen, Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6, Register tmp7);
void mul_add(Register out, Register in, Register offs, Register len, Register k);
// ISB may be needed because of a safepoint
void maybe_isb() { isb(); }

View File

@ -3607,6 +3607,63 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
address generate_squareToLen() {
// The squareToLen algorithm for sizes 1..127 described in the Java code is
// faster than multiply_to_len on some CPUs and slower on others, but
// multiply_to_len gives slightly better results overall.
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "squareToLen");
address start = __ pc();
const Register x = r0;
const Register xlen = r1;
const Register z = r2;
const Register zlen = r3;
const Register y = r4; // == x
const Register ylen = r5; // == xlen
const Register tmp1 = r10;
const Register tmp2 = r11;
const Register tmp3 = r12;
const Register tmp4 = r13;
const Register tmp5 = r14;
const Register tmp6 = r15;
const Register tmp7 = r16;
RegSet spilled_regs = RegSet::of(y, ylen);
BLOCK_COMMENT("Entry:");
__ enter();
__ push(spilled_regs, sp);
__ mov(y, x);
__ mov(ylen, xlen);
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
__ pop(spilled_regs, sp);
__ leave();
__ ret(lr);
return start;
}
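
In Java terms the stub above squares via the general multiply, since y and ylen are aliased to x and xlen before the call; a hedged sketch, where multiplyToLen stands in for the general multiply-to-len routine and is named here only for illustration:

// Squaring delegates to the general multiply with y == x, ylen == len,
// exactly as mov(y, x); mov(ylen, xlen) does in the stub.
static int[] squareToLen(int[] x, int len, int[] z) {
    return multiplyToLen(x, len, x, len, z);   // hypothetical helper for the general multiply
}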
address generate_mulAdd() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "mulAdd");
address start = __ pc();
const Register out = r0;
const Register in = r1;
const Register offset = r2;
const Register len = r3;
const Register k = r4;
BLOCK_COMMENT("Entry:");
__ enter();
__ mul_add(out, in, offset, len, k);
__ leave();
__ ret(lr);
return start;
}
void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
@ -4913,6 +4970,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
if (UseSquareToLenIntrinsic) {
StubRoutines::_squareToLen = generate_squareToLen();
}
if (UseMulAddIntrinsic) {
StubRoutines::_mulAdd = generate_mulAdd();
}
if (UseMontgomeryMultiplyIntrinsic) {
StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);

View File

@ -2297,6 +2297,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
ConstantPoolCacheEntry::f1_offset())));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ldr(obj, Address(obj, mirror_offset));
__ resolve_oop_handle(obj);
}
}

View File

@ -340,6 +340,14 @@ void VM_Version::get_processor_features() {
UseMultiplyToLenIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
UseSquareToLenIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
UseMulAddIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) {
UseBarriersForVolatile = (_features & CPU_DMB_ATOMICS) != 0;
}

View File

@ -2899,6 +2899,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp)
ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
ldr(mirror, Address(tmp, mirror_offset));
resolve_oop_handle(mirror);
}

View File

@ -2963,6 +2963,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Rcache,
cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ldr(Robj, Address(Robj, mirror_offset));
__ resolve_oop_handle(Robj);
}
}

View File

@ -517,6 +517,9 @@ class Assembler : public AbstractAssembler {
XXPERMDI_OPCODE= (60u << OPCODE_SHIFT | 10u << 3),
XXMRGHW_OPCODE = (60u << OPCODE_SHIFT | 18u << 3),
XXMRGLW_OPCODE = (60u << OPCODE_SHIFT | 50u << 3),
XXSPLTW_OPCODE = (60u << OPCODE_SHIFT | 164u << 2),
XXLXOR_OPCODE = (60u << OPCODE_SHIFT | 154u << 3),
XXLEQV_OPCODE = (60u << OPCODE_SHIFT | 186u << 3),
// Vector Permute and Formatting
VPKPX_OPCODE = (4u << OPCODE_SHIFT | 782u ),
@ -1125,6 +1128,7 @@ class Assembler : public AbstractAssembler {
static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions
static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction
static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions
static int xxsplt_uim(int x) { return opp_u_field(x, 15, 14); } // for xxsplt* instructions
//static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes
//static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes
@ -1308,6 +1312,7 @@ class Assembler : public AbstractAssembler {
inline void li( Register d, int si16);
inline void lis( Register d, int si16);
inline void addir(Register d, int si16, Register a);
inline void subi( Register d, Register a, int si16);
static bool is_addi(int x) {
return ADDI_OPCODE == (x & ADDI_OPCODE_MASK);
@ -2154,6 +2159,11 @@ class Assembler : public AbstractAssembler {
inline void xxpermdi( VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm);
inline void xxmrghw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxmrglw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void mtvsrd( VectorSRegister d, Register a);
inline void mtvsrwz( VectorSRegister d, Register a);
inline void xxspltw( VectorSRegister d, VectorSRegister b, int ui2);
inline void xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b);
// VSX Extended Mnemonics
inline void xxspltd( VectorSRegister d, VectorSRegister a, int x);
@ -2174,7 +2184,8 @@ class Assembler : public AbstractAssembler {
inline void vsbox( VectorRegister d, VectorRegister a);
// SHA (introduced with Power 8)
// Not yet implemented.
inline void vshasigmad(VectorRegister d, VectorRegister a, bool st, int six);
inline void vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);
// Vector Binary Polynomial Multiplication (introduced with Power 8)
inline void vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b);
@ -2285,6 +2296,11 @@ class Assembler : public AbstractAssembler {
inline void lvsl( VectorRegister d, Register s2);
inline void lvsr( VectorRegister d, Register s2);
// Endianness-specific concatenation of 2 loaded vectors.
inline void load_perm(VectorRegister perm, Register addr);
inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
inline void vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm);
// RegisterOrConstant versions.
// These emitters choose between the versions using two registers and
// those with register and immediate, depending on the content of roc.

View File

@ -164,6 +164,7 @@ inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32
inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); }
inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); }
inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
inline void Assembler::subi( Register d, Register a, int si16) { Assembler::addi(d, a, -si16); }
// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
inline void Assembler::cmpi( ConditionRegister f, int l, Register a, int si16) { emit_int32( CMPI_OPCODE | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
@ -760,9 +761,14 @@ inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit
// Vector-Scalar (VSX) instructions.
inline void Assembler::lxvd2x( VectorSRegister d, Register s1) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::lxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra(0) | rb(s1)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::mtvsrd( VectorSRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d) | ra(a)); }
inline void Assembler::mtvsrwz( VectorSRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d) | ra(a)); }
inline void Assembler::xxspltw( VectorSRegister d, VectorSRegister b, int ui2) { emit_int32( XXSPLTW_OPCODE | vsrt(d) | vsrb(b) | xxsplt_uim(uimm(ui2,2))); }
inline void Assembler::xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLXOR_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLEQV_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mtvrwz( VectorRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrwz( Register a, VectorRegister d) { emit_int32( MFVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
@ -925,7 +931,8 @@ inline void Assembler::vncipherlast(VectorRegister d, VectorRegister a, VectorRe
inline void Assembler::vsbox( VectorRegister d, VectorRegister a) { emit_int32( VSBOX_OPCODE | vrt(d) | vra(a) ); }
// SHA (introduced with Power 8)
// Not yet implemented.
inline void Assembler::vshasigmad(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAD_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
inline void Assembler::vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAW_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
// Vector Binary Polynomial Multiplication (introduced with Power 8)
inline void Assembler::vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPMSUMB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@ -1034,6 +1041,30 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::load_perm(VectorRegister perm, Register addr) {
#if defined(VM_LITTLE_ENDIAN)
lvsr(perm, addr);
#else
lvsl(perm, addr);
#endif
}
inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm) {
#if defined(VM_LITTLE_ENDIAN)
vperm(first_dest, second, first_dest, perm);
#else
vperm(first_dest, first_dest, second, perm);
#endif
}
inline void Assembler::vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm) {
#if defined(VM_LITTLE_ENDIAN)
vperm(dest, second, first, perm);
#else
vperm(dest, first, second, perm);
#endif
}
inline void Assembler::load_const(Register d, void* x, Register tmp) {
load_const(d, (long)x, tmp);
}

View File

@ -32,7 +32,7 @@
// Sets the default values for platform dependent flags used by the runtime system.
// (see globals.hpp)
define_pd_global(bool, ShareVtableStubs, false); // Improves performance markedly for mtrt and compress.
define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.
@ -103,6 +103,9 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
"Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
\
product(bool, SuperwordUseVSX, false, \
"Use Power8 VSX instructions for superword optimization.") \
\
/* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \
/* indirect call by a direct call. */ \
product(bool, ReoptimizeCallSequences, true, \

View File

@ -129,7 +129,7 @@ void MacroAssembler::calculate_address_from_global_toc(Register dst, address add
}
}
int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
address MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
const int offset = MacroAssembler::offset_to_global_toc(addr);
const address inst2_addr = a;
@ -155,7 +155,7 @@ int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, addres
assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
return (int)((intptr_t)addr - (intptr_t)inst1_addr);
return inst1_addr;
}
address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
@ -201,7 +201,7 @@ address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr
// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
// ori rx = rx | const.lo
// Clrldi will be passed by.
int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
address MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
assert(UseCompressedOops, "Should only patch compressed oops");
const address inst2_addr = a;
@ -227,7 +227,7 @@ int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop dat
set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
set_imm((int *)inst2_addr, (xd)); // unsigned int
return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
return inst1_addr;
}
// Get compressed oop or klass constant.
@ -3382,6 +3382,7 @@ void MacroAssembler::load_mirror_from_const_method(Register mirror, Register con
ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);
resolve_oop_handle(mirror);
}
// Clear Array
@ -5234,6 +5235,40 @@ void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
bind(L_post_third_loop_done);
} // multiply_128_x_128_loop
void MacroAssembler::muladd(Register out, Register in,
Register offset, Register len, Register k,
Register tmp1, Register tmp2, Register carry) {
// Labels
Label LOOP, SKIP;
// Make sure length is positive.
cmpdi (CCR0, len, 0);
// Prepare variables
subi (offset, offset, 4);
li (carry, 0);
ble (CCR0, SKIP);
mtctr (len);
subi (len, len, 1 );
sldi (len, len, 2 );
// Main loop
bind(LOOP);
lwzx (tmp1, len, in );
lwzx (tmp2, offset, out );
mulld (tmp1, tmp1, k );
add (tmp2, carry, tmp2 );
add (tmp2, tmp1, tmp2 );
stwx (tmp2, offset, out );
srdi (carry, tmp2, 32 );
subi (offset, offset, 4 );
subi (len, len, 4 );
bdnz (LOOP);
bind(SKIP);
}
void MacroAssembler::multiply_to_len(Register x, Register xlen,
Register y, Register ylen,
Register z, Register zlen,

View File

@ -105,13 +105,15 @@ class MacroAssembler: public Assembler {
};
inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
static int patch_calculate_address_from_global_toc_at(address a, address addr, address bound);
// Returns address of first instruction in sequence.
static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr);
static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);
#ifdef _LP64
// Patch narrow oop constant.
inline static bool is_set_narrow_oop(address a, address bound);
static int patch_set_narrow_oop(address a, address bound, narrowOop data);
// Returns address of first instruction in sequence.
static address patch_set_narrow_oop(address a, address bound, narrowOop data);
static narrowOop get_narrow_oop(address a, address bound);
#endif
@ -813,6 +815,8 @@ class MacroAssembler: public Assembler {
Register yz_idx, Register idx, Register carry,
Register product_high, Register product,
Register carry2, Register tmp);
void muladd(Register out, Register in, Register offset, Register len, Register k,
Register tmp1, Register tmp2, Register carry);
void multiply_to_len(Register x, Register xlen,
Register y, Register ylen,
Register z, Register zlen,
@ -862,6 +866,40 @@ class MacroAssembler: public Assembler {
void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
bool invertCRC);
// SHA-2 auxiliary functions and public interfaces
private:
void sha256_deque(const VectorRegister src,
const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
const int total_ws, const Register k, const VectorRegister* kpws,
const int total_kpws);
void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
const Register j, const Register k);
void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
const VectorRegister c, const VectorRegister d, const VectorRegister e,
const VectorRegister f, const VectorRegister g, const VectorRegister h,
const Register hptr);
void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
const VectorRegister w2, const VectorRegister w3,
const VectorRegister w4, const VectorRegister w5,
const VectorRegister w6, const VectorRegister w7,
const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
const VectorRegister vRb, const Register k);
public:
void sha256(bool multi_block);
void sha512(bool multi_block);
//
// Debugging
//

File diff suppressed because it is too large.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -221,13 +221,13 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
// A calculation relative to the global TOC.
if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
(address)data) {
const int invalidated_range =
MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
const address inst2_addr = addr;
const address inst1_addr =
MacroAssembler::patch_calculate_address_from_global_toc_at(inst2_addr, cb->content_begin(),
(address)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::ppc64_flush_icache_bytes(start, range);
assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
const int range = inst2_addr - inst1_addr + BytesPerInstWord;
ICache::ppc64_flush_icache_bytes(inst1_addr, range);
}
next_address = addr + 1 * BytesPerInstWord;
} else if (MacroAssembler::is_load_const_at(addr)) {
@ -288,15 +288,15 @@ void NativeMovConstReg::set_data(intptr_t data) {
}
void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
address addr = addr_at(0);
address inst2_addr = addr_at(0);
CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
const int invalidated_range =
MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::ppc64_flush_icache_bytes(start, range);
if (MacroAssembler::get_narrow_oop(inst2_addr, cb->content_begin()) == (long)data)
return;
const address inst1_addr =
MacroAssembler::patch_set_narrow_oop(inst2_addr, cb->content_begin(), (long)data);
assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
const int range = inst2_addr - inst1_addr + BytesPerInstWord;
ICache::ppc64_flush_icache_bytes(inst1_addr, range);
}
// Do not use an assertion here. Let clients decide whether they only

View File

@ -254,6 +254,73 @@ register %{
reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
reg_def SR_PPR( SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg()); // v
// ----------------------------
// Vector-Scalar Registers
// ----------------------------
reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);
// ----------------------------
// Specify priority of register selection within phases of register
@ -385,6 +452,73 @@ alloc_class chunk2 (
);
alloc_class chunk3 (
VSR0,
VSR1,
VSR2,
VSR3,
VSR4,
VSR5,
VSR6,
VSR7,
VSR8,
VSR9,
VSR10,
VSR11,
VSR12,
VSR13,
VSR14,
VSR15,
VSR16,
VSR17,
VSR18,
VSR19,
VSR20,
VSR21,
VSR22,
VSR23,
VSR24,
VSR25,
VSR26,
VSR27,
VSR28,
VSR29,
VSR30,
VSR31,
VSR32,
VSR33,
VSR34,
VSR35,
VSR36,
VSR37,
VSR38,
VSR39,
VSR40,
VSR41,
VSR42,
VSR43,
VSR44,
VSR45,
VSR46,
VSR47,
VSR48,
VSR49,
VSR50,
VSR51,
VSR52,
VSR53,
VSR54,
VSR55,
VSR56,
VSR57,
VSR58,
VSR59,
VSR60,
VSR61,
VSR62,
VSR63
);
alloc_class chunk4 (
// special registers
// These registers are not allocated, but used for nodes generated by postalloc expand.
SR_XER,
@ -769,6 +903,45 @@ reg_class dbl_reg(
F31, F31_H // nv!
);
// ----------------------------
// Vector-Scalar Register Class
// ----------------------------
reg_class vs_reg(
VSR32,
VSR33,
VSR34,
VSR35,
VSR36,
VSR37,
VSR38,
VSR39,
VSR40,
VSR41,
VSR42,
VSR43,
VSR44,
VSR45,
VSR46,
VSR47,
VSR48,
VSR49,
VSR50,
VSR51
// VSR52, // nv!
// VSR53, // nv!
// VSR54, // nv!
// VSR55, // nv!
// VSR56, // nv!
// VSR57, // nv!
// VSR58, // nv!
// VSR59, // nv!
// VSR60, // nv!
// VSR61, // nv!
// VSR62, // nv!
// VSR63 // nv!
);
%}
//----------DEFINITION BLOCK---------------------------------------------------
@ -1502,7 +1675,7 @@ static enum RC rc_class(OptoReg::Name reg) {
if (reg < 64+64) return rc_float;
// Between float regs & stack are the flags regs.
assert(OptoReg::is_stack(reg), "blow up if spilling flags");
assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags");
return rc_stack;
}
@ -2048,14 +2221,24 @@ const bool Matcher::convL2FSupported(void) {
// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
assert(MaxVectorSize == 8, "");
return 8;
if (SuperwordUseVSX) {
assert(MaxVectorSize == 16, "");
return 16;
} else {
assert(MaxVectorSize == 8, "");
return 8;
}
}
// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize == 8 && size == 8, "");
return Op_RegL;
if (SuperwordUseVSX) {
assert(MaxVectorSize == 16 && size == 16, "");
return Op_VecX;
} else {
assert(MaxVectorSize == 8 && size == 8, "");
return Op_RegL;
}
}
const uint Matcher::vector_shift_count_ideal_reg(int size) {
@ -2075,7 +2258,7 @@ const int Matcher::min_vector_size(const BasicType bt) {
// PPC doesn't support misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
return false;
return !AlignVector; // can be changed by flag
}
// PPC AES support not yet implemented
@ -2217,10 +2400,31 @@ const MachRegisterNumbers farg_reg[13] = {
F13_num
};
const MachRegisterNumbers vsarg_reg[64] = {
VSR0_num, VSR1_num, VSR2_num, VSR3_num,
VSR4_num, VSR5_num, VSR6_num, VSR7_num,
VSR8_num, VSR9_num, VSR10_num, VSR11_num,
VSR12_num, VSR13_num, VSR14_num, VSR15_num,
VSR16_num, VSR17_num, VSR18_num, VSR19_num,
VSR20_num, VSR21_num, VSR22_num, VSR23_num,
VSR24_num, VSR25_num, VSR26_num, VSR27_num,
VSR28_num, VSR29_num, VSR30_num, VSR31_num,
VSR32_num, VSR33_num, VSR34_num, VSR35_num,
VSR36_num, VSR37_num, VSR38_num, VSR39_num,
VSR40_num, VSR41_num, VSR42_num, VSR43_num,
VSR44_num, VSR45_num, VSR46_num, VSR47_num,
VSR48_num, VSR49_num, VSR50_num, VSR51_num,
VSR52_num, VSR53_num, VSR54_num, VSR55_num,
VSR56_num, VSR57_num, VSR58_num, VSR59_num,
VSR60_num, VSR61_num, VSR62_num, VSR63_num
};
const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);
// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
@ -2552,6 +2756,115 @@ loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immL
return nodes;
}
typedef struct {
loadConL_hiNode *_large_hi;
loadConL_loNode *_large_lo;
mtvsrdNode *_moved;
xxspltdNode *_replicated;
loadConLNode *_small;
MachNode *_last;
} loadConLReplicatedNodesTuple;
loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
vecXOper *dst, immI_0Oper *zero,
OptoReg::Name reg_second, OptoReg::Name reg_first,
OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
loadConLReplicatedNodesTuple nodes;
const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
if (large_constant_pool) {
// Create new nodes.
loadConL_hiNode *m1 = new loadConL_hiNode();
loadConL_loNode *m2 = new loadConL_loNode();
mtvsrdNode *m3 = new mtvsrdNode();
xxspltdNode *m4 = new xxspltdNode();
// inputs for new nodes
m1->add_req(NULL, toc);
m2->add_req(NULL, m1);
m3->add_req(NULL, m2);
m4->add_req(NULL, m3);
// operands for new nodes
m1->_opnds[0] = new iRegLdstOper(); // dst
m1->_opnds[1] = immSrc; // src
m1->_opnds[2] = new iRegPdstOper(); // toc
m2->_opnds[0] = new iRegLdstOper(); // dst
m2->_opnds[1] = immSrc; // src
m2->_opnds[2] = new iRegLdstOper(); // base
m3->_opnds[0] = new vecXOper(); // dst
m3->_opnds[1] = new iRegLdstOper(); // src
m4->_opnds[0] = new vecXOper(); // dst
m4->_opnds[1] = new vecXOper(); // src
m4->_opnds[2] = zero;
// Initialize ins_attrib TOC fields.
m1->_const_toc_offset = -1;
m2->_const_toc_offset_hi_node = m1;
// Initialize ins_attrib instruction offset.
m1->_cbuf_insts_offset = -1;
// register allocation for new nodes
ra_->set_pair(m1->_idx, reg_second, reg_first);
ra_->set_pair(m2->_idx, reg_second, reg_first);
ra_->set1(m3->_idx, reg_second);
ra_->set2(m3->_idx, reg_vec_first);
ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
// Create result.
nodes._large_hi = m1;
nodes._large_lo = m2;
nodes._moved = m3;
nodes._replicated = m4;
nodes._small = NULL;
nodes._last = nodes._replicated;
assert(m2->bottom_type()->isa_long(), "must be long");
} else {
loadConLNode *m2 = new loadConLNode();
mtvsrdNode *m3 = new mtvsrdNode();
xxspltdNode *m4 = new xxspltdNode();
// inputs for new nodes
m2->add_req(NULL, toc);
// operands for new nodes
m2->_opnds[0] = new iRegLdstOper(); // dst
m2->_opnds[1] = immSrc; // src
m2->_opnds[2] = new iRegPdstOper(); // toc
m3->_opnds[0] = new vecXOper(); // dst
m3->_opnds[1] = new iRegLdstOper(); // src
m4->_opnds[0] = new vecXOper(); // dst
m4->_opnds[1] = new vecXOper(); // src
m4->_opnds[2] = zero;
// Initialize ins_attrib instruction offset.
m2->_cbuf_insts_offset = -1;
ra_->set1(m3->_idx, reg_second);
ra_->set2(m3->_idx, reg_vec_first);
ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
// register allocation for new nodes
ra_->set_pair(m2->_idx, reg_second, reg_first);
// Create result.
nodes._large_hi = NULL;
nodes._large_lo = NULL;
nodes._small = m2;
nodes._moved = m3;
nodes._replicated = m4;
nodes._last = nodes._replicated;
assert(m2->bottom_type()->isa_long(), "must be long");
}
return nodes;
}
%} // source
encode %{
@ -3212,6 +3525,27 @@ encode %{
assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
%}
enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc) %{
// Create new nodes.
// Make an operand with the bit pattern to load as float.
immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
immI_0Oper *op_zero = new immI_0Oper(0);
loadConLReplicatedNodesTuple loadConLNodes =
loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
OptoReg::Name(R20_H_num), OptoReg::Name(R20_num),
OptoReg::Name(VSR11_num), OptoReg::Name(VSR10_num));
// Push new nodes.
if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
if (loadConLNodes._moved) { nodes->push(loadConLNodes._moved); }
if (loadConLNodes._last) { nodes->push(loadConLNodes._last); }
assert(nodes->length() >= 1, "must have created at least 1 node");
%}
// This enc_class is needed so that scheduler gets proper
// input mapping for latency computation.
enc_class enc_poll(immI dst, iRegLdst poll) %{
@ -3840,6 +4174,14 @@ ins_attrib ins_field_load_ic_node(0);
//
// Formats are generated automatically for constants and base registers.
operand vecX() %{
constraint(ALLOC_IN_RC(vs_reg));
match(VecX);
format %{ %}
interface(REG_INTER);
%}
//----------Simple Operands----------------------------------------------------
// Immediate Operands
@ -5372,6 +5714,20 @@ instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
ins_pipe(pipe_class_memory);
%}
// Load Aligned Packed Byte
instruct loadV16(vecX dst, indirect mem) %{
predicate(n->as_LoadVector()->memory_size() == 16);
match(Set dst (LoadVector mem));
ins_cost(MEMORY_REF_COST);
format %{ "LXVD2X $dst, $mem \t// load 16-byte Vector" %}
size(4);
ins_encode %{
__ lxvd2x($dst$$VectorSRegister, $mem$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
match(Set dst (LoadRange mem));
@ -6368,6 +6724,20 @@ instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
ins_pipe(pipe_class_memory);
%}
// Store Packed Byte long register to memory
instruct storeV16(indirect mem, vecX src) %{
predicate(n->as_StoreVector()->memory_size() == 16);
match(Set mem (StoreVector mem src));
ins_cost(MEMORY_REF_COST);
format %{ "STXVD2X $mem, $src \t// store 16-byte Vector" %}
size(4);
ins_encode %{
__ stxvd2x($src$$VectorSRegister, $mem$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
@ -13239,6 +13609,26 @@ instruct storeS_reversed(iRegIsrc src, indirect mem) %{
ins_pipe(pipe_class_default);
%}
instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
effect(DEF temp1, USE src);
size(4);
ins_encode %{
__ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}
instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
effect(DEF dst, USE src, USE imm1);
size(4);
ins_encode %{
__ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
%}
ins_pipe(pipe_class_default);
%}
//---------- Replicate Vector Instructions ------------------------------------
// Insrdi does replicate if src == dst.
@ -13318,6 +13708,46 @@ instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateB src));
predicate(n->as_Vector()->length() == 16);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 imm1 %{ (int) 1 %}
moveReg(tmpL, src);
repl56(tmpL);
repl48(tmpL);
mtvsrwz(tmpV, tmpL);
xxspltw(dst, tmpV, imm1);
%}
%}
instruct repl16B_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateB zero));
predicate(n->as_Vector()->length() == 16);
format %{ "XXLXOR $dst, $zero \t// replicate16B" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateB src));
predicate(n->as_Vector()->length() == 16);
format %{ "XXLEQV $dst, $src \t// replicate16B" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 4);
@ -13352,6 +13782,46 @@ instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 8);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveReg(tmpL, src);
repl48(tmpL);
repl32(tmpL);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl8S_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateS zero));
predicate(n->as_Vector()->length() == 8);
format %{ "XXLXOR $dst, $zero \t// replicate8S" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 8);
format %{ "XXLEQV $dst, $src \t// replicate16B" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 2);
@ -13386,6 +13856,46 @@ instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 4);
ins_cost(2 * DEFAULT_COST);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveReg(tmpL, src);
repl32(tmpL);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl4I_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateI zero));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLXOR $dst, $zero \t// replicate4I" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLEQV $dst, $dst, $dst \t// replicate4I" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Move float to int register via stack, replicate.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
match(Set dst (ReplicateF src));
@ -13484,6 +13994,154 @@ instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
%}
instruct repl4F_reg_Ex(vecX dst, regF src) %{
match(Set dst (ReplicateF src));
predicate(n->as_Vector()->length() == 4);
ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
expand %{
stackSlotL tmpS;
iRegIdst tmpI;
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveF2I_reg_stack(tmpS, src); // Move float to stack.
moveF2I_stack_reg(tmpI, tmpS); // Move stack to int reg.
moveReg(tmpL, tmpI); // Move int to long reg.
repl32(tmpL); // Replicate bitpattern.
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl4F_immF_Ex(vecX dst, immF src) %{
match(Set dst (ReplicateF src));
predicate(n->as_Vector()->length() == 4);
ins_cost(10 * DEFAULT_COST);
postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase) );
%}
instruct repl4F_immF0(vecX dst, immF_0 zero) %{
match(Set dst (ReplicateF zero));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLXOR $dst, $zero \t// replicate4F" %}
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl2D_reg_Ex(vecX dst, regD src) %{
match(Set dst (ReplicateD src));
predicate(n->as_Vector()->length() == 2);
expand %{
stackSlotL tmpS;
iRegLdst tmpL;
iRegLdst tmp;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveD2L_reg_stack(tmpS, src);
moveD2L_stack_reg(tmpL, tmpS);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl2D_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateD zero));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLXOR $dst, $zero \t// replicate2D" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl2D_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateD src));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLEQV $dst, $src \t// replicate16B" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct mtvsrd(vecX dst, iRegLsrc src) %{
predicate(false);
effect(DEF dst, USE src);
format %{ "MTVSRD $dst, $src \t// Move to 16-byte register"%}
size(4);
ins_encode %{
__ mtvsrd($dst$$VectorSRegister, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}
instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
effect(DEF dst, USE src, USE zero);
format %{ "XXSPLATD $dst, $src, $zero \t// Permute 16-byte register"%}
size(4);
ins_encode %{
__ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
%}
ins_pipe(pipe_class_default);
%}
instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
effect(DEF dst, USE src1, USE src2, USE zero);
format %{ "XXPERMDI $dst, $src1, $src2, $zero \t// Permute 16-byte register"%}
size(4);
ins_encode %{
__ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
%}
ins_pipe(pipe_class_default);
%}
instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
match(Set dst (ReplicateL src));
predicate(n->as_Vector()->length() == 2);
expand %{
vecX tmpV;
immI8 zero %{ (int) 0 %}
mtvsrd(tmpV, src);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl2L_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateL zero));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLXOR $dst, $zero \t// replicate2L" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateL src));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLEQV $dst, $src \t// replicate16B" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// ============================================================================
// Safepoint Instruction

View File

@ -31,3 +31,5 @@
REGISTER_DEFINITION(Register, noreg);
REGISTER_DEFINITION(FloatRegister, fnoreg);
REGISTER_DEFINITION(VectorSRegister, vsnoreg);

View File

@ -677,7 +677,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
* 2 // register halves
+ ConditionRegisterImpl::number_of_registers // condition code registers
+ SpecialRegisterImpl::number_of_registers // special registers
+ VectorRegisterImpl::number_of_registers // VSX registers
+ VectorSRegisterImpl::number_of_registers // VSX registers
};
static const int max_gpr;

View File

@ -479,8 +479,8 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_siz
// Is vector's size (in bytes) bigger than a size saved by default?
bool SharedRuntime::is_wide_vector(int size) {
// Note, MaxVectorSize == 8 on PPC64.
assert(size <= 8, "%d bytes vectors are not supported", size);
// Note, MaxVectorSize == 8/16 on PPC64.
assert(size <= (SuperwordUseVSX ? 16 : 8), "%d bytes vectors are not supported", size);
return size > 8;
}
@ -2234,9 +2234,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ release();
// TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
if (UseMembar) {
__ fence();
}
// The JNI call
@ -2393,9 +2390,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ release();
// TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
if (UseMembar) {
__ fence();
}
__ bind(after_transition);
// Reguard any pages if necessary.

View File

@ -2667,7 +2667,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Arguments for generated stub (little endian only):
// Arguments for generated stub:
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - round key array
@@ -2686,7 +2686,6 @@ class StubGenerator: public StubCodeGenerator {
Register keylen = R8;
Register temp = R9;
Register keypos = R10;
Register hex = R11;
Register fifteen = R12;
VectorRegister vRet = VR0;
@@ -2706,164 +2705,170 @@ class StubGenerator: public StubCodeGenerator {
VectorRegister vTmp3 = VR11;
VectorRegister vTmp4 = VR12;
VectorRegister vLow = VR13;
VectorRegister vHigh = VR14;
__ li (hex, 16);
__ li (fifteen, 15);
__ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
#ifdef VM_LITTLE_ENDIAN
__ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt);
#endif
__ vperm (vRet, vRet, vTmp1, fromPerm);
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
__ lvsr (keyPerm, key);
__ vxor (vTmp2, vTmp2, vTmp2);
__ load_perm (keyPerm, key);
#ifdef VM_LITTLE_ENDIAN
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vsldoi (keyPerm, keyPerm, keyPerm, 8);
#endif
// load the 1st round key to vKey1
__ li (keypos, 0);
// load the 1st round key to vTmp1
__ lvx (vTmp1, key);
__ li (keypos, 16);
__ lvx (vKey1, keypos, key);
__ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey1, vTmp1, vKey1, keyPerm);
__ vec_perm (vTmp1, vKey1, keyPerm);
// 1st round
__ vxor (vRet, vRet, vKey1);
__ vxor (vRet, vRet, vTmp1);
// load the 2nd round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 32);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, keyPerm);
// load the 3rd round key to vKey2
__ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ li (keypos, 48);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, keyPerm);
// load the 4th round key to vKey3
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp2, vTmp1, keyPerm);
__ li (keypos, 64);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, keyPerm);
// load the 5th round key to vKey4
__ addi (keypos, keypos, 16);
__ li (keypos, 80);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey4, vTmp1, keyPerm);
// 2nd - 5th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4);
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4);
// load the 6th round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 96);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 7th round key to vKey2
__ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ li (keypos, 112);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, keyPerm);
// load the 8th round key to vKey3
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp2, vTmp1, keyPerm);
__ li (keypos, 128);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, keyPerm);
// load the 9th round key to vKey4
__ addi (keypos, keypos, 16);
__ li (keypos, 144);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey4, vTmp1, keyPerm);
// 6th - 9th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4);
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey3);
__ vcipher (vRet, vRet, vKey4);
// load the 10th round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 160);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 11th round key to vKey2
__ addi (keypos, keypos, 16);
__ li (keypos, 176);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 4 rounds
__ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_doLast);
// 10th - 11th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
// load the 12th round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 192);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 13th round key to vKey2
__ addi (keypos, keypos, 16);
__ li (keypos, 208);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 2 rounds
__ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_doLast);
// 12th - 13th rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey1);
__ vcipher (vRet, vRet, vKey2);
// load the 14th round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 224);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 15th round key to vKey2
__ addi (keypos, keypos, 16);
__ li (keypos, 240);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey2, vTmp1, keyPerm);
__ bind(L_doLast);
// last two rounds
__ vcipher (vRet, vRet, vKey1);
__ vcipherlast (vRet, vRet, vKey2);
__ vcipher (vRet, vRet, vKey1);
__ vcipherlast (vRet, vRet, vKey2);
__ neg (temp, to);
__ lvsr (toPerm, temp);
__ vspltisb (vTmp2, -1);
__ vxor (vTmp1, vTmp1, vTmp1);
__ vperm (vTmp2, vTmp2, vTmp1, toPerm);
__ vxor (toPerm, toPerm, fSplt);
// store result (unaligned)
#ifdef VM_LITTLE_ENDIAN
__ lvsl (toPerm, to);
#else
__ lvsr (toPerm, to);
#endif
__ vspltisb (vTmp3, -1);
__ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to);
__ vperm (vRet, vRet, vRet, toPerm);
__ vsel (vTmp1, vTmp1, vRet, vTmp2);
__ lvx (vTmp4, fifteen, to);
__ lvx (vTmp2, fifteen, to);
#ifdef VM_LITTLE_ENDIAN
__ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
__ vxor (toPerm, toPerm, fSplt); // swap bytes
#else
__ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
#endif
__ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
__ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
__ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
__ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to);
__ vsel (vRet, vRet, vTmp4, vTmp2);
__ stvx (vRet, fifteen, to);
__ blr();
return start;
}
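The closing lvx/vperm/vsel/stvx sequence above replaces the old lvsr-based tail and handles a destination that need not be 16-byte aligned. As a rough illustration only (not the stub, and ignoring the endian-specific permute-mask details), the effect is a read-modify-write of the two aligned 16-byte blocks that cover 'to'; the block at the higher address is stored first because the two blocks alias when 'to' is already aligned. The helper name below is invented for this sketch.

#include <cstdint>
#include <cstring>

// Illustrative sketch of the unaligned 16-byte store merge; not the generated code.
static void store16_unaligned_sketch(unsigned char* to, const unsigned char* result) {
  unsigned char* lo = (unsigned char*)((uintptr_t)to & ~(uintptr_t)15);        // first covering block
  unsigned char* hi = (unsigned char*)(((uintptr_t)to + 15) & ~(uintptr_t)15); // second covering block (== lo if aligned)
  unsigned char merged[32];
  std::memcpy(merged,      lo, 16);                                   // read existing bytes
  std::memcpy(merged + 16, hi, 16);
  std::memcpy(merged + ((uintptr_t)to - (uintptr_t)lo), result, 16);  // splice in the 16 result bytes
  std::memcpy(hi, merged + 16, 16);                                   // store the possibly aliasing block first
  std::memcpy(lo, merged,      16);                                   // then the low block
}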
// Arguments for generated stub (little endian only):
// Arguments for generated stub:
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - K (key) in little endian int array
@@ -2885,7 +2890,6 @@ class StubGenerator: public StubCodeGenerator {
Register keylen = R8;
Register temp = R9;
Register keypos = R10;
Register hex = R11;
Register fifteen = R12;
VectorRegister vRet = VR0;
@@ -2906,30 +2910,30 @@ class StubGenerator: public StubCodeGenerator {
VectorRegister vTmp3 = VR12;
VectorRegister vTmp4 = VR13;
VectorRegister vLow = VR14;
VectorRegister vHigh = VR15;
__ li (hex, 16);
__ li (fifteen, 15);
__ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
#ifdef VM_LITTLE_ENDIAN
__ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt);
#endif
__ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
__ lvsr (keyPerm, key);
__ load_perm (keyPerm, key);
#ifdef VM_LITTLE_ENDIAN
__ vxor (vTmp2, vTmp2, vTmp2);
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vsldoi (keyPerm, keyPerm, keyPerm, 8);
#endif
__ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_do44);
@@ -2937,32 +2941,32 @@ class StubGenerator: public StubCodeGenerator {
__ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_do52);
// load the 15th round key to vKey11
// load the 15th round key to vKey1
__ li (keypos, 240);
__ lvx (vKey1, keypos, key);
__ li (keypos, 224);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, vKey1, keyPerm);
// load the 14th round key to vKey2
__ li (keypos, 208);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 13th round key to vKey3
__ li (keypos, 192);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, vKey3, keyPerm);
// load the 12th round key to vKey4
__ li (keypos, 176);
__ lvx (vKey5, keypos, key);
__ vec_perm (vKey4, vKey5, vKey4, keyPerm);
// load the 11th round key to vKey5
__ li (keypos, 160);
__ lvx (vTmp1, keypos, key);
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm);
// load the 14th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp2, vTmp1, keyPerm);
// load the 13th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp1, vTmp2, keyPerm);
// load the 12th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp2, vTmp1, keyPerm);
// load the 11th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey5, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// 1st - 5th rounds
__ vxor (vRet, vRet, vKey1);
@@ -2975,22 +2979,22 @@ class StubGenerator: public StubCodeGenerator {
__ bind (L_do52);
// load the 13th round key to vKey11
// load the 13th round key to vKey1
__ li (keypos, 208);
__ lvx (vTmp1, keypos, key);
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm);
__ lvx (vKey1, keypos, key);
__ li (keypos, 192);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, vKey1, keyPerm);
// load the 12th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp2, vTmp1, keyPerm);
// load the 12th round key to vKey2
__ li (keypos, 176);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 11th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp1, vTmp2, keyPerm);
// load the 11th round key to vKey3
__ li (keypos, 160);
__ lvx (vTmp1, keypos, key);
__ vec_perm (vKey3, vTmp1, vKey3, keyPerm);
// 1st - 3rd rounds
__ vxor (vRet, vRet, vKey1);
@@ -3001,42 +3005,42 @@ class StubGenerator: public StubCodeGenerator {
__ bind (L_do44);
// load the 11th round key to vKey11
// load the 11th round key to vKey1
__ li (keypos, 176);
__ lvx (vKey1, keypos, key);
__ li (keypos, 160);
__ lvx (vTmp1, keypos, key);
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey1, vTmp1, vKey1, keyPerm);
// 1st round
__ vxor (vRet, vRet, vKey1);
__ bind (L_doLast);
// load the 10th round key to vKey10
__ addi (keypos, keypos, -16);
// load the 10th round key to vKey1
__ li (keypos, 144);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
// load the 9th round key to vKey2
__ li (keypos, 128);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 8th round key to vKey3
__ li (keypos, 112);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, vKey3, keyPerm);
// load the 7th round key to vKey4
__ li (keypos, 96);
__ lvx (vKey5, keypos, key);
__ vec_perm (vKey4, vKey5, vKey4, keyPerm);
// load the 6th round key to vKey5
__ li (keypos, 80);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
// load the 9th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
// load the 8th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey3, vTmp2, vTmp1, keyPerm);
// load the 7th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey4, vTmp1, vTmp2, keyPerm);
// load the 6th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey5, vTmp2, vTmp1, keyPerm);
__ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// last 10th - 6th rounds
__ vncipher (vRet, vRet, vKey1);
@@ -3045,30 +3049,29 @@ class StubGenerator: public StubCodeGenerator {
__ vncipher (vRet, vRet, vKey4);
__ vncipher (vRet, vRet, vKey5);
// load the 5th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm);
// load the 5th round key to vKey1
__ li (keypos, 64);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
// load the 4th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp2, vTmp1, keyPerm);
// load the 4th round key to vKey2
__ li (keypos, 48);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 3rd round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp1, vTmp2, keyPerm);
// load the 3rd round key to vKey3
__ li (keypos, 32);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, vKey3, keyPerm);
// load the 2nd round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp2, vTmp1, keyPerm);
// load the 2nd round key to vKey4
__ li (keypos, 16);
__ lvx (vKey5, keypos, key);
__ vec_perm (vKey4, vKey5, vKey4, keyPerm);
// load the 1st round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey5, vTmp1, vTmp2, keyPerm);
// load the 1st round key to vKey5
__ lvx (vTmp1, key);
__ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// last 5th - 1st rounds
__ vncipher (vRet, vRet, vKey1);
@@ -3077,24 +3080,54 @@ class StubGenerator: public StubCodeGenerator {
__ vncipher (vRet, vRet, vKey4);
__ vncipherlast (vRet, vRet, vKey5);
__ neg (temp, to);
__ lvsr (toPerm, temp);
__ vspltisb (vTmp2, -1);
__ vxor (vTmp1, vTmp1, vTmp1);
__ vperm (vTmp2, vTmp2, vTmp1, toPerm);
__ vxor (toPerm, toPerm, fSplt);
// store result (unaligned)
#ifdef VM_LITTLE_ENDIAN
__ lvsl (toPerm, to);
#else
__ lvsr (toPerm, to);
#endif
__ vspltisb (vTmp3, -1);
__ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to);
__ vperm (vRet, vRet, vRet, toPerm);
__ vsel (vTmp1, vTmp1, vRet, vTmp2);
__ lvx (vTmp4, fifteen, to);
__ lvx (vTmp2, fifteen, to);
#ifdef VM_LITTLE_ENDIAN
__ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
__ vxor (toPerm, toPerm, fSplt); // swap bytes
#else
__ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
#endif
__ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
__ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
__ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
__ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to);
__ vsel (vRet, vRet, vTmp4, vTmp2);
__ stvx (vRet, fifteen, to);
__ blr();
return start;
}
address generate_sha256_implCompress(bool multi_block, const char *name) {
assert(UseSHA, "need SHA instructions");
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
__ sha256 (multi_block);
__ blr();
return start;
}
address generate_sha512_implCompress(bool multi_block, const char *name) {
assert(UseSHA, "need SHA instructions");
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
__ sha512 (multi_block);
__ blr();
return start;
}
void generate_arraycopy_stubs() {
// Note: the disjoint stubs must be generated first, some of
// the conjoint stubs use them.
@@ -3306,6 +3339,267 @@ class StubGenerator: public StubCodeGenerator {
BLOCK_COMMENT("} Stub body");
}
/**
* Arguments:
*
* Input:
* R3_ARG1 - out address
* R4_ARG2 - in address
* R5_ARG3 - offset
* R6_ARG4 - len
* R7_ARG5 - k
* Output:
* R3_RET - carry
*/
address generate_mulAdd() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "mulAdd");
address start = __ function_entry();
// C2 does not sign extend signed parameters to full 64-bit registers:
__ rldic (R5_ARG3, R5_ARG3, 2, 32); // always positive
__ clrldi(R6_ARG4, R6_ARG4, 32); // force zero bits on higher word
__ clrldi(R7_ARG5, R7_ARG5, 32); // force zero bits on higher word
__ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);
// Moves output carry to return register
__ mr (R3_RET, R10);
__ blr();
return start;
}
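For reference, the scalar operation this stub accelerates is java.math.BigInteger::implMulAdd. Below is a minimal C++ transcription, given as an illustration only; the explicit out_len parameter and the name mul_add_reference are assumptions of this sketch, since the stub itself receives raw addresses and an already-adjusted offset.

// Sketch of the scalar semantics (mirrors BigInteger.implMulAdd); not the stub.
// Limbs are 32-bit and stored most significant first, as in java.math.BigInteger.
static unsigned int mul_add_reference(unsigned int* out, int out_len,
                                      const unsigned int* in,
                                      int offset, int len, unsigned int k) {
  unsigned long long carry = 0;
  int pos = out_len - offset - 1;                       // least significant limb of 'out' to update
  for (int j = len - 1; j >= 0; j--, pos--) {
    unsigned long long product = (unsigned long long)in[j] * k + out[pos] + carry;
    out[pos] = (unsigned int)product;
    carry    = product >> 32;
  }
  return (unsigned int)carry;                           // corresponds to R3_RET above
}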
/**
* Arguments:
*
* Input:
* R3_ARG1 - in address
* R4_ARG2 - in length
* R5_ARG3 - out address
* R6_ARG4 - out length
*/
address generate_squareToLen() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "squareToLen");
address start = __ function_entry();
// args - the upper word is cleared here (zero-extended) because of the int-to-long cast
const Register in = R3_ARG1;
const Register in_len = R4_ARG2;
__ clrldi(in_len, in_len, 32);
const Register out = R5_ARG3;
const Register out_len = R6_ARG4;
__ clrldi(out_len, out_len, 32);
// output
const Register ret = R3_RET;
// temporaries
const Register lplw_s = R7;
const Register in_aux = R8;
const Register out_aux = R9;
const Register piece = R10;
const Register product = R14;
const Register lplw = R15;
const Register i_minus1 = R16;
const Register carry = R17;
const Register offset = R18;
const Register off_aux = R19;
const Register t = R20;
const Register mlen = R21;
const Register len = R22;
const Register a = R23;
const Register b = R24;
const Register i = R25;
const Register c = R26;
const Register cs = R27;
// Labels
Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
// Save non-volatile regs (frameless).
int current_offs = -8;
__ std(R28, current_offs, R1_SP); current_offs -= 8;
__ std(R27, current_offs, R1_SP); current_offs -= 8;
__ std(R26, current_offs, R1_SP); current_offs -= 8;
__ std(R25, current_offs, R1_SP); current_offs -= 8;
__ std(R24, current_offs, R1_SP); current_offs -= 8;
__ std(R23, current_offs, R1_SP); current_offs -= 8;
__ std(R22, current_offs, R1_SP); current_offs -= 8;
__ std(R21, current_offs, R1_SP); current_offs -= 8;
__ std(R20, current_offs, R1_SP); current_offs -= 8;
__ std(R19, current_offs, R1_SP); current_offs -= 8;
__ std(R18, current_offs, R1_SP); current_offs -= 8;
__ std(R17, current_offs, R1_SP); current_offs -= 8;
__ std(R16, current_offs, R1_SP); current_offs -= 8;
__ std(R15, current_offs, R1_SP); current_offs -= 8;
__ std(R14, current_offs, R1_SP);
// Store the squares, right shifted one bit (i.e., divided by 2)
__ subi (out_aux, out, 8);
__ subi (in_aux, in, 4);
__ cmpwi (CCR0, in_len, 0);
// Initialize lplw outside of the loop
__ xorr (lplw, lplw, lplw);
__ ble (CCR0, SKIP_LOOP_SQUARE); // in_len <= 0
__ mtctr (in_len);
__ bind(LOOP_SQUARE);
__ lwzu (piece, 4, in_aux);
__ mulld (product, piece, piece);
// shift left 63 bits and only keep the MSB
__ rldic (lplw_s, lplw, 63, 0);
__ mr (lplw, product);
// shift right 1 bit without sign extension
__ srdi (product, product, 1);
// join them to the same register and store it
__ orr (product, lplw_s, product);
#ifdef VM_LITTLE_ENDIAN
// Swap low and high words for little endian
__ rldicl (product, product, 32, 0);
#endif
__ stdu (product, 8, out_aux);
__ bdnz (LOOP_SQUARE);
__ bind(SKIP_LOOP_SQUARE);
// Add in off-diagonal sums
__ cmpwi (CCR0, in_len, 0);
__ ble (CCR0, SKIP_DIAGONAL_SUM);
// Avoid CTR usage here in order to use it at mulAdd
__ subi (i_minus1, in_len, 1);
__ li (offset, 4);
__ bind(LOOP_DIAGONAL_SUM);
__ sldi (off_aux, out_len, 2);
__ sub (off_aux, off_aux, offset);
__ mr (len, i_minus1);
__ sldi (mlen, i_minus1, 2);
__ lwzx (t, in, mlen);
__ muladd (out, in, off_aux, len, t, a, b, carry);
// begin<addOne>
// off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
__ addi (mlen, mlen, 4);
__ sldi (a, out_len, 2);
__ subi (a, a, 4);
__ sub (a, a, mlen);
__ subi (off_aux, offset, 4);
__ sub (off_aux, a, off_aux);
__ lwzx (b, off_aux, out);
__ add (b, b, carry);
__ stwx (b, off_aux, out);
// if (((uint64_t)s >> 32) != 0) {
__ srdi_ (a, b, 32);
__ beq (CCR0, SKIP_ADDONE);
// while (--mlen >= 0) {
__ bind(LOOP_ADDONE);
__ subi (mlen, mlen, 4);
__ cmpwi (CCR0, mlen, 0);
__ beq (CCR0, SKIP_ADDONE);
// if (--offset_aux < 0) { // Carry out of number
__ subi (off_aux, off_aux, 4);
__ cmpwi (CCR0, off_aux, 0);
__ blt (CCR0, SKIP_ADDONE);
// } else {
__ lwzx (b, off_aux, out);
__ addi (b, b, 1);
__ stwx (b, off_aux, out);
__ cmpwi (CCR0, b, 0);
__ bne (CCR0, SKIP_ADDONE);
__ b (LOOP_ADDONE);
__ bind(SKIP_ADDONE);
// } } } end<addOne>
__ addi (offset, offset, 8);
__ subi (i_minus1, i_minus1, 1);
__ cmpwi (CCR0, i_minus1, 0);
__ bge (CCR0, LOOP_DIAGONAL_SUM);
__ bind(SKIP_DIAGONAL_SUM);
// Shift back up and set low bit
// Shifts 1 bit left up to len positions. Assumes no leading zeros
// begin<primitiveLeftShift>
__ cmpwi (CCR0, out_len, 0);
__ ble (CCR0, SKIP_LSHIFT);
__ li (i, 0);
__ lwz (c, 0, out);
__ subi (b, out_len, 1);
__ mtctr (b);
__ bind(LOOP_LSHIFT);
__ mr (b, c);
__ addi (cs, i, 4);
__ lwzx (c, out, cs);
__ sldi (b, b, 1);
__ srwi (cs, c, 31);
__ orr (b, b, cs);
__ stwx (b, i, out);
__ addi (i, i, 4);
__ bdnz (LOOP_LSHIFT);
__ sldi (c, out_len, 2);
__ subi (c, c, 4);
__ lwzx (b, out, c);
__ sldi (b, b, 1);
__ stwx (b, out, c);
__ bind(SKIP_LSHIFT);
// end<primitiveLeftShift>
// Set low bit
__ sldi (i, in_len, 2);
__ subi (i, i, 4);
__ lwzx (i, in, i);
__ sldi (c, out_len, 2);
__ subi (c, c, 4);
__ lwzx (b, out, c);
__ andi (i, i, 1);
__ orr (i, b, i);
__ stwx (i, out, c);
// Restore non-volatile regs.
current_offs = -8;
__ ld(R28, current_offs, R1_SP); current_offs -= 8;
__ ld(R27, current_offs, R1_SP); current_offs -= 8;
__ ld(R26, current_offs, R1_SP); current_offs -= 8;
__ ld(R25, current_offs, R1_SP); current_offs -= 8;
__ ld(R24, current_offs, R1_SP); current_offs -= 8;
__ ld(R23, current_offs, R1_SP); current_offs -= 8;
__ ld(R22, current_offs, R1_SP); current_offs -= 8;
__ ld(R21, current_offs, R1_SP); current_offs -= 8;
__ ld(R20, current_offs, R1_SP); current_offs -= 8;
__ ld(R19, current_offs, R1_SP); current_offs -= 8;
__ ld(R18, current_offs, R1_SP); current_offs -= 8;
__ ld(R17, current_offs, R1_SP); current_offs -= 8;
__ ld(R16, current_offs, R1_SP); current_offs -= 8;
__ ld(R15, current_offs, R1_SP); current_offs -= 8;
__ ld(R14, current_offs, R1_SP);
__ mr(ret, out);
__ blr();
return start;
}
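Semantically the stub computes out = in * in over 32-bit limbs; the square/shift/diagonal structure above mirrors BigInteger.implSquareToLen. A naive but semantically equivalent C++ reference is sketched below as an illustration only, using the same most-significant-limb-first convention; the function name and the explicit zero-initialization of 'out' are assumptions of this sketch.

// Naive schoolbook squaring; same result, not the algorithm used by the stub.
static void square_to_len_reference(const unsigned int* in, int in_len,
                                    unsigned int* out, int out_len /* == 2*in_len */) {
  for (int i = 0; i < out_len; i++) out[i] = 0;
  for (int i = in_len - 1; i >= 0; i--) {
    unsigned long long carry = 0;
    for (int j = in_len - 1; j >= 0; j--) {
      int k = i + j + 1;                                // target limb of 'out'
      unsigned long long t = (unsigned long long)in[i] * in[j] + out[k] + carry;
      out[k] = (unsigned int)t;
      carry  = t >> 32;
    }
    out[i] = (unsigned int)carry;                       // final carry of this row
  }
}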
/**
* Arguments:
@@ -3500,6 +3794,12 @@ class StubGenerator: public StubCodeGenerator {
}
#endif
if (UseSquareToLenIntrinsic) {
StubRoutines::_squareToLen = generate_squareToLen();
}
if (UseMulAddIntrinsic) {
StubRoutines::_mulAdd = generate_mulAdd();
}
if (UseMontgomeryMultiplyIntrinsic) {
StubRoutines::_montgomeryMultiply
= CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
@@ -3514,6 +3814,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
}
if (UseSHA256Intrinsics) {
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
}
if (UseSHA512Intrinsics) {
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
}
}
public:

View File

@@ -34,7 +34,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
enum platform_dependent_constants {
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
code_size2 = 20000 // simply increase if too small (assembler will crash if too small)
code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
};
// CRC32 Intrinsics.

View File

@@ -1470,10 +1470,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
if (UseMembar) {
__ fence();
}
//=============================================================================
// Call the native method. Argument registers must not have been
// overwritten since "__ call_stub(signature_handler);" (except for
@@ -1594,9 +1590,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ li(R0/*thread_state*/, _thread_in_Java);
__ release();
__ stw(R0/*thread_state*/, thread_(thread_state));
if (UseMembar) {
__ fence();
}
if (CheckJNICalls) {
// clear_pending_jni_exception_check

View File

@@ -2224,6 +2224,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
if (is_static) {
__ ld(Robj, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f1_offset()), Rcache);
__ ld(Robj, in_bytes(Klass::java_mirror_offset()), Robj);
__ resolve_oop_handle(Robj);
// Acquire not needed here. Following access has an address dependency on this value.
}
}

View File

@@ -107,13 +107,23 @@ void VM_Version::initialize() {
// TODO: PPC port PdScheduling::power6SectorSize = 0x20;
}
MaxVectorSize = 8;
if (PowerArchitecturePPC64 >= 8) {
if (FLAG_IS_DEFAULT(SuperwordUseVSX)) {
FLAG_SET_ERGO(bool, SuperwordUseVSX, true);
}
} else {
if (SuperwordUseVSX) {
warning("SuperwordUseVSX specified, but needs at least Power8.");
FLAG_SET_DEFAULT(SuperwordUseVSX, false);
}
}
MaxVectorSize = SuperwordUseVSX ? 16 : 8;
#endif
// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
@@ -130,7 +140,8 @@ void VM_Version::initialize() {
(has_mfdscr() ? " mfdscr" : ""),
(has_vsx() ? " vsx" : ""),
(has_ldbrx() ? " ldbrx" : ""),
(has_stdbrx() ? " stdbrx" : "")
(has_stdbrx() ? " stdbrx" : ""),
(has_vshasig() ? " sha" : "")
// Make sure number of %s matches num_features!
);
_features_string = os::strdup(buf);
@@ -200,7 +211,6 @@ void VM_Version::initialize() {
}
// The AES intrinsic stubs require AES instruction support.
#if defined(VM_LITTLE_ENDIAN)
if (has_vcipher()) {
if (FLAG_IS_DEFAULT(UseAES)) {
UseAES = true;
@@ -221,18 +231,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
#else
if (UseAES) {
warning("AES instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseAES, false);
}
if (UseAESIntrinsics) {
if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
#endif
if (UseAESCTRIntrinsics) {
warning("AES/CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
@@ -247,17 +245,49 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseFMA, true);
}
if (UseSHA) {
warning("SHA instructions are not available on this CPU");
if (has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA)) {
UseSHA = true;
}
} else if (UseSHA) {
if (!FLAG_IS_DEFAULT(UseSHA))
warning("SHA instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseSHA, false);
}
if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
warning("SHA intrinsics are not available on this CPU");
if (UseSHA1Intrinsics) {
warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
}
if (UseSHA && has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
}
} else if (UseSHA256Intrinsics) {
warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
}
if (UseSHA && has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
}
} else if (UseSHA512Intrinsics) {
warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
UseSquareToLenIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
UseMulAddIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true;
}
@@ -657,6 +687,7 @@ void VM_Version::determine_features() {
a->lxvd2x(VSR0, R3_ARG1); // code[14] -> vsx
a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> ldbrx
a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[16] -> stdbrx
a->vshasigmaw(VR0, VR1, 1, 0xF); // code[17] -> vshasig
a->blr();
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -708,6 +739,7 @@ void VM_Version::determine_features() {
if (code[feature_cntr++]) features |= vsx_m;
if (code[feature_cntr++]) features |= ldbrx_m;
if (code[feature_cntr++]) features |= stdbrx_m;
if (code[feature_cntr++]) features |= vshasig_m;
// Print the detection code.
if (PrintAssembly) {

View File

@@ -49,6 +49,7 @@ protected:
vsx,
ldbrx,
stdbrx,
vshasig,
num_features // last entry to count features
};
enum Feature_Flag_Set {
@@ -64,6 +65,7 @@ protected:
vand_m = (1 << vand ),
lqarx_m = (1 << lqarx ),
vcipher_m = (1 << vcipher),
vshasig_m = (1 << vshasig),
vpmsumb_m = (1 << vpmsumb),
tcheck_m = (1 << tcheck ),
mfdscr_m = (1 << mfdscr ),
@@ -106,6 +108,7 @@ public:
static bool has_vsx() { return (_features & vsx_m) != 0; }
static bool has_ldbrx() { return (_features & ldbrx_m) != 0; }
static bool has_stdbrx() { return (_features & stdbrx_m) != 0; }
static bool has_vshasig() { return (_features & vshasig_m) != 0; }
static bool has_mtfprd() { return has_vpmsumb(); } // alias for P8
// Assembler testing

View File

@@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -250,7 +250,6 @@ class Address VALUE_OBJ_CLASS_SPEC {
bool is_RSform() { return has_base() && !has_index() && is_disp12(); }
bool is_RSYform() { return has_base() && !has_index() && is_disp20(); }
bool is_RXform() { return has_base() && has_index() && is_disp12(); }
bool is_RXEform() { return has_base() && has_index() && is_disp12(); }
bool is_RXYform() { return has_base() && has_index() && is_disp20(); }
bool uses(Register r) { return _base == r || _index == r; };
@@ -1093,7 +1092,201 @@ class Assembler : public AbstractAssembler {
#define TRTT_ZOPC (unsigned int)(0xb9 << 24 | 0x90 << 16)
// Miscellaneous Operations
//---------------------------
//-- Vector Instructions --
//---------------------------
//---< Vector Support Instructions >---
//--- Load (memory) ---
#define VLM_ZOPC (unsigned long)(0xe7L << 40 | 0x36L << 0) // load full vreg range (n * 128 bit)
#define VL_ZOPC (unsigned long)(0xe7L << 40 | 0x06L << 0) // load full vreg (128 bit)
#define VLEB_ZOPC (unsigned long)(0xe7L << 40 | 0x00L << 0) // load vreg element (8 bit)
#define VLEH_ZOPC (unsigned long)(0xe7L << 40 | 0x01L << 0) // load vreg element (16 bit)
#define VLEF_ZOPC (unsigned long)(0xe7L << 40 | 0x03L << 0) // load vreg element (32 bit)
#define VLEG_ZOPC (unsigned long)(0xe7L << 40 | 0x02L << 0) // load vreg element (64 bit)
#define VLREP_ZOPC (unsigned long)(0xe7L << 40 | 0x05L << 0) // load and replicate into all vector elements
#define VLLEZ_ZOPC (unsigned long)(0xe7L << 40 | 0x04L << 0) // load logical element and zero.
// vector register gather
#define VGEF_ZOPC (unsigned long)(0xe7L << 40 | 0x13L << 0) // gather element (32 bit), V1(M3) = [D2(V2(M3),B2)]
#define VGEG_ZOPC (unsigned long)(0xe7L << 40 | 0x12L << 0) // gather element (64 bit), V1(M3) = [D2(V2(M3),B2)]
// vector register scatter
#define VSCEF_ZOPC (unsigned long)(0xe7L << 40 | 0x1bL << 0) // vector scatter element FW
#define VSCEG_ZOPC (unsigned long)(0xe7L << 40 | 0x1aL << 0) // vector scatter element DW
#define VLBB_ZOPC (unsigned long)(0xe7L << 40 | 0x07L << 0) // load vreg to block boundary (load to alignment).
#define VLL_ZOPC (unsigned long)(0xe7L << 40 | 0x37L << 0) // load vreg with length.
//--- Load (register) ---
#define VLR_ZOPC (unsigned long)(0xe7L << 40 | 0x56L << 0) // copy full vreg (128 bit)
#define VLGV_ZOPC (unsigned long)(0xe7L << 40 | 0x21L << 0) // copy vreg element -> GR
#define VLVG_ZOPC (unsigned long)(0xe7L << 40 | 0x22L << 0) // copy GR -> vreg element
#define VLVGP_ZOPC (unsigned long)(0xe7L << 40 | 0x62L << 0) // copy GR2, GR3 (disjoint pair) -> vreg
// vector register pack: cut in half the size the source vector elements
#define VPK_ZOPC (unsigned long)(0xe7L << 40 | 0x94L << 0) // just cut
#define VPKS_ZOPC (unsigned long)(0xe7L << 40 | 0x97L << 0) // saturate as signed values
#define VPKLS_ZOPC (unsigned long)(0xe7L << 40 | 0x95L << 0) // saturate as unsigned values
// vector register unpack: double in size the source vector elements
#define VUPH_ZOPC (unsigned long)(0xe7L << 40 | 0xd7L << 0) // signed, left half of the source vector elements
#define VUPLH_ZOPC (unsigned long)(0xe7L << 40 | 0xd5L << 0) // unsigned, left half of the source vector elements
#define VUPL_ZOPC (unsigned long)(0xe7L << 40 | 0xd6L << 0) // signed, right half of the source vector elements
#define VUPLL_ZOPC (unsigned long)(0xe7L << 40 | 0xd4L << 0) // unsigned, right half of the source vector element
// vector register merge
#define VMRH_ZOPC (unsigned long)(0xe7L << 40 | 0x61L << 0) // register merge high (left half of source registers)
#define VMRL_ZOPC (unsigned long)(0xe7L << 40 | 0x60L << 0) // register merge low (right half of source registers)
// vector register permute
#define VPERM_ZOPC (unsigned long)(0xe7L << 40 | 0x8cL << 0) // vector permute
#define VPDI_ZOPC (unsigned long)(0xe7L << 40 | 0x84L << 0) // vector permute DW immediate
// vector register replicate
#define VREP_ZOPC (unsigned long)(0xe7L << 40 | 0x4dL << 0) // vector replicate
#define VREPI_ZOPC (unsigned long)(0xe7L << 40 | 0x45L << 0) // vector replicate immediate
#define VSEL_ZOPC (unsigned long)(0xe7L << 40 | 0x8dL << 0) // vector select
#define VSEG_ZOPC (unsigned long)(0xe7L << 40 | 0x5fL << 0) // vector sign-extend to DW (rightmost element in each DW).
//--- Load (immediate) ---
#define VLEIB_ZOPC (unsigned long)(0xe7L << 40 | 0x40L << 0) // load vreg element (16 bit imm to 8 bit)
#define VLEIH_ZOPC (unsigned long)(0xe7L << 40 | 0x41L << 0) // load vreg element (16 bit imm to 16 bit)
#define VLEIF_ZOPC (unsigned long)(0xe7L << 40 | 0x43L << 0) // load vreg element (16 bit imm to 32 bit)
#define VLEIG_ZOPC (unsigned long)(0xe7L << 40 | 0x42L << 0) // load vreg element (16 bit imm to 64 bit)
//--- Store ---
#define VSTM_ZOPC (unsigned long)(0xe7L << 40 | 0x3eL << 0) // store full vreg range (n * 128 bit)
#define VST_ZOPC (unsigned long)(0xe7L << 40 | 0x0eL << 0) // store full vreg (128 bit)
#define VSTEB_ZOPC (unsigned long)(0xe7L << 40 | 0x08L << 0) // store vreg element (8 bit)
#define VSTEH_ZOPC (unsigned long)(0xe7L << 40 | 0x09L << 0) // store vreg element (16 bit)
#define VSTEF_ZOPC (unsigned long)(0xe7L << 40 | 0x0bL << 0) // store vreg element (32 bit)
#define VSTEG_ZOPC (unsigned long)(0xe7L << 40 | 0x0aL << 0) // store vreg element (64 bit)
#define VSTL_ZOPC (unsigned long)(0xe7L << 40 | 0x3fL << 0) // store vreg with length.
//--- Misc ---
#define VGM_ZOPC (unsigned long)(0xe7L << 40 | 0x46L << 0) // generate bit mask, [start..end] = '1', else '0'
#define VGBM_ZOPC (unsigned long)(0xe7L << 40 | 0x44L << 0) // generate byte mask, bits(imm16) -> bytes
//---< Vector Arithmetic Instructions >---
// Load
#define VLC_ZOPC (unsigned long)(0xe7L << 40 | 0xdeL << 0) // V1 := -V2, element size = 2**m
#define VLP_ZOPC (unsigned long)(0xe7L << 40 | 0xdfL << 0) // V1 := |V2|, element size = 2**m
// ADD
#define VA_ZOPC (unsigned long)(0xe7L << 40 | 0xf3L << 0) // V1 := V2 + V3, element size = 2**m
#define VACC_ZOPC (unsigned long)(0xe7L << 40 | 0xf1L << 0) // V1 := carry(V2 + V3), element size = 2**m
// SUB
#define VS_ZOPC (unsigned long)(0xe7L << 40 | 0xf7L << 0) // V1 := V2 - V3, element size = 2**m
#define VSCBI_ZOPC (unsigned long)(0xe7L << 40 | 0xf5L << 0) // V1 := borrow(V2 - V3), element size = 2**m
// MUL
#define VML_ZOPC (unsigned long)(0xe7L << 40 | 0xa2L << 0) // V1 := V2 * V3, element size = 2**m
#define VMH_ZOPC (unsigned long)(0xe7L << 40 | 0xa3L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLH_ZOPC (unsigned long)(0xe7L << 40 | 0xa1L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
#define VME_ZOPC (unsigned long)(0xe7L << 40 | 0xa6L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLE_ZOPC (unsigned long)(0xe7L << 40 | 0xa4L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
#define VMO_ZOPC (unsigned long)(0xe7L << 40 | 0xa7L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLO_ZOPC (unsigned long)(0xe7L << 40 | 0xa5L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
// MUL & ADD
#define VMAL_ZOPC (unsigned long)(0xe7L << 40 | 0xaaL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMAH_ZOPC (unsigned long)(0xe7L << 40 | 0xabL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALH_ZOPC (unsigned long)(0xe7L << 40 | 0xa9L << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
#define VMAE_ZOPC (unsigned long)(0xe7L << 40 | 0xaeL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALE_ZOPC (unsigned long)(0xe7L << 40 | 0xacL << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
#define VMAO_ZOPC (unsigned long)(0xe7L << 40 | 0xafL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALO_ZOPC (unsigned long)(0xe7L << 40 | 0xadL << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
// Vector SUM
#define VSUM_ZOPC (unsigned long)(0xe7L << 40 | 0x64L << 0) // V1[j] := toFW(sum(V2[i]) + V3[j]), subelements: byte or HW
#define VSUMG_ZOPC (unsigned long)(0xe7L << 40 | 0x65L << 0) // V1[j] := toDW(sum(V2[i]) + V3[j]), subelements: HW or FW
#define VSUMQ_ZOPC (unsigned long)(0xe7L << 40 | 0x67L << 0) // V1[j] := toQW(sum(V2[i]) + V3[j]), subelements: FW or DW
// Average
#define VAVG_ZOPC (unsigned long)(0xe7L << 40 | 0xf2L << 0) // V1 := (V2+V3+1)/2, signed, element size = 2**m
#define VAVGL_ZOPC (unsigned long)(0xe7L << 40 | 0xf0L << 0) // V1 := (V2+V3+1)/2, unsigned, element size = 2**m
// VECTOR Galois Field Multiply Sum
#define VGFM_ZOPC (unsigned long)(0xe7L << 40 | 0xb4L << 0)
#define VGFMA_ZOPC (unsigned long)(0xe7L << 40 | 0xbcL << 0)
//---< Vector Logical Instructions >---
// AND
#define VN_ZOPC (unsigned long)(0xe7L << 40 | 0x68L << 0) // V1 := V2 & V3, element size = 2**m
#define VNC_ZOPC (unsigned long)(0xe7L << 40 | 0x69L << 0) // V1 := V2 & ~V3, element size = 2**m
// XOR
#define VX_ZOPC (unsigned long)(0xe7L << 40 | 0x6dL << 0) // V1 := V2 ^ V3, element size = 2**m
// NOR
#define VNO_ZOPC (unsigned long)(0xe7L << 40 | 0x6bL << 0) // V1 := !(V2 | V3), element size = 2**m
// OR
#define VO_ZOPC (unsigned long)(0xe7L << 40 | 0x6aL << 0) // V1 := V2 | V3, element size = 2**m
// Comparison (element-wise)
#define VCEQ_ZOPC (unsigned long)(0xe7L << 40 | 0xf8L << 0) // V1 := (V2 == V3) ? 0xffff : 0x0000, element size = 2**m
#define VCH_ZOPC (unsigned long)(0xe7L << 40 | 0xfbL << 0) // V1 := (V2 > V3) ? 0xffff : 0x0000, element size = 2**m, signed
#define VCHL_ZOPC (unsigned long)(0xe7L << 40 | 0xf9L << 0) // V1 := (V2 > V3) ? 0xffff : 0x0000, element size = 2**m, unsigned
// Max/Min (element-wise)
#define VMX_ZOPC (unsigned long)(0xe7L << 40 | 0xffL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, signed
#define VMXL_ZOPC (unsigned long)(0xe7L << 40 | 0xfdL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, unsigned
#define VMN_ZOPC (unsigned long)(0xe7L << 40 | 0xfeL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, signed
#define VMNL_ZOPC (unsigned long)(0xe7L << 40 | 0xfcL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, unsigned
// Leading/Trailing Zeros, population count
#define VCLZ_ZOPC (unsigned long)(0xe7L << 40 | 0x53L << 0) // V1 := leadingzeros(V2), element size = 2**m
#define VCTZ_ZOPC (unsigned long)(0xe7L << 40 | 0x52L << 0) // V1 := trailingzeros(V2), element size = 2**m
#define VPOPCT_ZOPC (unsigned long)(0xe7L << 40 | 0x50L << 0) // V1 := popcount(V2), bytewise!!
// Rotate/Shift
#define VERLLV_ZOPC (unsigned long)(0xe7L << 40 | 0x73L << 0) // V1 := rotateleft(V2), rotate count in V3 element
#define VERLL_ZOPC (unsigned long)(0xe7L << 40 | 0x33L << 0) // V1 := rotateleft(V3), rotate count from d2(b2).
#define VERIM_ZOPC (unsigned long)(0xe7L << 40 | 0x72L << 0) // Rotate then insert under mask. Read Principles of Operation!!
#define VESLV_ZOPC (unsigned long)(0xe7L << 40 | 0x70L << 0) // V1 := SLL(V2, V3), unsigned, element-wise
#define VESL_ZOPC (unsigned long)(0xe7L << 40 | 0x30L << 0) // V1 := SLL(V3), unsigned, shift count from d2(b2).
#define VESRAV_ZOPC (unsigned long)(0xe7L << 40 | 0x7AL << 0) // V1 := SRA(V2, V3), signed, element-wise
#define VESRA_ZOPC (unsigned long)(0xe7L << 40 | 0x3AL << 0) // V1 := SRA(V3), signed, shift count from d2(b2).
#define VESRLV_ZOPC (unsigned long)(0xe7L << 40 | 0x78L << 0) // V1 := SRL(V2, V3), unsigned, element-wise
#define VESRL_ZOPC (unsigned long)(0xe7L << 40 | 0x38L << 0) // V1 := SRL(V3), unsigned, shift count from d2(b2).
#define VSL_ZOPC (unsigned long)(0xe7L << 40 | 0x74L << 0) // V1 := SLL(V2), unsigned, bit-count
#define VSLB_ZOPC (unsigned long)(0xe7L << 40 | 0x75L << 0) // V1 := SLL(V2), unsigned, byte-count
#define VSLDB_ZOPC (unsigned long)(0xe7L << 40 | 0x77L << 0) // V1 := SLL((V2,V3)), unsigned, byte-count
#define VSRA_ZOPC (unsigned long)(0xe7L << 40 | 0x7eL << 0) // V1 := SRA(V2), signed, bit-count
#define VSRAB_ZOPC (unsigned long)(0xe7L << 40 | 0x7fL << 0) // V1 := SRA(V2), signed, byte-count
#define VSRL_ZOPC (unsigned long)(0xe7L << 40 | 0x7cL << 0) // V1 := SRL(V2), unsigned, bit-count
#define VSRLB_ZOPC (unsigned long)(0xe7L << 40 | 0x7dL << 0) // V1 := SRL(V2), unsigned, byte-count
// Test under Mask
#define VTM_ZOPC (unsigned long)(0xe7L << 40 | 0xd8L << 0) // Like TM, set CC according to state of selected bits.
//---< Vector String Instructions >---
#define VFAE_ZOPC (unsigned long)(0xe7L << 40 | 0x82L << 0) // Find any element
#define VFEE_ZOPC (unsigned long)(0xe7L << 40 | 0x80L << 0) // Find element equal
#define VFENE_ZOPC (unsigned long)(0xe7L << 40 | 0x81L << 0) // Find element not equal
#define VSTRC_ZOPC (unsigned long)(0xe7L << 40 | 0x8aL << 0) // String range compare
#define VISTR_ZOPC (unsigned long)(0xe7L << 40 | 0x5cL << 0) // Isolate String
//--------------------------------
//-- Miscellaneous Operations --
//--------------------------------
// Execute
#define EX_ZOPC (unsigned int)(68L << 24)
@@ -1244,10 +1437,18 @@ class Assembler : public AbstractAssembler {
// unsigned arithmetic calculation instructions
// Mask bit#0 is not used by these instructions.
// There is no indication of overflow for these instr.
bcondLogZero = 2,
bcondLogNotZero = 5,
bcondLogZero_NoCarry = 8,
bcondLogZero_Carry = 2,
// bcondLogZero_Borrow = 8, // This CC is never generated.
bcondLogZero_NoBorrow = 2,
bcondLogZero = bcondLogZero_Carry | bcondLogZero_NoCarry,
bcondLogNotZero_NoCarry = 4,
bcondLogNotZero_Carry = 1,
bcondLogNotZero_Borrow = 4,
bcondLogNotZero_NoBorrow = 1,
bcondLogNotZero = bcondLogNotZero_Carry | bcondLogNotZero_NoCarry,
bcondLogCarry = bcondLogZero_Carry | bcondLogNotZero_Carry,
bcondLogBorrow = /* bcondLogZero_Borrow | */ bcondLogNotZero_Borrow,
// string search instructions
bcondFound = 4,
bcondNotFound = 2,
@@ -1280,6 +1481,29 @@ class Assembler : public AbstractAssembler {
to_minus_infinity = 7
};
// Vector Register Element Type.
enum VRegElemType {
VRET_BYTE = 0,
VRET_HW = 1,
VRET_FW = 2,
VRET_DW = 3,
VRET_QW = 4
};
// Vector Operation Result Control.
// This is a set of flags used in some vector instructions to control
// the result (side) effects of instruction execution.
enum VOpRC {
VOPRC_CCSET = 0b0001, // set the CC.
VOPRC_CCIGN = 0b0000, // ignore, don't set CC.
VOPRC_ZS = 0b0010, // Zero Search. Additional, elementwise, comparison against zero.
VOPRC_NOZS = 0b0000, // No Zero Search.
VOPRC_RTBYTEIX = 0b0100, // generate byte index to lowest element with true comparison.
VOPRC_RTBITVEC = 0b0000, // generate bit vector, all 1s for true, all 0s for false element comparisons.
VOPRC_INVERT = 0b1000, // invert comparison results.
VOPRC_NOINVERT = 0b0000 // use comparison results as is, do not invert.
};
// Inverse condition code, i.e. determine "15 - cc" for a given condition code cc.
static branch_condition inverse_condition(branch_condition cc);
static branch_condition inverse_float_condition(branch_condition cc);
@@ -1376,6 +1600,65 @@ class Assembler : public AbstractAssembler {
return r;
}
static int64_t rsmask_48( Address a) { assert(a.is_RSform(), "bad address format"); return rsmask_48( a.disp12(), a.base()); }
static int64_t rxmask_48( Address a) { if (a.is_RXform()) { return rxmask_48( a.disp12(), a.index(), a.base()); }
else if (a.is_RSform()) { return rsmask_48( a.disp12(), a.base()); }
else { guarantee(false, "bad address format"); return 0; }
}
static int64_t rsymask_48(Address a) { assert(a.is_RSYform(), "bad address format"); return rsymask_48(a.disp20(), a.base()); }
static int64_t rxymask_48(Address a) { if (a.is_RXYform()) { return rxymask_48( a.disp20(), a.index(), a.base()); }
else if (a.is_RSYform()) { return rsymask_48( a.disp20(), a.base()); }
else { guarantee(false, "bad address format"); return 0; }
}
static int64_t rsmask_48( int64_t d2, Register b2) { return uimm12(d2, 20, 48) | regz(b2, 16, 48); }
static int64_t rxmask_48( int64_t d2, Register x2, Register b2) { return uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48); }
static int64_t rsymask_48(int64_t d2, Register b2) { return simm20(d2) | regz(b2, 16, 48); }
static int64_t rxymask_48(int64_t d2, Register x2, Register b2) { return simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48); }
// Address calculated from d12(vx,b) - vx is vector index register.
static int64_t rvmask_48( int64_t d2, VectorRegister x2, Register b2) { return uimm12(d2, 20, 48) | vreg(x2, 12) | regz(b2, 16, 48); }
static int64_t vreg_mask(VectorRegister v, int pos) {
return vreg(v, pos) | v->RXB_mask(pos);
}
// Vector Element Size Control. 4-bit field which indicates the size of the vector elements.
static int64_t vesc_mask(int64_t size, int min_size, int max_size, int pos) {
// min_size - minimum element size. Not all instructions support element sizes beginning with "byte".
// max_size - maximum element size. Not all instructions support element sizes up to "QW".
assert((min_size <= size) && (size <= max_size), "element size control out of range");
return uimm4(size, pos, 48);
}
// Vector Element IndeX. 4-bit field which indexes the target vector element.
static int64_t veix_mask(int64_t ix, int el_size, int pos) {
// el_size - size of the vector element. This is a VRegElemType enum value.
// ix - vector element index.
int max_ix = -1;
switch (el_size) {
case VRET_BYTE: max_ix = 15; break;
case VRET_HW: max_ix = 7; break;
case VRET_FW: max_ix = 3; break;
case VRET_DW: max_ix = 1; break;
case VRET_QW: max_ix = 0; break;
default: guarantee(false, "bad vector element size %d", el_size); break;
}
assert((0 <= ix) && (ix <= max_ix), "element index out of range (0 <= %ld <= %d)", ix, max_ix);
return uimm4(ix, pos, 48);
}
// Vector Operation Result Control. 4-bit field.
static int64_t voprc_any(int64_t flags, int pos, int64_t allowed_flags = 0b1111) {
assert((flags & allowed_flags) == flags, "Invalid VOPRC_* flag combination: %d", (int)flags);
return uimm4(flags, pos, 48);
}
// Vector Operation Result Control. Condition code setting.
static int64_t voprc_ccmask(int64_t flags, int pos) {
return voprc_any(flags, pos, VOPRC_CCIGN | VOPRC_CCSET);
}
public:
//--------------------------------------------------
@@ -1453,6 +1736,8 @@ class Assembler : public AbstractAssembler {
static long imm24(int64_t i24, int s, int len) { return imm(i24, 24) << (len-s-24); }
static long imm32(int64_t i32, int s, int len) { return imm(i32, 32) << (len-s-32); }
static long vreg(VectorRegister v, int pos) { const int len = 48; return u_field(v->encoding()&0x0f, (len-pos)-1, (len-pos)-4) | v->RXB_mask(pos); }
static long fregt(FloatRegister r, int s, int len) { return freg(r,s,len); }
static long freg( FloatRegister r, int s, int len) { return u_field(r->encoding(), (len-s)-1, (len-s)-4); }
@@ -1840,13 +2125,16 @@ class Assembler : public AbstractAssembler {
inline void z_alsi( const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint32 -- z10
inline void z_algsi(const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint64 -- z10
// negate
// sign adjustment
inline void z_lcr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int32
inline void z_lcgr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int64
inline void z_lcgfr(Register r1, Register r2); // neg r1 = -r2 ; int64 <- int32
inline void z_lnr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int32
inline void z_lngr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int64
inline void z_lngfr(Register r1, Register r2); // neg r1 = -|r2| ; int64 <- int32
inline void z_lpr( Register r1, Register r2 = noreg); // r1 = |r2| ; int32
inline void z_lpgr( Register r1, Register r2 = noreg); // r1 = |r2| ; int64
inline void z_lpgfr(Register r1, Register r2); // r1 = |r2| ; int64 <- int32
// subtract instructions
// sub registers
@@ -2125,6 +2413,422 @@ class Assembler : public AbstractAssembler {
inline void z_trtt(Register r1, Register r2, int64_t m3);
//---------------------------
//-- Vector Instructions --
//---------------------------
//---< Vector Support Instructions >---
// Load (transfer from memory)
inline void z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
// Gather/Scatter
inline void z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
// load and replicate
inline void z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vlrepb(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlreph(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlrepf(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlrepg(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vllezb(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezh(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezf(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezg(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlbb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vll( VectorRegister v1, Register r3, int64_t d2, Register b2);
// Load (register to register)
inline void z_vlr( VectorRegister v1, VectorRegister v2);
inline void z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4);
inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgp( VectorRegister v1, Register r2, Register r3);
// vector register pack
inline void z_vpk( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vpkh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// vector register unpack (sign-extended)
inline void z_vuph( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuphb( VectorRegister v1, VectorRegister v2);
inline void z_vuphh( VectorRegister v1, VectorRegister v2);
inline void z_vuphf( VectorRegister v1, VectorRegister v2);
inline void z_vupl( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuplb( VectorRegister v1, VectorRegister v2);
inline void z_vuplh( VectorRegister v1, VectorRegister v2);
inline void z_vuplf( VectorRegister v1, VectorRegister v2);
// vector register unpack (zero-extended)
inline void z_vuplh( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuplhb( VectorRegister v1, VectorRegister v2);
inline void z_vuplhh( VectorRegister v1, VectorRegister v2);
inline void z_vuplhf( VectorRegister v1, VectorRegister v2);
inline void z_vupll( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vupllb( VectorRegister v1, VectorRegister v2);
inline void z_vupllh( VectorRegister v1, VectorRegister v2);
inline void z_vupllf( VectorRegister v1, VectorRegister v2);
// vector register merge high/low
inline void z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmrhb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmrlb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// vector register permute
inline void z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vpdi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
// vector register replicate
inline void z_vrep( VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4);
inline void z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepi( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vrepib(VectorRegister v1, int64_t imm2);
inline void z_vrepih(VectorRegister v1, int64_t imm2);
inline void z_vrepif(VectorRegister v1, int64_t imm2);
inline void z_vrepig(VectorRegister v1, int64_t imm2);
inline void z_vsel( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vseg( VectorRegister v1, VectorRegister v2, int64_t imm3);
// Load (immediate)
inline void z_vleib( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleih( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleif( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleig( VectorRegister v1, int64_t imm2, int64_t m3);
// Store
inline void z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2);
// Misc
inline void z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4);
inline void z_vgmb( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmh( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmf( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmg( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgbm( VectorRegister v1, int64_t imm2);
inline void z_vzero( VectorRegister v1); // preferred method to set vreg to all zeroes
inline void z_vone( VectorRegister v1); // preferred method to set vreg to all ones
//---< Vector Arithmetic Instructions >---
// Load
inline void z_vlc( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vlcb( VectorRegister v1, VectorRegister v2);
inline void z_vlch( VectorRegister v1, VectorRegister v2);
inline void z_vlcf( VectorRegister v1, VectorRegister v2);
inline void z_vlcg( VectorRegister v1, VectorRegister v2);
inline void z_vlp( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vlpb( VectorRegister v1, VectorRegister v2);
inline void z_vlph( VectorRegister v1, VectorRegister v2);
inline void z_vlpf( VectorRegister v1, VectorRegister v2);
inline void z_vlpg( VectorRegister v1, VectorRegister v2);
// ADD
inline void z_va( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vab( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vah( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vag( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vacc( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vaccb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vacch( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// SUB
inline void z_vs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// MULTIPLY
inline void z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
// MULTIPLY & ADD
inline void z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmalh( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmae( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmale( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmao( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmalo( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
// VECTOR SUM
inline void z_vsum( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Average
inline void z_vavg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vavgb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// VECTOR Galois Field Multiply Sum
inline void z_vgfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vgfmb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// VECTOR Galois Field Multiply Sum and Accumulate
inline void z_vgfma( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
//---< Vector Logical Instructions >---
// AND
inline void z_vn( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vnc( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// XOR
inline void z_vx( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// NOR
inline void z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// OR
inline void z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Comparison (element-wise)
inline void z_vceq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vceqb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vch( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vchb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vchlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Max/Min (element-wise)
inline void z_vmx( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmxb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmxlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmn( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmnb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmng( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmnlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Leading/Trailing Zeros, population count
inline void z_vclz( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vclzb( VectorRegister v1, VectorRegister v2);
inline void z_vclzh( VectorRegister v1, VectorRegister v2);
inline void z_vclzf( VectorRegister v1, VectorRegister v2);
inline void z_vclzg( VectorRegister v1, VectorRegister v2);
inline void z_vctz( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vctzb( VectorRegister v1, VectorRegister v2);
inline void z_vctzh( VectorRegister v1, VectorRegister v2);
inline void z_vctzf( VectorRegister v1, VectorRegister v2);
inline void z_vctzg( VectorRegister v1, VectorRegister v2);
inline void z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3);
// Rotate/Shift
inline void z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verim( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5);
inline void z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_veslv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_veslb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesra( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vsl( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vslb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsldb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_vsra( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrab( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrl( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Test under Mask
inline void z_vtm( VectorRegister v1, VectorRegister v2);
//---< Vector String Instructions >---
inline void z_vfae( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find any element
inline void z_vfaeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfaeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfaef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfee( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element equal
inline void z_vfeeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfene( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element not equal
inline void z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vstrc( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6); // String range compare
inline void z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vistr( VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5); // Isolate String
inline void z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrbs(VectorRegister v1, VectorRegister v2);
inline void z_vistrhs(VectorRegister v1, VectorRegister v2);
inline void z_vistrfs(VectorRegister v1, VectorRegister v2);
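To make the shape of this API concrete, here is a minimal usage sketch. It is illustrative only and not part of this change: the helper function, the emitter pointer `masm`, and the register handles (Z_V0..Z_V2, `noreg`, and the general registers passed in) are assumed stand-ins; real callers sit in MacroAssembler-level code.

  // Hypothetical sketch: element-wise 32-bit add of two 128-bit vectors held in memory.
  static void emit_vadd_fw(Assembler* masm, Register a, Register b, Register dst) {
    masm->z_vl (Z_V0, 0, noreg, a);    // load 16 bytes from 0(a)
    masm->z_vl (Z_V1, 0, noreg, b);    // load 16 bytes from 0(b)
    masm->z_vaf(Z_V2, Z_V0, Z_V1);     // add, element type 'F' (fullword)
    masm->z_vst(Z_V2, 0, noreg, dst);  // store the 16-byte result to 0(dst)
  }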
  // Floating-point instructions
  // ===========================

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -309,6 +309,9 @@ inline void Assembler::z_lcgfr(Register r1, Register r2) { emit_32( LCGFR_ZOPC |
inline void Assembler::z_lnr( Register r1, Register r2) { emit_16( LNR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
inline void Assembler::z_lngr( Register r1, Register r2) { emit_32( LNGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
inline void Assembler::z_lngfr(Register r1, Register r2) { emit_32( LNGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
inline void Assembler::z_lpr( Register r1, Register r2) { emit_16( LPR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
inline void Assembler::z_lpgr( Register r1, Register r2) { emit_32( LPGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
inline void Assembler::z_lpgfr(Register r1, Register r2) { emit_32( LPGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
inline void Assembler::z_lrvr( Register r1, Register r2) { emit_32( LRVR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_lrvgr(Register r1, Register r2) { emit_32( LRVGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
@ -702,6 +705,421 @@ inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2)
inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
//---------------------------
//-- Vector Instructions --
//---------------------------
//---< Vector Support Instructions >---
// Load (transfer from memory)
inline void Assembler::z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VLM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
inline void Assembler::z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VL_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
inline void Assembler::z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }
// Gather/Scatter
inline void Assembler::z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32)); }
inline void Assembler::z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32)); }
// load and replicate
inline void Assembler::z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLREP_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlrepb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_BYTE); }// load byte and replicate to all vector elements of type 'B'
inline void Assembler::z_vlreph( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_HW); } // load HW and replicate to all vector elements of type 'H'
inline void Assembler::z_vlrepf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_FW); } // load FW and replicate to all vector elements of type 'F'
inline void Assembler::z_vlrepg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_DW); } // load DW and replicate to all vector elements of type 'G'
inline void Assembler::z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLLEZ_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vllezb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_BYTE); }// load logical byte into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezh( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_HW); } // load logical HW into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_FW); } // load logical FW into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_DW); } // load logical DW into left DW of VR, zero all other bit positions.
inline void Assembler::z_vlbb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLBB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(m3, 32, 48)); }
inline void Assembler::z_vll( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VLL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }
// Load (register to register)
inline void Assembler::z_vlr ( VectorRegister v1, VectorRegister v2) {emit_48(VLR_ZOPC | vreg(v1, 8) | vreg(v2, 12)); }
inline void Assembler::z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VLGV_ZOPC | reg(r1, 8, 48) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_BYTE); } // load byte from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_HW); } // load HW from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_FW); } // load FW from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_DW); } // load DW from VR element (index d2(b2)) into GR.
inline void Assembler::z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_BYTE); }
inline void Assembler::z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_HW); }
inline void Assembler::z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_FW); }
inline void Assembler::z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_DW); }
inline void Assembler::z_vlvgp( VectorRegister v1, Register r2, Register r3) {emit_48(VLVGP_ZOPC | vreg(v1, 8) | reg(r2, 12, 48) | reg(r3, 16, 48)); }
// vector register pack
inline void Assembler::z_vpk( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPK_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32)); }
inline void Assembler::z_vpkh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vpkf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vpkg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vpkshs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vpksfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vpksgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
inline void Assembler::z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKLS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// vector register unpack (sign-extended)
inline void Assembler::z_vuph( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vuphb( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vuphh( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vuphf( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vupl( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vuplb( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vuplh( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vuplf( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_FW); } // vector element type 'F'
// vector register unpack (zero-extended)
inline void Assembler::z_vuplh( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vuplhb( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vuplhh( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vuplhf( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vupll( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPLL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vupllb( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vupllh( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vupllf( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_FW); } // vector element type 'F'
// vector register merge high/low
inline void Assembler::z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmrhb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmrhh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmrhf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmrhg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_BYTE); }   // vector element type 'B'
inline void Assembler::z_vmrlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_HW); }     // vector element type 'H'
inline void Assembler::z_vmrlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_FW); }     // vector element type 'F'
inline void Assembler::z_vmrlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_DW); }     // vector element type 'G'
// vector register permute
inline void Assembler::z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VPERM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
inline void Assembler::z_vpdi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPDI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm4(m4, 32, 48)); }
// vector register replicate
inline void Assembler::z_vrep( VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4) {emit_48(VREP_ZOPC | vreg(v1, 8) | vreg(v3, 12) | simm16(imm2, 16, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vrepi( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VREPI_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vrepib( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vrepih( VectorRegister v1, int64_t imm2)                         {z_vrepi(v1, imm2, VRET_HW); }     // vector element type 'H'
inline void Assembler::z_vrepif( VectorRegister v1, int64_t imm2)                         {z_vrepi(v1, imm2, VRET_FW); }     // vector element type 'F'
inline void Assembler::z_vrepig( VectorRegister v1, int64_t imm2)                         {z_vrepi(v1, imm2, VRET_DW); }     // vector element type 'G'
inline void Assembler::z_vsel( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VSEL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
inline void Assembler::z_vseg( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VSEG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | uimm4(m3, 32, 48)); }
// Load (immediate)
inline void Assembler::z_vleib( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIB_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vleih( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIH_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vleif( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIF_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vleig( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIG_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_DW, 32)); }
// Store
inline void Assembler::z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VSTM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
inline void Assembler::z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VST_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
inline void Assembler::z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }
inline void Assembler::z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VSTL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }
// Misc
inline void Assembler::z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4) {emit_48(VGM_ZOPC | vreg(v1, 8) | uimm8( imm2, 16, 48) | uimm8(imm3, 24, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vgmb( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vgmh( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vgmf( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vgmg( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vgbm( VectorRegister v1, int64_t imm2) {emit_48(VGBM_ZOPC | vreg(v1, 8) | uimm16(imm2, 16, 48)); }
inline void Assembler::z_vzero( VectorRegister v1) {z_vgbm(v1, 0); } // preferred method to set vreg to all zeroes
inline void Assembler::z_vone( VectorRegister v1) {z_vgbm(v1, 0xffff); } // preferred method to set vreg to all ones
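A reading note on the two wrappers just above (illustrative, not text from the change): both reuse VECTOR GENERATE BYTE MASK, so the calls in each line below assemble to the identical instruction; `masm` and Z_V0 are assumed stand-ins as before.

  masm->z_vzero(Z_V0);   // same encoding as masm->z_vgbm(Z_V0, 0x0000)
  masm->z_vone(Z_V0);    // same encoding as masm->z_vgbm(Z_V0, 0xffff)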
//---< Vector Arithmetic Instructions >---
// Load
inline void Assembler::z_vlc( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VLC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlcb( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vlch( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vlcf( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vlcg( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vlp( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VLP_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlpb( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vlph( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vlpf( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vlpg( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_DW); } // vector element type 'G'
// ADD
inline void Assembler::z_va( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
inline void Assembler::z_vab( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vah( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vaf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vag( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vaq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_QW); } // vector element type 'Q'
inline void Assembler::z_vacc( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VACC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
inline void Assembler::z_vaccb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vacch( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vaccf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vaccg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vaccq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_QW); } // vector element type 'Q'
// SUB
inline void Assembler::z_vs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
inline void Assembler::z_vsb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vsh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vsf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vsg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vsq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_QW); } // vector element type 'Q'
inline void Assembler::z_vscbi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSCBI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
inline void Assembler::z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_QW); } // vector element type 'Q'
// MULTIPLY
inline void Assembler::z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VML_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VME_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
// MULTIPLY & ADD
inline void Assembler::z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmalh( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmae( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmale( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmao( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmalo( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
// VECTOR SUM
inline void Assembler::z_vsum( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_HW, 32)); }
inline void Assembler::z_vsumb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsum(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vsumh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsum(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vsumg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_FW, 32)); }
inline void Assembler::z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_HW); }   // vector element type 'H'
inline void Assembler::z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_FW); }   // vector element type 'F'
inline void Assembler::z_vsumq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_DW, 32)); }
inline void Assembler::z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_FW); }   // vector element type 'F'
inline void Assembler::z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_DW); }   // vector element type 'G'
// Average
inline void Assembler::z_vavg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vavgb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vavgh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vavgf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vavgg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vavgl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVGL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_DW); } // vector element type 'G'
// VECTOR Galois Field Multiply Sum
inline void Assembler::z_vgfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VGFM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vgfmb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vgfmh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vgfmf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vgfmg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vgfma(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VGFMA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_DW, 20)); }
inline void Assembler::z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_DW); } // vector element type 'G'
//---< Vector Logical Instructions >---
// AND
inline void Assembler::z_vn( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VN_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vnc( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// XOR
inline void Assembler::z_vx( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// NOR
inline void Assembler::z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// OR
inline void Assembler::z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// Comparison (element-wise)
inline void Assembler::z_vceq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCEQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vceqb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
inline void Assembler::z_vceqh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vceqf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vceqg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); }   // vector element type 'B', set CC
inline void Assembler::z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCSET); }     // vector element type 'H', set CC
inline void Assembler::z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCSET); }     // vector element type 'F', set CC
inline void Assembler::z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCSET); }     // vector element type 'G', set CC
inline void Assembler::z_vch( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vchb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
inline void Assembler::z_vchh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vchf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vchg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vchbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
inline void Assembler::z_vchhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vchfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vchgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
inline void Assembler::z_vchl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCHL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vchlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
inline void Assembler::z_vchlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vchlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vchlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
inline void Assembler::z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// Max/Min (element-wise)
inline void Assembler::z_vmx( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmxb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmxh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmxf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmxg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vmxl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMXL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmxlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmxlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmxlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmxlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vmn( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMN_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmnb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmnh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmnf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmng( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vmnl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMNL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmnlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmnlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmnlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmnlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_DW); } // vector element type 'G'
// Leading/Trailing Zeros, population count
inline void Assembler::z_vclz( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VCLZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vclzb( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vclzh( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vclzf( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vclzg( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vctz( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VCTZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vctzb( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vctzh( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vctzf( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vctzg( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VPOPCT_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
// Rotate/Shift
inline void Assembler::z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VERLLV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VERLL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
inline void Assembler::z_verim( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5) {emit_48(VERIM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48) | vesc_mask(m5, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_HW); } // vector element type 'H'
inline void Assembler::z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_FW); } // vector element type 'F'
inline void Assembler::z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_DW); } // vector element type 'G'
inline void Assembler::z_veslv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESLV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vesl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_veslb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_BYTE);} // vector element type 'B'
inline void Assembler::z_veslh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_veslf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_veslg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
inline void Assembler::z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESRAV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vesra( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRA_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
inline void Assembler::z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESRLV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vesrl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
inline void Assembler::z_vsl( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vslb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsldb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {emit_48(VSLDB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48)); }
inline void Assembler::z_vsra( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrab( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRAB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrl( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// Test under Mask
inline void Assembler::z_vtm( VectorRegister v1, VectorRegister v2) {emit_48(VTM_ZOPC | vreg(v1, 8) | vreg(v2, 12)); }
//---< Vector String Instructions >---
inline void Assembler::z_vfae( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFAE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find any element
inline void Assembler::z_vfaeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfaeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfaef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vfee( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFEE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find element equal
inline void Assembler::z_vfeeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfeeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfeef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vfene( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFENE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find element not equal
inline void Assembler::z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vstrc( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6) {emit_48(VSTRC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(imm5, VRET_BYTE, VRET_FW, 20) | voprc_any(cc6, 24) ); } // String range compare
inline void Assembler::z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_BYTE, cc6); }
inline void Assembler::z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_HW, cc6); }
inline void Assembler::z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_FW, cc6); }
inline void Assembler::z_vistr( VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5) {emit_48(VISTR_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(imm3, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // isolate string
inline void Assembler::z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_BYTE, cc5); }
inline void Assembler::z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_HW, cc5); }
inline void Assembler::z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_FW, cc5); }
inline void Assembler::z_vistrbs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_BYTE, VOPRC_CCSET); }
inline void Assembler::z_vistrhs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_HW, VOPRC_CCSET); }
inline void Assembler::z_vistrfs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_FW, VOPRC_CCSET); }
//-------------------------------
// FLOAT INSTRUCTIONS
//-------------------------------

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@
// Sorted according to sparc.
// z/Architecture remembers branch targets, so don't share vtables.
define_pd_global(bool, ShareVtableStubs, false);
define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.
define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks.

View File

@ -4671,6 +4671,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));
mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));
resolve_oop_handle(mirror);
}
//---------------------------------------------------------------

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,3 +35,5 @@
REGISTER_DEFINITION(Register, noreg);
REGISTER_DEFINITION(FloatRegister, fnoreg);
REGISTER_DEFINITION(VectorRegister, vnoreg);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -46,3 +46,13 @@ const char* FloatRegisterImpl::name() const {
};
return is_valid() ? names[encoding()] : "fnoreg";
}
const char* VectorRegisterImpl::name() const {
const char* names[number_of_registers] = {
"Z_V0", "Z_V1", "Z_V2", "Z_V3", "Z_V4", "Z_V5", "Z_V6", "Z_V7",
"Z_V8", "Z_V9", "Z_V10", "Z_V11", "Z_V12", "Z_V13", "Z_V14", "Z_V15",
"Z_V16", "Z_V17", "Z_V18", "Z_V19", "Z_V20", "Z_V21", "Z_V22", "Z_V23",
"Z_V24", "Z_V25", "Z_V26", "Z_V27", "Z_V28", "Z_V29", "Z_V30", "Z_V31"
};
return is_valid() ? names[encoding()] : "vnoreg";
}

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,11 +34,6 @@ class VMRegImpl;
typedef VMRegImpl* VMReg;
// Use Register as shortcut.
class RegisterImpl;
typedef RegisterImpl* Register;
// The implementation of integer registers for z/Architecture.
// z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001
//
@ -57,6 +52,17 @@ typedef RegisterImpl* Register;
// f1,f3,f5,f7 General purpose (volatile)
// f8-f15 General purpose (nonvolatile)
//===========================
//=== Integer Registers ===
//===========================
// Use Register as shortcut.
class RegisterImpl;
typedef RegisterImpl* Register;
// The implementation of integer registers for z/Architecture.
inline Register as_Register(int encoding) {
return (Register)(long)encoding;
}
@ -110,6 +116,11 @@ CONSTANT_REGISTER_DECLARATION(Register, Z_R13, (13));
CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14));
CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15));
//=============================
//=== Condition Registers ===
//=============================
// Use ConditionRegister as shortcut
class ConditionRegisterImpl;
typedef ConditionRegisterImpl* ConditionRegister;
@ -159,7 +170,7 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
// dangers of defines.
// If a particular file has a problem with these defines then it's possible
// to turn them off in that file by defining
// DONT_USE_REGISTER_DEFINES. Register_definition_s390.cpp does that
// DONT_USE_REGISTER_DEFINES. Register_definitions_s390.cpp does that
// so that it's able to provide real definitions of these registers
// for use in debuggers and such.
@ -186,6 +197,11 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
#define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
//=========================
//=== Float Registers ===
//=========================
// Use FloatRegister as shortcut
class FloatRegisterImpl;
typedef FloatRegisterImpl* FloatRegister;
@ -263,22 +279,6 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F15, (15));
#define Z_F15 ((FloatRegister)( Z_F15_FloatRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
public:
enum {
number_of_registers =
(RegisterImpl::number_of_registers +
FloatRegisterImpl::number_of_registers)
* 2 // register halves
+ 1 // condition code register
};
static const int max_gpr;
static const int max_fpr;
};
// Single, Double and Quad fp reg classes. These exist to map the ADLC
// encoding for a floating point register, to the FloatRegister number
// desired by the macroassembler. A FloatRegister is a number between
@ -329,6 +329,161 @@ class QuadFloatRegisterImpl {
};
//==========================
//=== Vector Registers ===
//==========================
// Use VectorRegister as shortcut
class VectorRegisterImpl;
typedef VectorRegisterImpl* VectorRegister;
// The implementation of vector registers for z/Architecture.
inline VectorRegister as_VectorRegister(int encoding) {
return (VectorRegister)(long)encoding;
}
class VectorRegisterImpl: public AbstractRegisterImpl {
public:
enum {
number_of_registers = 32,
number_of_arg_registers = 0
};
// construction
inline friend VectorRegister as_VectorRegister(int encoding);
inline VMReg as_VMReg();
// accessors
int encoding() const {
assert(is_valid(), "invalid register"); return value();
}
bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
bool is_volatile() const { return true; }
bool is_nonvolatile() const { return false; }
// Register fields in z/Architecture instructions are 4 bits wide, restricting the
// addressable register set size to 16.
// The vector register set size is 32, requiring an extension, by one bit, of the
// register encoding. This is accomplished by the introduction of a RXB field in the
// instruction. RXB = Register eXtension Bits.
// The RXB field contains the MSBs (most significant bits) of the vector register numbers
// used for this instruction. Assignment of an MSB in RXB is by bit position of the
// register field in the instruction.
// Example:
// The register field starting at bit position 12 in the instruction is assigned RXB bit 0b0100.
int64_t RXB_mask(int pos) {
if (encoding() >= number_of_registers/2) {
switch (pos) {
case 8: return ((int64_t)0b1000) << 8; // actual bit pos: 36
case 12: return ((int64_t)0b0100) << 8; // actual bit pos: 37
case 16: return ((int64_t)0b0010) << 8; // actual bit pos: 38
case 32: return ((int64_t)0b0001) << 8; // actual bit pos: 39
default:
ShouldNotReachHere();
}
}
return 0;
}
const char* name() const;
VectorRegister successor() const { return as_VectorRegister(encoding() + 1); }
};
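The RXB composition above can be condensed into a small standalone sketch (illustration only, not part of this change; it merely mirrors RXB_mask() for a single operand field and assumes the same field positions 8, 12, 16 and 32):
// Hypothetical helper, for illustration: returns the RXB contribution of one
// vector operand, computed exactly as RXB_mask() above does.
#include <cstdint>
#include <cassert>
static uint64_t rxb_bit(int reg_encoding, int field_pos) {
  if (reg_encoding < 16) return 0;            // V0..V15 need no extension bit
  switch (field_pos) {
    case  8: return (uint64_t)0b1000 << 8;    // operand field at bit 8  -> RXB bit 0 (instruction bit 36)
    case 12: return (uint64_t)0b0100 << 8;    // operand field at bit 12 -> RXB bit 1 (instruction bit 37)
    case 16: return (uint64_t)0b0010 << 8;    // operand field at bit 16 -> RXB bit 2 (instruction bit 38)
    case 32: return (uint64_t)0b0001 << 8;    // operand field at bit 32 -> RXB bit 3 (instruction bit 39)
    default: assert(false); return 0;
  }
}
// Example: Z_V24 (encoding 24) used in the operand field at bit position 12 contributes 0x400.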
// The Vector registers of z/Architecture.
CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg, (-1));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V0, (0));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V1, (1));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V2, (2));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V3, (3));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V4, (4));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V5, (5));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V6, (6));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V7, (7));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V8, (8));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V9, (9));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V10, (10));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V11, (11));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V12, (12));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V13, (13));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V14, (14));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V15, (15));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V16, (16));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V17, (17));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V18, (18));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V19, (19));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V20, (20));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V21, (21));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V22, (22));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V23, (23));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V24, (24));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V25, (25));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V26, (26));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V27, (27));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V28, (28));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V29, (29));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V30, (30));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V31, (31));
#ifndef DONT_USE_REGISTER_DEFINES
#define vnoreg ((VectorRegister)(vnoreg_VectorRegisterEnumValue))
#define Z_V0 ((VectorRegister)( Z_V0_VectorRegisterEnumValue))
#define Z_V1 ((VectorRegister)( Z_V1_VectorRegisterEnumValue))
#define Z_V2 ((VectorRegister)( Z_V2_VectorRegisterEnumValue))
#define Z_V3 ((VectorRegister)( Z_V3_VectorRegisterEnumValue))
#define Z_V4 ((VectorRegister)( Z_V4_VectorRegisterEnumValue))
#define Z_V5 ((VectorRegister)( Z_V5_VectorRegisterEnumValue))
#define Z_V6 ((VectorRegister)( Z_V6_VectorRegisterEnumValue))
#define Z_V7 ((VectorRegister)( Z_V7_VectorRegisterEnumValue))
#define Z_V8 ((VectorRegister)( Z_V8_VectorRegisterEnumValue))
#define Z_V9 ((VectorRegister)( Z_V9_VectorRegisterEnumValue))
#define Z_V10 ((VectorRegister)( Z_V10_VectorRegisterEnumValue))
#define Z_V11 ((VectorRegister)( Z_V11_VectorRegisterEnumValue))
#define Z_V12 ((VectorRegister)( Z_V12_VectorRegisterEnumValue))
#define Z_V13 ((VectorRegister)( Z_V13_VectorRegisterEnumValue))
#define Z_V14 ((VectorRegister)( Z_V14_VectorRegisterEnumValue))
#define Z_V15 ((VectorRegister)( Z_V15_VectorRegisterEnumValue))
#define Z_V16 ((VectorRegister)( Z_V16_VectorRegisterEnumValue))
#define Z_V17 ((VectorRegister)( Z_V17_VectorRegisterEnumValue))
#define Z_V18 ((VectorRegister)( Z_V18_VectorRegisterEnumValue))
#define Z_V19 ((VectorRegister)( Z_V19_VectorRegisterEnumValue))
#define Z_V20 ((VectorRegister)( Z_V20_VectorRegisterEnumValue))
#define Z_V21 ((VectorRegister)( Z_V21_VectorRegisterEnumValue))
#define Z_V22 ((VectorRegister)( Z_V22_VectorRegisterEnumValue))
#define Z_V23 ((VectorRegister)( Z_V23_VectorRegisterEnumValue))
#define Z_V24 ((VectorRegister)( Z_V24_VectorRegisterEnumValue))
#define Z_V25 ((VectorRegister)( Z_V25_VectorRegisterEnumValue))
#define Z_V26 ((VectorRegister)( Z_V26_VectorRegisterEnumValue))
#define Z_V27 ((VectorRegister)( Z_V27_VectorRegisterEnumValue))
#define Z_V28 ((VectorRegister)( Z_V28_VectorRegisterEnumValue))
#define Z_V29 ((VectorRegister)( Z_V29_VectorRegisterEnumValue))
#define Z_V30 ((VectorRegister)( Z_V30_VectorRegisterEnumValue))
#define Z_V31 ((VectorRegister)( Z_V31_VectorRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
public:
enum {
number_of_registers =
(RegisterImpl::number_of_registers +
FloatRegisterImpl::number_of_registers)
* 2 // register halves
+ 1 // condition code register
};
static const int max_gpr;
static const int max_fpr;
};
// Common register declarations used in assembler code.
REGISTER_DECLARATION(Register, Z_EXC_OOP, Z_R2);
REGISTER_DECLARATION(Register, Z_EXC_PC, Z_R3);

View File

@ -3149,7 +3149,7 @@ operand noArg_iRegI() %{
interface(REG_INTER);
%}
// Revenregi and roddRegI constitute and even-odd-pair.
// revenRegI and roddRegI constitute an even-odd pair.
operand revenRegI() %{
constraint(ALLOC_IN_RC(z_rarg3_int_reg));
match(iRegI);
@ -3157,7 +3157,7 @@ operand revenRegI() %{
interface(REG_INTER);
%}
// Revenregi and roddRegI constitute and even-odd-pair.
// revenRegI and roddRegI constitute an even-odd pair.
operand roddRegI() %{
constraint(ALLOC_IN_RC(z_rarg4_int_reg));
match(iRegI);
@ -3283,7 +3283,7 @@ operand memoryRegP() %{
interface(REG_INTER);
%}
// Revenregp and roddRegP constitute and even-odd-pair.
// revenRegP and roddRegP constitute an even-odd pair.
operand revenRegP() %{
constraint(ALLOC_IN_RC(z_rarg3_ptr_reg));
match(iRegP);
@ -3291,7 +3291,7 @@ operand revenRegP() %{
interface(REG_INTER);
%}
// Revenregl and roddRegL constitute and even-odd-pair.
// revenRegP and roddRegP constitute an even-odd pair.
operand roddRegP() %{
constraint(ALLOC_IN_RC(z_rarg4_ptr_reg));
match(iRegP);
@ -3380,7 +3380,7 @@ operand iRegL() %{
interface(REG_INTER);
%}
// Revenregl and roddRegL constitute and even-odd-pair.
// revenRegL and roddRegL constitute an even-odd pair.
operand revenRegL() %{
constraint(ALLOC_IN_RC(z_rarg3_long_reg));
match(iRegL);
@ -3388,7 +3388,7 @@ operand revenRegL() %{
interface(REG_INTER);
%}
// Revenregl and roddRegL constitute and even-odd-pair.
// revenRegL and roddRegL constitute an even-odd pair.
operand roddRegL() %{
constraint(ALLOC_IN_RC(z_rarg4_long_reg));
match(iRegL);
@ -6443,6 +6443,32 @@ instruct mulL_Reg_mem(iRegL dst, memory src)%{
ins_pipe(pipe_class_dummy);
%}
instruct mulHiL_reg_reg(revenRegL Rdst, roddRegL Rsrc1, iRegL Rsrc2, iRegL Rtmp1, flagsReg cr)%{
match(Set Rdst (MulHiL Rsrc1 Rsrc2));
effect(TEMP_DEF Rdst, USE_KILL Rsrc1, TEMP Rtmp1, KILL cr);
ins_cost(7*DEFAULT_COST);
// TODO: s390 port size(VARIABLE_SIZE);
format %{ "MulHiL $Rdst, $Rsrc1, $Rsrc2\t # Multiply High Long" %}
ins_encode%{
Register dst = $Rdst$$Register;
Register src1 = $Rsrc1$$Register;
Register src2 = $Rsrc2$$Register;
Register tmp1 = $Rtmp1$$Register;
Register tmp2 = $Rdst$$Register;
// z/Architecture has only unsigned multiply (64 * 64 -> 128).
// implementing mulhs(a,b) = mulhu(a,b) - (a & (b>>63)) - (b & (a>>63))
__ z_srag(tmp2, src1, 63); // a>>63
__ z_srag(tmp1, src2, 63); // b>>63
__ z_ngr(tmp2, src2); // b & (a>>63)
__ z_ngr(tmp1, src1); // a & (b>>63)
__ z_agr(tmp1, tmp2); // ((a & (b>>63)) + (b & (a>>63)))
__ z_mlgr(dst, src2); // tricky: 128-bit product is written to even/odd pair (dst,src1),
// multiplicand is taken from oddReg (src1), multiplier in src2.
__ z_sgr(dst, tmp1);
%}
ins_pipe(pipe_class_dummy);
%}
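The correction applied after z_mlgr in the MulHiL expansion above follows the identity mulhs(a,b) = mulhu(a,b) - (a & (b>>63)) - (b & (a>>63)). A quick host-side sketch to sanity-check that identity (illustration only, not part of this change; it assumes a compiler with __int128 and arithmetic right shift of negative values):
#include <cstdint>
#include <cassert>
// High 64 bits of the signed product via the unsigned high product plus the
// same correction the instruct block performs with z_ngr/z_agr/z_sgr.
static int64_t mulhs_via_mulhu(int64_t a, int64_t b) {
  uint64_t ua = (uint64_t)a, ub = (uint64_t)b;
  uint64_t hu = (uint64_t)(((unsigned __int128)ua * ub) >> 64);   // mulhu(a,b)
  uint64_t corr = (ua & (uint64_t)(b >> 63)) + (ub & (uint64_t)(a >> 63));
  return (int64_t)(hu - corr);                                    // wraps mod 2^64, as on hardware
}
int main() {
  const int64_t v[] = { 0, 1, -1, INT64_MAX, INT64_MIN, 123456789123456789LL, -987654321987LL };
  for (int64_t a : v) {
    for (int64_t b : v) {
      assert(mulhs_via_mulhu(a, b) == (int64_t)(((__int128)a * b) >> 64));
    }
  }
  return 0;
}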
// DIV
// Integer DIVMOD with Register, both quotient and mod results

View File

@ -2382,6 +2382,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
if (is_static) {
__ mem2reg_opt(obj, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
__ mem2reg_opt(obj, Address(obj, Klass::java_mirror_offset()));
__ resolve_oop_handle(obj);
}
}

View File

@ -706,12 +706,13 @@ void VM_Version::determine_features() {
Label getCPUFEATURES; // fcode = -1 (cache)
Label getCIPHERFEATURES; // fcode = -2 (cipher)
Label getMSGDIGESTFEATURES; // fcode = -3 (SHA)
Label getVECTORFEATURES; // fcode = -4 (OS support for vector instructions)
Label checkLongDispFast;
Label noLongDisp;
Label posDisp, negDisp;
Label errRTN;
a->z_ltgfr(Z_R0, Z_ARG2); // Buf len to r0 and test.
a->z_brl(getFEATURES); // negative -> Get machine features.
a->z_brl(getFEATURES); // negative -> Get machine features not covered by facility list.
a->z_brz(checkLongDispFast); // zero -> Check for high-speed Long Displacement Facility.
a->z_aghi(Z_R0, -1);
a->z_stfle(0, Z_ARG1);
@ -736,6 +737,8 @@ void VM_Version::determine_features() {
a->z_bre(getCIPHERFEATURES);
a->z_cghi(Z_R0, -3); // -3: Extract detailed crypto capabilities (msg digest instructions).
a->z_bre(getMSGDIGESTFEATURES);
a->z_cghi(Z_R0, -4); // -4: Verify vector instruction availability (OS support).
a->z_bre(getVECTORFEATURES);
a->z_xgr(Z_RET, Z_RET); // Not a valid function code.
a->z_br(Z_R14); // Return "operation aborted".
@ -766,6 +769,11 @@ void VM_Version::determine_features() {
a->z_ecag(Z_RET,Z_R0,0,Z_ARG3); // Extract information as requested by Z_ARG1 contents.
a->z_br(Z_R14);
// Use a vector instruction to verify OS support. Will fail with SIGFPE if OS support is missing.
a->bind(getVECTORFEATURES);
a->z_vtm(Z_V0,Z_V0); // non-destructive vector instruction. Will cause SIGFPE if not supported.
a->z_br(Z_R14);
// Check the performance of the Long Displacement Facility, i.e. find out if we are running on z900 or newer.
a->bind(checkLongDispFast);
a->z_llill(Z_R0, 0xffff); // preset #iterations
@ -962,6 +970,19 @@ void VM_Version::determine_features() {
_nfeatures = 0;
}
if (has_VectorFacility()) {
// Verify that feature can actually be used. OS support required.
call_getFeatures(buffer, -4, 0);
if (printVerbose) {
ttyLocker ttyl;
if (has_VectorFacility()) {
tty->print_cr(" Vector Facility has been verified to be supported by OS");
} else {
tty->print_cr(" Vector Facility has been disabled - not supported by OS");
}
}
}
// Extract Crypto Facility details.
if (has_Crypto()) {
// Get cipher features.

View File

@ -473,6 +473,8 @@ class VM_Version: public Abstract_VM_Version {
static void set_has_CryptoExt5() { _features[0] |= CryptoExtension5Mask; }
static void set_has_VectorFacility() { _features[2] |= VectorFacilityMask; }
static void reset_has_VectorFacility() { _features[2] &= ~VectorFacilityMask; }
// Assembler testing.
static void allow_all();
static void revert();

View File

@ -122,6 +122,7 @@ class Assembler : public AbstractAssembler {
fpop1_op3 = 0x34,
fpop2_op3 = 0x35,
impdep1_op3 = 0x36,
addx_op3 = 0x36,
aes3_op3 = 0x36,
sha_op3 = 0x36,
bmask_op3 = 0x36,
@ -133,6 +134,8 @@ class Assembler : public AbstractAssembler {
fzero_op3 = 0x36,
fsrc_op3 = 0x36,
fnot_op3 = 0x36,
mpmul_op3 = 0x36,
umulx_op3 = 0x36,
xmulx_op3 = 0x36,
crc32c_op3 = 0x36,
impdep2_op3 = 0x37,
@ -195,6 +198,9 @@ class Assembler : public AbstractAssembler {
fnegs_opf = 0x05,
fnegd_opf = 0x06,
addxc_opf = 0x11,
addxccc_opf = 0x13,
umulxhi_opf = 0x16,
alignaddr_opf = 0x18,
bmask_opf = 0x19,
@ -240,7 +246,8 @@ class Assembler : public AbstractAssembler {
sha256_opf = 0x142,
sha512_opf = 0x143,
crc32c_opf = 0x147
crc32c_opf = 0x147,
mpmul_opf = 0x148
};
enum op5s {
@ -380,7 +387,7 @@ class Assembler : public AbstractAssembler {
assert_signed_range(x, nbits + 2);
}
static void assert_unsigned_const(int x, int nbits) {
static void assert_unsigned_range(int x, int nbits) {
assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
}
@ -534,6 +541,12 @@ class Assembler : public AbstractAssembler {
return x & ((1 << nbits) - 1);
}
// unsigned immediate, in low bits, at most nbits long.
static int uimm(int x, int nbits) {
assert_unsigned_range(x, nbits);
return x & ((1 << nbits) - 1);
}
// compute inverse of wdisp16
static intptr_t inv_wdisp16(int x, intptr_t pos) {
int lo = x & ((1 << 14) - 1);
@ -631,6 +644,9 @@ class Assembler : public AbstractAssembler {
// FMAf instructions supported only on certain processors
static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }
// MPMUL instruction supported only on certain processors
static void mpmul_only() { assert(VM_Version::has_mpmul(), "This instruction only works on SPARC with MPMUL"); }
// instruction only in VIS1
static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
@ -772,11 +788,12 @@ class Assembler : public AbstractAssembler {
AbstractAssembler::flush();
}
inline void emit_int32(int); // shadows AbstractAssembler::emit_int32
inline void emit_data(int);
inline void emit_data(int, RelocationHolder const &rspec);
inline void emit_data(int, relocInfo::relocType rtype);
// helper for above functions
inline void emit_int32(int32_t); // shadows AbstractAssembler::emit_int32
inline void emit_data(int32_t);
inline void emit_data(int32_t, RelocationHolder const&);
inline void emit_data(int32_t, relocInfo::relocType rtype);
// Helper for the above functions.
inline void check_delay();
@ -929,6 +946,10 @@ class Assembler : public AbstractAssembler {
// fmaf instructions.
inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
inline void fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
inline void fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
inline void fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
// pp 165
@ -960,6 +981,8 @@ class Assembler : public AbstractAssembler {
inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d,
RelocationHolder const &rspec = RelocationHolder());
inline void ldd(Register s1, Register s2, FloatRegister d);
inline void ldd(Register s1, int simm13a, FloatRegister d);
inline void ldfsr(Register s1, Register s2);
inline void ldfsr(Register s1, int simm13a);
@ -987,8 +1010,6 @@ class Assembler : public AbstractAssembler {
inline void lduw(Register s1, int simm13a, Register d);
inline void ldx(Register s1, Register s2, Register d);
inline void ldx(Register s1, int simm13a, Register d);
inline void ldd(Register s1, Register s2, Register d);
inline void ldd(Register s1, int simm13a, Register d);
// pp 177
@ -1157,6 +1178,9 @@ class Assembler : public AbstractAssembler {
inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2);
inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a);
inline void std(FloatRegister d, Register s1, Register s2);
inline void std(FloatRegister d, Register s1, int simm13a);
inline void stfsr(Register s1, Register s2);
inline void stfsr(Register s1, int simm13a);
inline void stxfsr(Register s1, Register s2);
@ -1177,8 +1201,6 @@ class Assembler : public AbstractAssembler {
inline void stw(Register d, Register s1, int simm13a);
inline void stx(Register d, Register s1, Register s2);
inline void stx(Register d, Register s1, int simm13a);
inline void std(Register d, Register s1, Register s2);
inline void std(Register d, Register s1, int simm13a);
// pp 177
@ -1267,6 +1289,9 @@ class Assembler : public AbstractAssembler {
// VIS3 instructions
inline void addxc(Register s1, Register s2, Register d);
inline void addxccc(Register s1, Register s2, Register d);
inline void movstosw(FloatRegister s, Register d);
inline void movstouw(FloatRegister s, Register d);
inline void movdtox(FloatRegister s, Register d);
@ -1276,6 +1301,7 @@ class Assembler : public AbstractAssembler {
inline void xmulx(Register s1, Register s2, Register d);
inline void xmulxhi(Register s1, Register s2, Register d);
inline void umulxhi(Register s1, Register s2, Register d);
// Crypto SHA instructions
@ -1287,6 +1313,10 @@ class Assembler : public AbstractAssembler {
inline void crc32c(FloatRegister s1, FloatRegister s2, FloatRegister d);
// MPMUL instruction
inline void mpmul(int uimm5);
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
#ifdef VALIDATE_PIPELINE

View File

@ -59,7 +59,7 @@ inline void Assembler::check_delay() {
#endif
}
inline void Assembler::emit_int32(int x) {
inline void Assembler::emit_int32(int32_t x) {
check_delay();
#ifdef VALIDATE_PIPELINE
_hazard_state = NoHazard;
@ -67,16 +67,16 @@ inline void Assembler::emit_int32(int x) {
AbstractAssembler::emit_int32(x);
}
inline void Assembler::emit_data(int x) {
inline void Assembler::emit_data(int32_t x) {
emit_int32(x);
}
inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
inline void Assembler::emit_data(int32_t x, relocInfo::relocType rtype) {
relocate(rtype);
emit_int32(x);
}
inline void Assembler::emit_data(int x, RelocationHolder const &rspec) {
inline void Assembler::emit_data(int32_t x, RelocationHolder const &rspec) {
relocate(rspec);
emit_int32(x);
}
@ -359,6 +359,19 @@ inline void Assembler::fmadd(FloatRegisterImpl::Width w, FloatRegister s1, Float
fmaf_only();
emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w));
}
inline void Assembler::fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
fmaf_only();
emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x4 + w) | fs2(s2, w));
}
inline void Assembler::fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
fmaf_only();
emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0xc + w) | fs2(s2, w));
}
inline void Assembler::fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
fmaf_only();
emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x8 + w) | fs2(s2, w));
}
inline void Assembler::flush(Register s1, Register s2) {
emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2));
@ -402,6 +415,15 @@ inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a,
emit_data(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);
}
inline void Assembler::ldd(Register s1, Register s2, FloatRegister d) {
assert(d->is_even(), "not even");
ldf(FloatRegisterImpl::D, s1, s2, d);
}
inline void Assembler::ldd(Register s1, int simm13a, FloatRegister d) {
assert(d->is_even(), "not even");
ldf(FloatRegisterImpl::D, s1, simm13a, d);
}
inline void Assembler::ldxfsr(Register s1, Register s2) {
emit_int32(op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2));
}
@ -460,16 +482,6 @@ inline void Assembler::ldx(Register s1, Register s2, Register d) {
inline void Assembler::ldx(Register s1, int simm13a, Register d) {
emit_data(op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::ldd(Register s1, Register s2, Register d) {
v9_dep();
assert(d->is_even(), "not even");
emit_int32(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2));
}
inline void Assembler::ldd(Register s1, int simm13a, Register d) {
v9_dep();
assert(d->is_even(), "not even");
emit_data(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::ldsba(Register s1, Register s2, int ia, Register d) {
emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -806,6 +818,15 @@ inline void Assembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register
emit_data(op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::std(FloatRegister d, Register s1, Register s2) {
assert(d->is_even(), "not even");
stf(FloatRegisterImpl::D, d, s1, s2);
}
inline void Assembler::std(FloatRegister d, Register s1, int simm13a) {
assert(d->is_even(), "not even");
stf(FloatRegisterImpl::D, d, s1, simm13a);
}
inline void Assembler::stxfsr(Register s1, Register s2) {
emit_int32(op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2));
}
@ -848,16 +869,6 @@ inline void Assembler::stx(Register d, Register s1, Register s2) {
inline void Assembler::stx(Register d, Register s1, int simm13a) {
emit_data(op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::std(Register d, Register s1, Register s2) {
v9_dep();
assert(d->is_even(), "not even");
emit_int32(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2));
}
inline void Assembler::std(Register d, Register s1, int simm13a) {
v9_dep();
assert(d->is_even(), "not even");
emit_data(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::stba(Register d, Register s1, Register s2, int ia) {
emit_int32(op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -1043,6 +1054,15 @@ inline void Assembler::bshuffle(FloatRegister s1, FloatRegister s2, FloatRegiste
// VIS3 instructions
inline void Assembler::addxc(Register s1, Register s2, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxc_opf) | rs2(s2));
}
inline void Assembler::addxccc(Register s1, Register s2, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxccc_opf) | rs2(s2));
}
inline void Assembler::movstosw(FloatRegister s, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S));
@ -1073,6 +1093,10 @@ inline void Assembler::xmulxhi(Register s1, Register s2, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2));
}
inline void Assembler::umulxhi(Register s1, Register s2, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(umulx_op3) | rs1(s1) | opf(umulxhi_opf) | rs2(s2));
}
// Crypto SHA instructions
@ -1096,4 +1120,11 @@ inline void Assembler::crc32c(FloatRegister s1, FloatRegister s2, FloatRegister
emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D));
}
// MPMUL instruction
inline void Assembler::mpmul(int uimm5) {
mpmul_only();
emit_int32(op(arith_op) | rd(0) | op3(mpmul_op3) | rs1(0) | opf(mpmul_opf) | uimm(uimm5, 5));
}
#endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP

View File

@ -119,8 +119,8 @@ address RegisterMap::pd_location(VMReg regname) const {
reg = regname->as_Register();
}
if (reg->is_out()) {
assert(_younger_window != NULL, "Younger window should be available");
return second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
return _younger_window == NULL ? NULL :
second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
}
if (reg->is_local() || reg->is_in()) {
assert(_window != NULL, "Window should be available");

View File

@ -97,12 +97,15 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
writeable) \
\
product(intx, UseVIS, 99, \
"Highest supported VIS instructions set on Sparc") \
"Highest supported VIS instructions set on SPARC") \
range(0, 99) \
\
product(bool, UseCBCond, false, \
"Use compare and branch instruction on SPARC") \
\
product(bool, UseMPMUL, false, \
"Use multi-precision multiply instruction (mpmul) on SPARC") \
\
product(bool, UseBlockZeroing, false, \
"Use special cpu instructions for block zeroing") \
\

View File

@ -1574,29 +1574,39 @@ void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(zero, ptr_cc, s1, 0, L);
return;
} else {
br_null(s1, false, p, L);
delayed()->nop();
}
br_null(s1, false, p, L);
delayed()->nop();
}
void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
return;
} else {
br_notnull(s1, false, p, L);
delayed()->nop();
}
br_notnull(s1, false, p, L);
delayed()->nop();
}
// Unconditional short branch
void MacroAssembler::ba_short(Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(equal, icc, G0, G0, L);
return;
} else {
br(always, false, pt, L);
delayed()->nop();
}
br(always, false, pt, L);
}
// Branch if 'icc' says zero or not (i.e. icc.z == 1|0).
void MacroAssembler::br_icc_zero(bool iszero, Predict p, Label &L) {
assert_not_delayed();
Condition cf = (iszero ? Assembler::zero : Assembler::notZero);
br(cf, false, p, L);
delayed()->nop();
}
@ -3834,6 +3844,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
ld_ptr(mirror, in_bytes(ConstMethod::constants_offset()), mirror);
ld_ptr(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld_ptr(mirror, mirror_offset, mirror);
resolve_oop_handle(mirror);
}
void MacroAssembler::load_klass(Register src_oop, Register klass) {

View File

@ -606,7 +606,7 @@ class MacroAssembler : public Assembler {
// offset. No explicit code generation is needed if the offset is within a certain
// range (0 <= offset <= page_size).
//
// %%%%%% Currently not done for SPARC
// FIXME: Currently not done for SPARC
void null_check(Register reg, int offset = -1);
static bool needs_explicit_null_check(intptr_t offset);
@ -648,6 +648,9 @@ class MacroAssembler : public Assembler {
// unconditional short branch
void ba_short(Label& L);
// Branch on icc.z (true or not).
void br_icc_zero(bool iszero, Predict p, Label &L);
inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
@ -663,19 +666,19 @@ class MacroAssembler : public Assembler {
inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
// Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual)
inline void cmp( Register s1, Register s2 );
inline void cmp( Register s1, int simm13a );
inline void cmp( Register s1, Register s2 );
inline void cmp( Register s1, int simm13a );
inline void jmp( Register s1, Register s2 );
inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
// Check if the call target is out of wdisp30 range (relative to the code cache)
static inline bool is_far_target(address d);
inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
inline void call( address d, RelocationHolder const& rspec);
inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
inline void call( address d, RelocationHolder const& rspec);
inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
inline void call( Label& L, RelocationHolder const& rspec);
inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
inline void call( Label& L, RelocationHolder const& rspec);
inline void callr( Register s1, Register s2 );
inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );

View File

@ -185,7 +185,7 @@ inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, reloc
}
inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
// See note[+] on 'avoid_pipeline_stalls()', in "assembler_sparc.inline.hpp".
// See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
avoid_pipeline_stall();
br(c, a, p, target(L));
}

View File

@ -236,7 +236,7 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
inline VMReg as_VMReg( );
// accessors
int encoding() const { assert(is_valid(), "invalid register"); return value(); }
int encoding() const { assert(is_valid(), "invalid register"); return value(); }
public:
int encoding(Width w) const {
@ -258,10 +258,12 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
return -1;
}
bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
bool is_even() const { return (encoding() & 1) == 0; }
const char* name() const;
FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
};

View File

@ -2628,7 +2628,6 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
%}
enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
@ -2651,7 +2650,71 @@ enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
__ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fnmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fnmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fnmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fnmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
enc_class fmovs (dflt_reg dst, dflt_reg src) %{
@ -7597,7 +7660,7 @@ instruct sqrtD_reg_reg(regD dst, regD src) %{
ins_pipe(fdivD_reg_reg);
%}
// Single precision fused floating-point multiply-add (d = a * b + c).
// Single/Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF c (Binary a b)));
@ -7606,7 +7669,6 @@ instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
ins_pipe(fmaF_regx4);
%}
// Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD c (Binary a b)));
@ -7615,6 +7677,66 @@ instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
ins_pipe(fmaD_regx4);
%}
// Additional patterns matching complement versions that we can map directly to
// variants of the fused multiply-add instructions.
// Single/Double precision fused floating-point multiply-sub (d = a * b - c)
instruct fmsubF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF (NegF c) (Binary a b)));
format %{ "fmsubs $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
ins_encode(fmsubs(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
instruct fmsubD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD (NegD c) (Binary a b)));
format %{ "fmsubd $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
ins_encode(fmsubd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
// Single/Double precision fused floating-point neg. multiply-add,
// d = -1 * a * b - c = -(a * b + c)
instruct fnmaddF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF (NegF c) (Binary (NegF a) b)));
match(Set dst (FmaF (NegF c) (Binary a (NegF b))));
format %{ "fnmadds $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
ins_encode(fnmadds(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
instruct fnmaddD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD (NegD c) (Binary (NegD a) b)));
match(Set dst (FmaD (NegD c) (Binary a (NegD b))));
format %{ "fnmaddd $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
ins_encode(fnmaddd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
// Single/Double precision fused floating-point neg. multiply-sub,
// d = -1 * a * b + c = -(a * b - c)
instruct fnmsubF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF c (Binary (NegF a) b)));
match(Set dst (FmaF c (Binary a (NegF b))));
format %{ "fnmsubs $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
ins_encode(fnmsubs(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
instruct fnmsubD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD c (Binary (NegD a) b)));
match(Set dst (FmaD c (Binary a (NegD b))));
format %{ "fnmsubd $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
ins_encode(fnmsubd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
//----------Logical Instructions-----------------------------------------------
// And Instructions
// Register And

View File

@ -58,7 +58,6 @@
// Note: The register L7 is used as L7_thread_cache, and may not be used
// any other way within this module.
static const Register& Lstub_temp = L2;
// -------------------------------------------------------------------------------------------------------------------------
@ -4943,7 +4942,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
/**
/**
* Arguments:
*
* Inputs:
@ -4975,6 +4974,773 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
/**
* Arguments:
*
* Inputs:
* I0 - int* x-addr
* I1 - int x-len
* I2 - int* y-addr
* I3 - int y-len
* I4 - int* z-addr (output vector)
* I5 - int z-len
*/
address generate_multiplyToLen() {
assert(UseMultiplyToLenIntrinsic, "need VIS3 instructions");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
address start = __ pc();
__ save_frame(0);
const Register xptr = I0; // input address
const Register xlen = I1; // ...and length in 32b-words
const Register yptr = I2; //
const Register ylen = I3; //
const Register zptr = I4; // output address
const Register zlen = I5; // ...and length in 32b-words
/* The minimal "limb" representation suggest that odd length vectors are as
* likely as even length dittos. This in turn suggests that we need to cope
* with odd/even length arrays and data not aligned properly for 64-bit read
* and write operations. We thus use a number of different kernels:
*
* if (is_even(x.len) && is_even(y.len))
* if (is_align64(x) && is_align64(y) && is_align64(z))
* if (x.len == y.len && 16 <= x.len && x.len <= 64)
* memv_mult_mpmul(...)
* else
* memv_mult_64x64(...)
* else
* memv_mult_64x64u(...)
* else
* memv_mult_32x32(...)
*
* Here we assume VIS3 support (for 'umulxhi', 'addxc' and 'addxccc').
* In case CBCOND instructions are supported, we will use 'cxbX'. If the
* MPMUL instruction is supported, we will generate a kernel using 'mpmul'
* (for vectors with proper characteristics).
*/
const Register tmp0 = L0;
const Register tmp1 = L1;
Label L_mult_32x32;
Label L_mult_64x64u;
Label L_mult_64x64;
Label L_exit;
if_both_even(xlen, ylen, tmp0, false, L_mult_32x32);
if_all3_aligned(xptr, yptr, zptr, tmp1, 64, false, L_mult_64x64u);
if (UseMPMUL) {
if_eq(xlen, ylen, false, L_mult_64x64);
if_in_rng(xlen, 16, 64, tmp0, tmp1, false, L_mult_64x64);
// 1. Multiply naturally aligned 64b-datums using a generic 'mpmul' kernel,
// operating on equal length vectors of size [16..64].
gen_mult_mpmul(xlen, xptr, yptr, zptr, L_exit);
}
// 2. Multiply naturally aligned 64-bit datums (64x64).
__ bind(L_mult_64x64);
gen_mult_64x64(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
// 3. Multiply unaligned 64-bit datums (64x64).
__ bind(L_mult_64x64u);
gen_mult_64x64_unaligned(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
// 4. Multiply naturally aligned 32-bit datums (32x32).
__ bind(L_mult_32x32);
gen_mult_32x32(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
__ bind(L_exit);
__ ret();
__ delayed()->restore();
return start;
}
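For reference, the kernels below all implement the same arithmetic as a scalar multiplyToLen routine. A minimal C++ model of that semantics (grade-school multiplication over big-endian 32-bit limbs, assuming zlen == xlen + ylen), illustrative only and not part of the patch:
#include <cstdint>
// Scalar model: z = x * y, limbs stored most-significant first.
static void multiply_to_len(const uint32_t* x, int xlen,
                            const uint32_t* y, int ylen,
                            uint32_t* z, int zlen) {
  for (int k = 0; k < zlen; k++) {
    z[k] = 0;
  }
  for (int j = ylen - 1; j >= 0; j--) {
    uint64_t carry = 0;
    for (int i = xlen - 1; i >= 0; i--) {
      int k = i + j + 1;
      uint64_t p = (uint64_t)x[i] * y[j] + z[k] + carry;
      z[k]  = (uint32_t)p;   // low 32 bits stay in z[k]
      carry = p >> 32;       // high 32 bits propagate towards z[0]
    }
    z[j] = (uint32_t)carry;  // final carry of this row
  }
}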
// Additional help functions used by multiplyToLen generation.
void if_both_even(Register r1, Register r2, Register tmp, bool iseven, Label &L)
{
__ or3(r1, r2, tmp);
__ andcc(tmp, 0x1, tmp);
__ br_icc_zero(iseven, Assembler::pn, L);
}
void if_all3_aligned(Register r1, Register r2, Register r3,
Register tmp, uint align, bool isalign, Label &L)
{
__ or3(r1, r2, tmp);
__ or3(r3, tmp, tmp);
__ andcc(tmp, (align - 1), tmp);
__ br_icc_zero(isalign, Assembler::pn, L);
}
void if_eq(Register x, Register y, bool iseq, Label &L)
{
Assembler::Condition cf = (iseq ? Assembler::equal : Assembler::notEqual);
__ cmp_and_br_short(x, y, cf, Assembler::pt, L);
}
void if_in_rng(Register x, int lb, int ub, Register t1, Register t2, bool inrng, Label &L)
{
assert(Assembler::is_simm13(lb), "Small ints only!");
assert(Assembler::is_simm13(ub), "Small ints only!");
// Compute (x - lb) * (ub - x) >= 0
// NOTE: With the local use of this routine, we rely on small integers to
// guarantee that we do not overflow in the multiplication.
__ add(G0, ub, t2);
__ sub(x, lb, t1);
__ sub(t2, x, t2);
__ mulx(t1, t2, t1);
Assembler::Condition cf = (inrng ? Assembler::greaterEqual : Assembler::less);
__ cmp_and_br_short(t1, G0, cf, Assembler::pt, L);
}
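The branch-free range test used by if_in_rng can be sanity-checked on plain integers; a standalone sketch (illustrative, not part of the patch):
#include <cassert>
// x is in [lb, ub] iff (x - lb) * (ub - x) >= 0, provided the product cannot
// overflow; the caller guarantees that by only passing small values.
static bool in_range(long x, long lb, long ub) {
  return (x - lb) * (ub - x) >= 0;
}
int main() {
  assert(in_range(16, 16, 64));   // lower bound included
  assert(in_range(40, 16, 64));   // interior point
  assert(!in_range(15, 16, 64));  // below: (-1) * 49 < 0
  assert(!in_range(65, 16, 64));  // above: 49 * (-1) < 0
  return 0;
}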
void ldd_entry(Register base, Register offs, FloatRegister dest)
{
__ ldd(base, offs, dest);
__ inc(offs, 8);
}
void ldx_entry(Register base, Register offs, Register dest)
{
__ ldx(base, offs, dest);
__ inc(offs, 8);
}
void mpmul_entry(int m, Label &next)
{
__ mpmul(m);
__ cbcond(Assembler::equal, Assembler::icc, G0, G0, next);
}
void stx_entry(Label &L, Register r1, Register r2, Register base, Register offs)
{
__ bind(L);
__ stx(r1, base, offs);
__ inc(offs, 8);
__ stx(r2, base, offs);
__ inc(offs, 8);
}
void offs_entry(Label &Lbl0, Label &Lbl1)
{
assert(Lbl0.is_bound(), "must be");
assert(Lbl1.is_bound(), "must be");
int offset = Lbl0.loc_pos() - Lbl1.loc_pos();
__ emit_data(offset);
}
/* Generate the actual multiplication kernels for BigInteger vectors:
*
* 1. gen_mult_mpmul(...)
*
* 2. gen_mult_64x64(...)
*
* 3. gen_mult_64x64_unaligned(...)
*
* 4. gen_mult_32x32(...)
*/
void gen_mult_mpmul(Register len, Register xptr, Register yptr, Register zptr,
Label &L_exit)
{
const Register zero = G0;
const Register gxp = G1; // Need to use global registers across RWs.
const Register gyp = G2;
const Register gzp = G3;
const Register offs = G4;
const Register disp = G5;
__ mov(xptr, gxp);
__ mov(yptr, gyp);
__ mov(zptr, gzp);
/* Compute jump vector entry:
*
* 1. mpmul input size (0..31) x 64b
* 2. vector input size in 32b limbs (even number)
* 3. branch entries in reverse order (31..0), using two
* instructions per entry (2 * 4 bytes).
*
* displacement = byte_offset(bra_offset(len))
* = byte_offset((64 - len)/2)
* = 8 * (64 - len)/2
* = 4 * (64 - len)
*/
Register temp = I5; // Alright to use input regs. in first batch.
__ sub(zero, len, temp);
__ add(temp, 64, temp);
__ sllx(temp, 2, disp); // disp := (64 - len) << 2
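// Worked example (illustrative): for len == 16, i.e. eight 64-bit limbs,
// disp = 4 * (64 - 16) = 192 bytes. Each entry in the table below is two
// 4-byte instructions (8 bytes), so the dispatch skips 192 / 8 = 24 of the
// 32 entries and executes only the last 8 loads.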
// Dispatch relative current PC, into instruction table below.
__ rdpc(temp);
__ add(temp, 16, temp);
__ jmp(temp, disp);
__ delayed()->clr(offs);
ldd_entry(gxp, offs, F22);
ldd_entry(gxp, offs, F20);
ldd_entry(gxp, offs, F18);
ldd_entry(gxp, offs, F16);
ldd_entry(gxp, offs, F14);
ldd_entry(gxp, offs, F12);
ldd_entry(gxp, offs, F10);
ldd_entry(gxp, offs, F8);
ldd_entry(gxp, offs, F6);
ldd_entry(gxp, offs, F4);
ldx_entry(gxp, offs, I5);
ldx_entry(gxp, offs, I4);
ldx_entry(gxp, offs, I3);
ldx_entry(gxp, offs, I2);
ldx_entry(gxp, offs, I1);
ldx_entry(gxp, offs, I0);
ldx_entry(gxp, offs, L7);
ldx_entry(gxp, offs, L6);
ldx_entry(gxp, offs, L5);
ldx_entry(gxp, offs, L4);
ldx_entry(gxp, offs, L3);
ldx_entry(gxp, offs, L2);
ldx_entry(gxp, offs, L1);
ldx_entry(gxp, offs, L0);
ldd_entry(gxp, offs, F2);
ldd_entry(gxp, offs, F0);
ldx_entry(gxp, offs, O5);
ldx_entry(gxp, offs, O4);
ldx_entry(gxp, offs, O3);
ldx_entry(gxp, offs, O2);
ldx_entry(gxp, offs, O1);
ldx_entry(gxp, offs, O0);
__ save(SP, -176, SP);
const Register addr = gxp; // Alright to reuse 'gxp'.
// Dispatch relative current PC, into instruction table below.
__ rdpc(addr);
__ add(addr, 16, addr);
__ jmp(addr, disp);
__ delayed()->clr(offs);
ldd_entry(gyp, offs, F58);
ldd_entry(gyp, offs, F56);
ldd_entry(gyp, offs, F54);
ldd_entry(gyp, offs, F52);
ldd_entry(gyp, offs, F50);
ldd_entry(gyp, offs, F48);
ldd_entry(gyp, offs, F46);
ldd_entry(gyp, offs, F44);
ldd_entry(gyp, offs, F42);
ldd_entry(gyp, offs, F40);
ldd_entry(gyp, offs, F38);
ldd_entry(gyp, offs, F36);
ldd_entry(gyp, offs, F34);
ldd_entry(gyp, offs, F32);
ldd_entry(gyp, offs, F30);
ldd_entry(gyp, offs, F28);
ldd_entry(gyp, offs, F26);
ldd_entry(gyp, offs, F24);
ldx_entry(gyp, offs, O5);
ldx_entry(gyp, offs, O4);
ldx_entry(gyp, offs, O3);
ldx_entry(gyp, offs, O2);
ldx_entry(gyp, offs, O1);
ldx_entry(gyp, offs, O0);
ldx_entry(gyp, offs, L7);
ldx_entry(gyp, offs, L6);
ldx_entry(gyp, offs, L5);
ldx_entry(gyp, offs, L4);
ldx_entry(gyp, offs, L3);
ldx_entry(gyp, offs, L2);
ldx_entry(gyp, offs, L1);
ldx_entry(gyp, offs, L0);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
Label L_mpmul_restore_4, L_mpmul_restore_3, L_mpmul_restore_2;
Label L_mpmul_restore_1, L_mpmul_restore_0;
// Dispatch relative current PC, into instruction table below.
__ rdpc(addr);
__ add(addr, 16, addr);
__ jmp(addr, disp);
__ delayed()->clr(offs);
mpmul_entry(31, L_mpmul_restore_0);
mpmul_entry(30, L_mpmul_restore_0);
mpmul_entry(29, L_mpmul_restore_0);
mpmul_entry(28, L_mpmul_restore_0);
mpmul_entry(27, L_mpmul_restore_1);
mpmul_entry(26, L_mpmul_restore_1);
mpmul_entry(25, L_mpmul_restore_1);
mpmul_entry(24, L_mpmul_restore_1);
mpmul_entry(23, L_mpmul_restore_1);
mpmul_entry(22, L_mpmul_restore_1);
mpmul_entry(21, L_mpmul_restore_1);
mpmul_entry(20, L_mpmul_restore_2);
mpmul_entry(19, L_mpmul_restore_2);
mpmul_entry(18, L_mpmul_restore_2);
mpmul_entry(17, L_mpmul_restore_2);
mpmul_entry(16, L_mpmul_restore_2);
mpmul_entry(15, L_mpmul_restore_2);
mpmul_entry(14, L_mpmul_restore_2);
mpmul_entry(13, L_mpmul_restore_3);
mpmul_entry(12, L_mpmul_restore_3);
mpmul_entry(11, L_mpmul_restore_3);
mpmul_entry(10, L_mpmul_restore_3);
mpmul_entry( 9, L_mpmul_restore_3);
mpmul_entry( 8, L_mpmul_restore_3);
mpmul_entry( 7, L_mpmul_restore_3);
mpmul_entry( 6, L_mpmul_restore_4);
mpmul_entry( 5, L_mpmul_restore_4);
mpmul_entry( 4, L_mpmul_restore_4);
mpmul_entry( 3, L_mpmul_restore_4);
mpmul_entry( 2, L_mpmul_restore_4);
mpmul_entry( 1, L_mpmul_restore_4);
mpmul_entry( 0, L_mpmul_restore_4);
Label L_z31, L_z30, L_z29, L_z28, L_z27, L_z26, L_z25, L_z24;
Label L_z23, L_z22, L_z21, L_z20, L_z19, L_z18, L_z17, L_z16;
Label L_z15, L_z14, L_z13, L_z12, L_z11, L_z10, L_z09, L_z08;
Label L_z07, L_z06, L_z05, L_z04, L_z03, L_z02, L_z01, L_z00;
Label L_zst_base; // Store sequence base address.
__ bind(L_zst_base);
stx_entry(L_z31, L7, L6, gzp, offs);
stx_entry(L_z30, L5, L4, gzp, offs);
stx_entry(L_z29, L3, L2, gzp, offs);
stx_entry(L_z28, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z27, O5, O4, gzp, offs);
stx_entry(L_z26, O3, O2, gzp, offs);
stx_entry(L_z25, O1, O0, gzp, offs);
stx_entry(L_z24, L7, L6, gzp, offs);
stx_entry(L_z23, L5, L4, gzp, offs);
stx_entry(L_z22, L3, L2, gzp, offs);
stx_entry(L_z21, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z20, O5, O4, gzp, offs);
stx_entry(L_z19, O3, O2, gzp, offs);
stx_entry(L_z18, O1, O0, gzp, offs);
stx_entry(L_z17, L7, L6, gzp, offs);
stx_entry(L_z16, L5, L4, gzp, offs);
stx_entry(L_z15, L3, L2, gzp, offs);
stx_entry(L_z14, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z13, O5, O4, gzp, offs);
stx_entry(L_z12, O3, O2, gzp, offs);
stx_entry(L_z11, O1, O0, gzp, offs);
stx_entry(L_z10, L7, L6, gzp, offs);
stx_entry(L_z09, L5, L4, gzp, offs);
stx_entry(L_z08, L3, L2, gzp, offs);
stx_entry(L_z07, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z06, O5, O4, gzp, offs);
stx_entry(L_z05, O3, O2, gzp, offs);
stx_entry(L_z04, O1, O0, gzp, offs);
stx_entry(L_z03, L7, L6, gzp, offs);
stx_entry(L_z02, L5, L4, gzp, offs);
stx_entry(L_z01, L3, L2, gzp, offs);
stx_entry(L_z00, L1, L0, gzp, offs);
__ restore();
__ restore();
// Exit out of 'mpmul' routine, back to multiplyToLen.
__ ba_short(L_exit);
Label L_zst_offs;
__ bind(L_zst_offs);
offs_entry(L_z31, L_zst_base); // index 31: 2048x2048
offs_entry(L_z30, L_zst_base);
offs_entry(L_z29, L_zst_base);
offs_entry(L_z28, L_zst_base);
offs_entry(L_z27, L_zst_base);
offs_entry(L_z26, L_zst_base);
offs_entry(L_z25, L_zst_base);
offs_entry(L_z24, L_zst_base);
offs_entry(L_z23, L_zst_base);
offs_entry(L_z22, L_zst_base);
offs_entry(L_z21, L_zst_base);
offs_entry(L_z20, L_zst_base);
offs_entry(L_z19, L_zst_base);
offs_entry(L_z18, L_zst_base);
offs_entry(L_z17, L_zst_base);
offs_entry(L_z16, L_zst_base);
offs_entry(L_z15, L_zst_base);
offs_entry(L_z14, L_zst_base);
offs_entry(L_z13, L_zst_base);
offs_entry(L_z12, L_zst_base);
offs_entry(L_z11, L_zst_base);
offs_entry(L_z10, L_zst_base);
offs_entry(L_z09, L_zst_base);
offs_entry(L_z08, L_zst_base);
offs_entry(L_z07, L_zst_base);
offs_entry(L_z06, L_zst_base);
offs_entry(L_z05, L_zst_base);
offs_entry(L_z04, L_zst_base);
offs_entry(L_z03, L_zst_base);
offs_entry(L_z02, L_zst_base);
offs_entry(L_z01, L_zst_base);
offs_entry(L_z00, L_zst_base); // index 0: 64x64
__ bind(L_mpmul_restore_4);
__ restore();
__ bind(L_mpmul_restore_3);
__ restore();
__ bind(L_mpmul_restore_2);
__ restore();
__ bind(L_mpmul_restore_1);
__ restore();
__ bind(L_mpmul_restore_0);
// Dispatch via offset vector entry, into z-store sequence.
Label L_zst_rdpc;
__ bind(L_zst_rdpc);
assert(L_zst_base.is_bound(), "must be");
assert(L_zst_offs.is_bound(), "must be");
assert(L_zst_rdpc.is_bound(), "must be");
int dbase = L_zst_rdpc.loc_pos() - L_zst_base.loc_pos();
int doffs = L_zst_rdpc.loc_pos() - L_zst_offs.loc_pos();
temp = gyp; // Alright to reuse 'gyp'.
__ rdpc(addr);
__ sub(addr, doffs, temp);
__ srlx(disp, 1, disp);
__ lduw(temp, disp, offs);
__ sub(addr, dbase, temp);
__ jmp(temp, offs);
__ delayed()->clr(offs);
}
void gen_mult_64x64(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for immediate use.
const Register ri = L0; // Outer loop index, xv[i]
const Register rj = L1; // Inner loop index, yv[j]
const Register rk = L2; // Output loop index, zv[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])
const Register lop = O0; // lo-64b product
const Register hip = O1; // hi-64b product
const Register zero = G0;
Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;
__ srlx(xn, 1, xn); // index for u32 to u64 ditto
__ srlx(yn, 1, yn); // index for u32 to u64 ditto
__ srlx(zn, 1, zn); // index for u32 to u64 ditto
__ dec(xn); // Adjust [0..(N/2)-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u64 c = 0
__ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
__ sllx(yn, 3, rj); // int j = yn (byte offset j = 8*yn)
__ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)
__ ldx(yp, rj, ry); // u64 y = yp[yn]
// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ ldx(xp, ri, rx); // x = xp[i]
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry)
__ addxc(hip, zero, rc); // carry over to next datum [k-1]
__ stx(lop, zp, rk); // z[k] = lop
__ dec(rk, 8); // k--
__ dec(ri, 8); // i--
__ ba_short(L_loop_i);
__ bind(L_exit_loop_i);
__ stx(rc, zp, rk); // z[k] = c
// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 3, rj); // int j = yn - 1 (byte offset j = 8*yn)
__ dec(rj, 8);
__ bind(L_loop_j);
__ cmp_and_br_short(rj, 0, // j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u64 c = 0
__ ldx(yp, rj, ry); // u64 y = yp[j]
// for (int i = xn, k = --zn; i >= 0; i--)
__ dec(zn); // --zn
__ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
__ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)
__ bind(L_loop_i2);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ ldx(xp, ri, rx); // x = xp[i]
__ ldx(zp, rk, rz); // z = zp[k], accumulator
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rz, rc, rz); // Accumulate lower order bits,
__ addxc(hip, zero, rc); // Accumulate higher order bits to carry
__ addcc(rz, lop, rz); // z += lo(p) + c
__ addxc(rc, zero, rc);
__ stx(rz, zp, rk); // zp[k] = z
__ dec(rk, 8); // k--
__ dec(ri, 8); // i--
__ ba_short(L_loop_i2);
__ bind(L_exit_loop_i2);
__ stx(rc, zp, rk); // z[k] = c
__ dec(rj, 8); // j--
__ ba_short(L_loop_j);
}
void gen_mult_64x64_unaligned(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for use.
const Register xpc = L0; // Outer loop cursor, xp[i]
const Register ypc = L1; // Inner loop cursor, yp[j]
const Register zpc = L2; // Output loop cursor, zp[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])
const Register rt = O2;
const Register lop = O0; // lo-64b product
const Register hip = O1; // hi-64b product
const Register zero = G0;
Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;
__ srlx(xn, 1, xn); // index for u32 to u64 ditto
__ srlx(yn, 1, yn); // index for u32 to u64 ditto
__ srlx(zn, 1, zn); // index for u32 to u64 ditto
__ dec(xn); // Adjust [0..(N/2)-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u64 c = 0
__ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn)
__ add(xp, xpc, xpc);
__ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn)
__ add(yp, ypc, ypc);
__ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn)
__ add(zp, zpc, zpc);
__ lduw(ypc, 0, rt); // u64 y = yp[yn]
__ lduw(ypc, 4, ry); // ...
__ sllx(rt, 32, rt);
__ or3(rt, ry, ry);
// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);
__ cmp_and_br_short(xpc, xp,// i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ lduw(xpc, 0, rt); // u64 x = xp[i]
__ lduw(xpc, 4, rx); // ...
__ sllx(rt, 32, rt);
__ or3(rt, rx, rx);
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry)
__ addxc(hip, zero, rc); // carry over to next datum [k-1]
__ srlx(lop, 32, rt);
__ stw(rt, zpc, 0); // z[k] = lop
__ stw(lop, zpc, 4); // ...
__ dec(zpc, 8); // k-- (zpc--)
__ dec(xpc, 8); // i-- (xpc--)
__ ba_short(L_loop_i);
__ bind(L_exit_loop_i);
__ srlx(rc, 32, rt);
__ stw(rt, zpc, 0); // z[k] = c
__ stw(rc, zpc, 4);
// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn)
__ add(yp, ypc, ypc);
__ dec(ypc, 8); // yn - 1 (ypc--)
__ bind(L_loop_j);
__ cmp_and_br_short(ypc, yp,// j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u64 c = 0
__ lduw(ypc, 0, rt); // u64 y = yp[j] (= *ypc)
__ lduw(ypc, 4, ry); // ...
__ sllx(rt, 32, rt);
__ or3(rt, ry, ry);
// for (int i = xn, k = --zn; i >= 0; i--)
__ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn)
__ add(xp, xpc, xpc);
__ dec(zn); // --zn
__ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn)
__ add(zp, zpc, zpc);
__ bind(L_loop_i2);
__ cmp_and_br_short(xpc, xp,// i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ lduw(xpc, 0, rt); // u64 x = xp[i] (= *xpc)
__ lduw(xpc, 4, rx); // ...
__ sllx(rt, 32, rt);
__ or3(rt, rx, rx);
__ lduw(zpc, 0, rt); // u64 z = zp[k] (= *zpc)
__ lduw(zpc, 4, rz); // ...
__ sllx(rt, 32, rt);
__ or3(rt, rz, rz);
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rz, rc, rz); // Accumulate lower order bits...
__ addxc(hip, zero, rc); // Accumulate higher order bits to carry
__ addcc(rz, lop, rz); // ... z += lo(p) + c
__ addxccc(rc, zero, rc);
__ srlx(rz, 32, rt);
__ stw(rt, zpc, 0); // zp[k] = z (*zpc = z)
__ stw(rz, zpc, 4);
__ dec(zpc, 8); // k-- (zpc--)
__ dec(xpc, 8); // i-- (xpc--)
__ ba_short(L_loop_i2);
__ bind(L_exit_loop_i2);
__ srlx(rc, 32, rt);
__ stw(rt, zpc, 0); // z[k] = c
__ stw(rc, zpc, 4);
__ dec(ypc, 8); // j-- (ypc--)
__ ba_short(L_loop_j);
}
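The unaligned kernel above assembles each 64-bit operand from two 32-bit loads; a scalar model of that load pattern (illustrative only, helper name assumed):
#include <cassert>
#include <cstdint>
// Mirrors the lduw/lduw + sllx/or3 sequence: the word at offset 0 becomes the
// high half, the word at offset 4 the low half (big-endian word order).
static uint64_t load_u64_from_u32_pair(const uint32_t* p) {
  uint64_t hi = p[0];
  uint64_t lo = p[1];
  return (hi << 32) | lo;
}
int main() {
  uint32_t limbs[2] = { 0x01234567u, 0x89abcdefu };
  assert(load_u64_from_u32_pair(limbs) == 0x0123456789abcdefULL);
  return 0;
}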
void gen_mult_32x32(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for use.
const Register ri = L0; // Outer loop index, xv[i]
const Register rj = L1; // Inner loop index, yv[j]
const Register rk = L2; // Output loop index, zv[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])
const Register p64 = O0; // 64b product
const Register z65 = O1; // carry+64b accumulator
const Register c65 = O2; // carry at bit 65
const Register c33 = O2; // carry at bit 33 (after shift)
const Register zero = G0;
Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;
__ dec(xn); // Adjust [0..N-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u32 c = 0
__ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
__ sllx(yn, 2, rj); // int j = yn (byte offset j = 4*yn)
__ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)
__ lduw(yp, rj, ry); // u32 y = yp[yn]
// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ lduw(xp, ri, rx); // x = xp[i]
__ mulx(rx, ry, p64); // 64b result of 32x32
__ addcc(rc, p64, z65); // Accumulate to 65 bits (producing carry)
__ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
__ sllx(c65, 32, c33); // and shift into bit 33
__ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
__ add(c33, rc, rc); // carry over to next datum [k-1]
__ stw(z65, zp, rk); // z[k] = lo(z65)
__ dec(rk, 4); // k--
__ dec(ri, 4); // i--
__ ba_short(L_loop_i);
__ bind(L_exit_loop_i);
__ stw(rc, zp, rk); // z[k] = c
// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 2, rj); // int j = yn - 1 (byte offset j = 4*yn)
__ dec(rj, 4);
__ bind(L_loop_j);
__ cmp_and_br_short(rj, 0, // j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u32 c = 0
__ lduw(yp, rj, ry); // u32 y = yp[j]
// for (int i = xn, k = --zn; i >= 0; i--)
__ dec(zn); // --zn
__ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
__ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)
__ bind(L_loop_i2);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ lduw(xp, ri, rx); // x = xp[i]
__ lduw(zp, rk, rz); // z = zp[k], accumulator
__ mulx(rx, ry, p64); // 64b result of 32x32
__ add(rz, rc, rz); // Accumulate lower order bits,
__ addcc(rz, p64, z65); // z += lo(p64) + c
__ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
__ sllx(c65, 32, c33); // and shift into bit 33
__ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
__ add(c33, rc, rc); // carry over to next datum [k-1]
__ stw(z65, zp, rk); // zp[k] = lo(z65)
__ dec(rk, 4); // k--
__ dec(ri, 4); // i--
__ ba_short(L_loop_i2);
__ bind(L_exit_loop_i2);
__ stw(rc, zp, rk); // z[k] = c
__ dec(rj, 4); // j--
__ ba_short(L_loop_j);
}
void generate_initial() {
// Generates all stubs and initializes the entry points
@ -5073,8 +5839,14 @@ class StubGenerator: public StubCodeGenerator {
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
}
}
#ifdef COMPILER2
// Intrinsics supported by C2 only:
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
#endif // COMPILER2
}
public:
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {

View File

@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) {
enum /* platform_dependent_constants */ {
// %%%%%%%% May be able to shrink this a lot
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
code_size2 = 27000 // simply increase if too small (assembler will crash if too small)
code_size2 = 29000 // simply increase if too small (assembler will crash if too small)
};
class Sparc {

View File

@ -2049,6 +2049,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
__ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f1_offset(), Robj);
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ld_ptr( Robj, mirror_offset, Robj);
__ resolve_oop_handle(Robj);
}
}

View File

@ -101,6 +101,14 @@
declare_constant(VM_Version::ISA_XMONT) \
declare_constant(VM_Version::ISA_PAUSE_NSEC) \
declare_constant(VM_Version::ISA_VAMASK) \
declare_constant(VM_Version::ISA_SPARC6) \
declare_constant(VM_Version::ISA_DICTUNP) \
declare_constant(VM_Version::ISA_FPCMPSHL) \
declare_constant(VM_Version::ISA_RLE) \
declare_constant(VM_Version::ISA_SHA3) \
declare_constant(VM_Version::ISA_VIS3C) \
declare_constant(VM_Version::ISA_SPARC5B) \
declare_constant(VM_Version::ISA_MME) \
declare_constant(VM_Version::CPU_FAST_IDIV) \
declare_constant(VM_Version::CPU_FAST_RDPC) \
declare_constant(VM_Version::CPU_FAST_BIS) \

View File

@ -103,7 +103,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
}
else if (has_sparc5()) {
// Use prefetch instruction to avoid partial RAW issue on Core S4 processors,
// Use prefetch instruction to avoid partial RAW issue on Core C4 processors,
// also use prefetch style 3.
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
@ -128,7 +128,7 @@ void VM_Version::initialize() {
// We increase the number of prefetched cache lines, to use just a bit more
// aggressive approach, when the L2-cache line size is small (32 bytes), or
// when running on newer processor implementations, such as the Core S4.
// when running on newer processor implementations, such as the Core C4.
bool inc_prefetch = cache_line_size > 0 && (cache_line_size < 64 || has_sparc5());
if (inc_prefetch) {
@ -168,6 +168,16 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCBCond, false);
}
// Use 'mpmul' instruction if available.
if (has_mpmul()) {
if (FLAG_IS_DEFAULT(UseMPMUL)) {
FLAG_SET_DEFAULT(UseMPMUL, true);
}
} else if (UseMPMUL) {
warning("MPMUL instruction is not available on this CPU");
FLAG_SET_DEFAULT(UseMPMUL, false);
}
assert(BlockZeroingLowLimit > 0, "invalid value");
if (has_blk_zeroing() && cache_line_size > 0) {
@ -208,7 +218,9 @@ void VM_Version::initialize() {
char buf[512];
jio_snprintf(buf, sizeof(buf),
"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
"%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s"
"%s%s%s%s%s%s%s",
(has_v9() ? "v9" : ""),
(has_popc() ? ", popc" : ""),
(has_vis1() ? ", vis1" : ""),
@ -241,6 +253,16 @@ void VM_Version::initialize() {
(has_pause_nsec() ? ", pause_nsec" : ""),
(has_vamask() ? ", vamask" : ""),
(has_sparc6() ? ", sparc6" : ""),
(has_dictunp() ? ", dictunp" : ""),
(has_fpcmpshl() ? ", fpcmpshl" : ""),
(has_rle() ? ", rle" : ""),
(has_sha3() ? ", sha3" : ""),
(has_athena_plus2()? ", athena_plus2" : ""),
(has_vis3c() ? ", vis3c" : ""),
(has_sparc5b() ? ", sparc5b" : ""),
(has_mme() ? ", mme" : ""),
(has_fast_idiv() ? ", *idiv" : ""),
(has_fast_rdpc() ? ", *rdpc" : ""),
(has_fast_bis() ? ", *bis" : ""),
@ -409,6 +431,15 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}
if (UseVIS > 2) {
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
}
} else if (UseMultiplyToLenIntrinsic) {
warning("SPARC multiplyToLen intrinsics require VIS3 instructions support. Intrinsics will be disabled");
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
}
if (UseVectorizedMismatchIntrinsic) {
warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);

View File

@ -67,6 +67,16 @@ protected:
ISA_PAUSE_NSEC,
ISA_VAMASK,
ISA_SPARC6,
ISA_DICTUNP,
ISA_FPCMPSHL,
ISA_RLE,
ISA_SHA3,
ISA_FJATHPLUS2,
ISA_VIS3C,
ISA_SPARC5B,
ISA_MME,
// Synthesised properties:
CPU_FAST_IDIV,
@ -79,7 +89,7 @@ protected:
};
private:
enum { ISA_last_feature = ISA_VAMASK,
enum { ISA_last_feature = ISA_MME,
CPU_last_feature = CPU_BLK_ZEROING };
enum {
@ -119,6 +129,16 @@ private:
ISA_pause_nsec_msk = UINT64_C(1) << ISA_PAUSE_NSEC,
ISA_vamask_msk = UINT64_C(1) << ISA_VAMASK,
ISA_sparc6_msk = UINT64_C(1) << ISA_SPARC6,
ISA_dictunp_msk = UINT64_C(1) << ISA_DICTUNP,
ISA_fpcmpshl_msk = UINT64_C(1) << ISA_FPCMPSHL,
ISA_rle_msk = UINT64_C(1) << ISA_RLE,
ISA_sha3_msk = UINT64_C(1) << ISA_SHA3,
ISA_fjathplus2_msk = UINT64_C(1) << ISA_FJATHPLUS2,
ISA_vis3c_msk = UINT64_C(1) << ISA_VIS3C,
ISA_sparc5b_msk = UINT64_C(1) << ISA_SPARC5B,
ISA_mme_msk = UINT64_C(1) << ISA_MME,
CPU_fast_idiv_msk = UINT64_C(1) << CPU_FAST_IDIV,
CPU_fast_rdpc_msk = UINT64_C(1) << CPU_FAST_RDPC,
CPU_fast_bis_msk = UINT64_C(1) << CPU_FAST_BIS,
@ -153,40 +173,51 @@ private:
* UltraSPARC T2+: (Victoria Falls, etc.)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
* UltraSPARC T3: (Rainbow Falls/S2)
* UltraSPARC T3: (Rainbow Falls/C2)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
* Oracle SPARC T4/T5/M5: (Core S3)
* Oracle SPARC T4/T5/M5: (Core C3)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Kasumi, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL
*
* Oracle SPARC M7: (Core S4)
* Oracle SPARC M7: (Core C4)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
* ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK
*
* Oracle SPARC M8: (Core C5)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
* ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK, SPARC6, FPCMPSHL,
* DICTUNP, RLE, SHA3, MME
*
* NOTE: Oracle Number support ignored.
*/
enum {
niagara1_msk = ISA_v9_msk | ISA_vis1_msk | ISA_blk_init_msk,
niagara2_msk = niagara1_msk | ISA_popc_msk,
core_S2_msk = niagara2_msk | ISA_vis2_msk,
core_C2_msk = niagara2_msk | ISA_vis2_msk,
core_S3_msk = core_S2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
core_C3_msk = core_C2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
ISA_ima_msk | ISA_aes_msk | ISA_des_msk | ISA_kasumi_msk |
ISA_camellia_msk | ISA_md5_msk | ISA_sha1_msk | ISA_sha256_msk |
ISA_sha512_msk | ISA_mpmul_msk | ISA_mont_msk | ISA_pause_msk |
ISA_cbcond_msk | ISA_crc32c_msk,
core_S4_msk = core_S3_msk - ISA_kasumi_msk |
core_C4_msk = core_C3_msk - ISA_kasumi_msk |
ISA_vis3b_msk | ISA_adi_msk | ISA_sparc5_msk | ISA_mwait_msk |
ISA_xmpmul_msk | ISA_xmont_msk | ISA_pause_nsec_msk | ISA_vamask_msk,
core_C5_msk = core_C4_msk | ISA_sparc6_msk | ISA_dictunp_msk |
ISA_fpcmpshl_msk | ISA_rle_msk | ISA_sha3_msk | ISA_mme_msk,
ultra_sparc_t1_msk = niagara1_msk,
ultra_sparc_t2_msk = niagara2_msk,
ultra_sparc_t3_msk = core_S2_msk,
ultra_sparc_m5_msk = core_S3_msk, // NOTE: First out-of-order pipeline.
ultra_sparc_m7_msk = core_S4_msk
ultra_sparc_t3_msk = core_C2_msk,
ultra_sparc_m5_msk = core_C3_msk, // NOTE: First out-of-order pipeline.
ultra_sparc_m7_msk = core_C4_msk,
ultra_sparc_m8_msk = core_C5_msk
};
static uint _L2_data_cache_line_size;
@ -247,6 +278,16 @@ public:
static bool has_pause_nsec() { return (_features & ISA_pause_nsec_msk) != 0; }
static bool has_vamask() { return (_features & ISA_vamask_msk) != 0; }
static bool has_sparc6() { return (_features & ISA_sparc6_msk) != 0; }
static bool has_dictunp() { return (_features & ISA_dictunp_msk) != 0; }
static bool has_fpcmpshl() { return (_features & ISA_fpcmpshl_msk) != 0; }
static bool has_rle() { return (_features & ISA_rle_msk) != 0; }
static bool has_sha3() { return (_features & ISA_sha3_msk) != 0; }
static bool has_athena_plus2() { return (_features & ISA_fjathplus2_msk) != 0; }
static bool has_vis3c() { return (_features & ISA_vis3c_msk) != 0; }
static bool has_sparc5b() { return (_features & ISA_sparc5b_msk) != 0; }
static bool has_mme() { return (_features & ISA_mme_msk) != 0; }
static bool has_fast_idiv() { return (_features & CPU_fast_idiv_msk) != 0; }
static bool has_fast_rdpc() { return (_features & CPU_fast_rdpc_msk) != 0; }
static bool has_fast_bis() { return (_features & CPU_fast_bis_msk) != 0; }

View File

@ -383,6 +383,7 @@ void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp
//------------------------------------------------------------------------------
// frame::adjust_unextended_sp
#ifdef ASSERT
void frame::adjust_unextended_sp() {
// On x86, sites calling method handle intrinsics and lambda forms are treated
// as any other call site. Therefore, no special action is needed when we are
@ -394,11 +395,12 @@ void frame::adjust_unextended_sp() {
// If the sender PC is a deoptimization point, get the original PC.
if (sender_cm->is_deopt_entry(_pc) ||
sender_cm->is_deopt_mh_entry(_pc)) {
DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp));
verify_deopt_original_pc(sender_cm, _unextended_sp);
}
}
}
}
#endif
//------------------------------------------------------------------------------
// frame::update_map_with_saved_link

View File

@ -117,7 +117,7 @@
// original sp we use that convention.
intptr_t* _unextended_sp;
void adjust_unextended_sp();
void adjust_unextended_sp() NOT_DEBUG_RETURN;
intptr_t* ptr_at_addr(int offset) const {
return (intptr_t*) addr_at(offset);

View File

@ -6617,6 +6617,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
movptr(mirror, Address(mirror, ConstMethod::constants_offset()));
movptr(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
movptr(mirror, Address(mirror, mirror_offset));
resolve_oop_handle(mirror);
}
void MacroAssembler::load_klass(Register dst, Register src) {

View File

@ -2665,6 +2665,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
ConstantPoolCacheEntry::f1_offset())));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movptr(obj, Address(obj, mirror_offset));
__ resolve_oop_handle(obj);
}
}

View File

@ -46,7 +46,7 @@ address VM_Version::_cpuinfo_segv_addr = 0;
address VM_Version::_cpuinfo_cont_addr = 0;
static BufferBlob* stub_blob;
static const int stub_size = 1000;
static const int stub_size = 1100;
extern "C" {
typedef void (*get_cpu_info_stub_t)(void*);
@ -70,7 +70,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@ -267,14 +267,30 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
__ jcc(Assembler::belowEqual, done);
__ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
__ jccb(Assembler::belowEqual, ext_cpuid1);
__ jcc(Assembler::belowEqual, ext_cpuid1);
__ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
__ jccb(Assembler::belowEqual, ext_cpuid5);
__ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
__ jccb(Assembler::belowEqual, ext_cpuid7);
__ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
__ jccb(Assembler::belowEqual, ext_cpuid8);
__ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
__ jccb(Assembler::below, ext_cpuid8);
//
// Extended cpuid(0x8000001E)
//
__ movl(rax, 0x8000001E);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
//
// Extended cpuid(0x80000008)
//
__ bind(ext_cpuid8);
__ movl(rax, 0x80000008);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@ -1109,11 +1125,27 @@ void VM_Version::get_processor_features() {
}
#ifdef COMPILER2
if (MaxVectorSize > 16) {
// Limit vectors size to 16 bytes on current AMD cpus.
if (cpu_family() < 0x17 && MaxVectorSize > 16) {
// Limit vectors size to 16 bytes on AMD cpus < 17h.
FLAG_SET_DEFAULT(MaxVectorSize, 16);
}
#endif // COMPILER2
// Some defaults for AMD family 17h
if ( cpu_family() == 0x17 ) {
// On family 17h processors use XMM and UnalignedLoadStores for Array Copy
if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
}
if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
}
#ifdef COMPILER2
if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
}
#endif
}
}
if( is_intel() ) { // Intel cpus specific settings

View File

@ -228,6 +228,15 @@ class VM_Version : public Abstract_VM_Version {
} bits;
};
union ExtCpuid1EEbx {
uint32_t value;
struct {
uint32_t : 8,
threads_per_core : 8,
: 16;
} bits;
};
union XemXcr0Eax {
uint32_t value;
struct {
@ -398,6 +407,12 @@ protected:
ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved
// cpuid function 0x8000001E // AMD 17h
uint32_t ext_cpuid1E_eax;
ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
uint32_t ext_cpuid1E_ecx;
uint32_t ext_cpuid1E_edx; // unused currently
// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
XemXcr0Eax xem_xcr0_eax;
uint32_t xem_xcr0_edx; // reserved
@ -505,6 +520,14 @@ protected:
result |= CPU_CLMUL;
if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
result |= CPU_RTM;
if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
result |= CPU_ADX;
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
result |= CPU_BMI2;
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
result |= CPU_SHA;
if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
result |= CPU_FMA;
// AMD features.
if (is_amd()) {
@ -518,16 +541,8 @@ protected:
}
// Intel features.
if(is_intel()) {
if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
result |= CPU_ADX;
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
result |= CPU_BMI2;
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
result |= CPU_SHA;
if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
result |= CPU_LZCNT;
if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
result |= CPU_FMA;
// for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
result |= CPU_3DNOW_PREFETCH;
@ -590,6 +605,7 @@ public:
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@ -673,8 +689,12 @@ public:
if (is_intel() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu();
if (cpu_family() >= 0x17) {
result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
} else {
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu();
}
}
return (result == 0 ? 1 : result);
}
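For the new family 17h path, CPUID leaf 0x8000001E reports ThreadsPerCore minus one in EBX bits 15:8, hence the "+ 1" above. A small decoding sketch with an assumed raw EBX value (not taken from the patch):
#include <cassert>
#include <cstdint>
int main() {
  uint32_t ebx = 0x00000101u;                            // assumed sample: core id 1, SMT2
  uint32_t threads_per_core = ((ebx >> 8) & 0xffu) + 1;  // bits 15:8, plus one
  assert(threads_per_core == 2);
  return 0;
}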

View File

@ -770,8 +770,15 @@ static void *thread_native_entry(Thread *thread) {
const pthread_t pthread_id = ::pthread_self();
const tid_t kernel_thread_id = ::thread_self();
log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT ").",
os::current_thread_id(), (uintx) kernel_thread_id);
LogTarget(Info, os, thread) lt;
if (lt.is_enabled()) {
address low_address = thread->stack_end();
address high_address = thread->stack_base();
lt.print("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT
", stack [" PTR_FORMAT " - " PTR_FORMAT " (" SIZE_FORMAT "k using %uk pages)).",
os::current_thread_id(), (uintx) kernel_thread_id, low_address, high_address,
(high_address - low_address) / K, os::Aix::query_pagesize(low_address) / K);
}
// Normally, pthread stacks on AIX live in the data segment (are allocated with malloc()
// by the pthread library). In rare cases, this may not be the case, e.g. when third-party
@ -864,6 +871,14 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
// Calculate stack size if it's not specified by caller.
size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);
// JDK-8187028: It was observed that on some configurations (4K backed thread stacks)
// the real thread stack size may be smaller than the requested stack size, by as much as 64K.
// This very much looks like a pthread lib error. As a workaround, increase the stack size
// by 64K for small thread stacks (arbitrarily chosen to be < 4MB)
if (stack_size < 4096 * K) {
stack_size += 64 * K;
}
// On Aix, pthread_attr_setstacksize fails with huge values and leaves the
// thread size in attr unchanged. If this is the minimal stack size as set
// by pthread_attr_init this leads to crashes after thread creation. E.g. the
@ -3443,8 +3458,6 @@ void os::init(void) {
init_random(1234567);
ThreadCritical::initialize();
// Main_thread points to the aboriginal thread.
Aix::_main_thread = pthread_self();

View File

@ -38,12 +38,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;
void ThreadCritical::initialize() {
}
void ThreadCritical::release() {
}
ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {

View File

@ -3353,8 +3353,6 @@ void os::init(void) {
init_random(1234567);
ThreadCritical::initialize();
Bsd::set_page_size(getpagesize());
if (Bsd::page_size() == -1) {
fatal("os_bsd.cpp: os::init: sysconf failed (%s)", os::strerror(errno));

View File

@ -37,12 +37,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;
void ThreadCritical::initialize() {
}
void ThreadCritical::release() {
}
ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {

View File

@ -4768,8 +4768,6 @@ void os::init(void) {
init_random(1234567);
ThreadCritical::initialize();
Linux::set_page_size(sysconf(_SC_PAGESIZE));
if (Linux::page_size() == -1) {
fatal("os_linux.cpp: os::init: sysconf failed (%s)",

View File

@ -98,6 +98,11 @@ inline int os::ftruncate(int fd, jlong length) {
inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
{
// readdir_r has been deprecated since glibc 2.24.
// See https://sourceware.org/bugzilla/show_bug.cgi?id=19056 for more details.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
dirent* p;
int status;
assert(dirp != NULL, "just checking");
@ -111,6 +116,8 @@ inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
return NULL;
} else
return p;
#pragma GCC diagnostic pop
}
inline int os::closedir(DIR *dirp) {

View File

@ -37,12 +37,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;
void ThreadCritical::initialize() {
}
void ThreadCritical::release() {
}
ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {

View File

@ -1770,6 +1770,12 @@ int os::PlatformEvent::park(jlong millis) {
if (v == 0) { // Do this the hard way by blocking ...
struct timespec abst;
// We have to watch for overflow when converting millis to nanos,
// but if millis is that large then we will end up limiting to
// MAX_SECS anyway, so just do that here.
if (millis / MILLIUNITS > MAX_SECS) {
millis = jlong(MAX_SECS) * MILLIUNITS;
}
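// (Illustrative arithmetic: a jlong holds about 9.2e18 ns, so the
// millis * (NANOUNITS / MILLIUNITS) conversion below could only overflow
// for millis beyond roughly 9.2e12, far above the MAX_SECS clamp.)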
to_abstime(&abst, millis * (NANOUNITS / MILLIUNITS), false);
int ret = OS_TIMEOUT;

View File

@ -4076,6 +4076,7 @@ int_fnP_cond_tP os::Solaris::_cond_broadcast;
int_fnP_cond_tP_i_vP os::Solaris::_cond_init;
int_fnP_cond_tP os::Solaris::_cond_destroy;
int os::Solaris::_cond_scope = USYNC_THREAD;
bool os::Solaris::_synchronization_initialized;
void os::Solaris::synchronization_init() {
if (UseLWPSynchronization) {
@ -4125,6 +4126,7 @@ void os::Solaris::synchronization_init() {
os::Solaris::set_cond_destroy(::cond_destroy);
}
}
_synchronization_initialized = true;
}
bool os::Solaris::liblgrp_init() {
@ -4198,9 +4200,6 @@ void os::init(void) {
dladdr1_func = CAST_TO_FN_PTR(dladdr1_func_type, dlsym(hdl, "dladdr1"));
}
// (Solaris only) this switches to calls that actually do locking.
ThreadCritical::initialize();
main_thread = thr_self();
// dynamic lookup of functions that may not be available in our lowest

View File

@ -65,6 +65,8 @@ class Solaris {
static int_fnP_cond_tP _cond_destroy;
static int _cond_scope;
static bool _synchronization_initialized;
typedef uintptr_t lgrp_cookie_t;
typedef id_t lgrp_id_t;
typedef int lgrp_rsrc_t;
@ -227,6 +229,8 @@ class Solaris {
static void set_cond_destroy(int_fnP_cond_tP func) { _cond_destroy = func; }
static void set_cond_scope(int scope) { _cond_scope = scope; }
static bool synchronization_initialized() { return _synchronization_initialized; }
static void set_lgrp_home(lgrp_home_func_t func) { _lgrp_home = func; }
static void set_lgrp_init(lgrp_init_func_t func) { _lgrp_init = func; }
static void set_lgrp_fini(lgrp_fini_func_t func) { _lgrp_fini = func; }

View File

@ -42,10 +42,9 @@
static mutex_t global_mut;
static thread_t global_mut_owner = -1;
static int global_mut_count = 0;
static bool initialized = false;
ThreadCritical::ThreadCritical() {
if (initialized) {
if (os::Solaris::synchronization_initialized()) {
thread_t owner = thr_self();
if (global_mut_owner != owner) {
if (os::Solaris::mutex_lock(&global_mut))
@ -62,7 +61,7 @@ ThreadCritical::ThreadCritical() {
}
ThreadCritical::~ThreadCritical() {
if (initialized) {
if (os::Solaris::synchronization_initialized()) {
assert(global_mut_owner == thr_self(), "must have correct owner");
assert(global_mut_count > 0, "must have correct count");
--global_mut_count;
@ -75,12 +74,3 @@ ThreadCritical::~ThreadCritical() {
assert (Threads::number_of_threads() == 0, "valid only during initialization");
}
}
void ThreadCritical::initialize() {
// This method is called at the end of os::init(). Until
// then, we don't do real locking.
initialized = true;
}
void ThreadCritical::release() {
}

View File

@ -428,7 +428,7 @@ static unsigned __stdcall thread_native_entry(Thread* thread) {
// When the VMThread gets here, the main thread may have already exited
// which frees the CodeHeap containing the Atomic::add code
if (thread != VMThread::vm_thread() && VMThread::vm_thread() != NULL) {
Atomic::dec_ptr((intptr_t*)&os::win32::_os_thread_count);
Atomic::dec(&os::win32::_os_thread_count);
}
// If a thread has not deleted itself ("delete this") as part of its
@ -634,7 +634,7 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
return NULL;
}
Atomic::inc_ptr((intptr_t*)&os::win32::_os_thread_count);
Atomic::inc(&os::win32::_os_thread_count);
// Store info on the Win32 thread into the OSThread
osthread->set_thread_handle(thread_handle);

View File

@ -51,16 +51,6 @@ static DWORD lock_owner = -1;
// and found them ~30 times slower than the critical region code.
//
void ThreadCritical::initialize() {
}
void ThreadCritical::release() {
assert(lock_owner == -1, "Mutex being deleted while owned.");
assert(lock_count == -1, "Mutex being deleted while recursively locked");
assert(lock_event != NULL, "Sanity check");
CloseHandle(lock_event);
}
ThreadCritical::ThreadCritical() {
DWORD current_thread = GetCurrentThreadId();

View File

@ -34,22 +34,6 @@
// Implementation of class atomic
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
//
// machine barrier instructions:
//
@ -148,90 +132,15 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return result;
}
inline void Atomic::inc (volatile jint* dest) {
unsigned int temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
long temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
inline void Atomic::dec (volatile jint* dest) {
unsigned int temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
long temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).
unsigned int old_value;
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@ -259,15 +168,18 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
"memory"
);
return (jint) old_value;
return old_value;
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).
long old_value;
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@ -295,11 +207,7 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
"memory"
);
return (intptr_t) old_value;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
return old_value;
}
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {

View File

@ -78,16 +78,17 @@ inline void OrderAccess::acquire() { inlasm_lwsync(); }
inline void OrderAccess::release() { inlasm_lwsync(); }
inline void OrderAccess::fence() { inlasm_sync(); }
template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte> (const volatile jbyte* p) { register jbyte t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jint OrderAccess::specialized_load_acquire<jint> (const volatile jint* p) { register jint t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const volatile jlong* p) { register jlong t = load(p); inlasm_acquire_reg(t); return t; }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const { register T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
};
#undef inlasm_sync
#undef inlasm_lwsync
#undef inlasm_eieio
#undef inlasm_isync
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
#endif // OS_CPU_AIX_OJDKPPC_VM_ORDERACCESS_AIX_PPC_INLINE_HPP

View File

@ -27,19 +27,6 @@
// Implementation of class atomic
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
@ -61,25 +48,11 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
inline void Atomic::inc (volatile jint* dest) {
__asm__ volatile ( "lock addl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
inline void Atomic::dec (volatile jint* dest) {
__asm__ volatile ( "lock subl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -87,10 +60,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return exchange_value;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@ -120,9 +89,6 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
}
#ifdef AMD64
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) const {
@ -136,21 +102,11 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ( "lock addq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ( "lock subq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
__asm__ __volatile__ ("xchgq (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -172,22 +128,8 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return exchange_value;
}
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
#else // !AMD64
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
inc((volatile jint*)dest);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
dec((volatile jint*)dest);
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
}
extern "C" {
// defined in bsd_x86.s
jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong, bool);
@ -204,18 +146,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return cmpxchg_using_helper<jlong>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
}
inline jlong Atomic::load(const volatile jlong* src) {
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
_Atomic_move_long(src, &dest);
return dest;
_Atomic_move_long(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
return PrimitiveConversions::cast<T>(dest);
}
inline void Atomic::store(jlong store_value, jlong* dest) {
_Atomic_move_long((volatile jlong*)&store_value, (volatile jlong*)dest);
}
inline void Atomic::store(jlong store_value, volatile jlong* dest) {
_Atomic_move_long((volatile jlong*)&store_value, dest);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
_Atomic_move_long(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}
#endif // AMD64
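
The 32-bit path that ends here routes 64-bit loads and stores through the _Atomic_move_long helper, using PrimitiveConversions::cast to move values between the caller's 8-byte type and jlong without changing their bit pattern. Below is a minimal self-contained sketch of that "cast to the helper's type, call the helper, cast back" shape, assuming C++11; the helper and names are illustrative stand-ins, not HotSpot's.

#include <cstdint>
#include <cstring>

// Illustrative stand-in for an assembly helper that moves 8 bytes atomically.
static void move_long(const volatile int64_t* src, volatile int64_t* dst) {
  *dst = *src;  // placeholder; the real helper guarantees one 64-bit access
}

// Bit-preserving cast, in the spirit of PrimitiveConversions::cast.
template <typename To, typename From>
static To bit_cast(From from) {
  static_assert(sizeof(To) == sizeof(From), "sizes must match");
  To to;
  std::memcpy(&to, &from, sizeof(to));
  return to;
}

template <typename T>
static T load64(const volatile T* src) {
  static_assert(sizeof(T) == 8, "8-byte types only");
  volatile int64_t tmp;
  move_long(reinterpret_cast<const volatile int64_t*>(src), &tmp);
  return bit_cast<T>(static_cast<int64_t>(tmp));
}

template <typename T>
static void store64(T value, volatile T* dest) {
  static_assert(sizeof(T) == 8, "8-byte types only");
  const int64_t tmp = bit_cast<int64_t>(value);
  move_long(&tmp, reinterpret_cast<volatile int64_t*>(dest));
}

int main() {
  volatile double d = 0.0;
  store64(3.14, &d);
  return load64(&d) == 3.14 ? 0 : 1;
}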

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -64,46 +64,57 @@ inline void OrderAccess::fence() {
}
template<>
inline void OrderAccess::specialized_release_store_fence<jbyte> (volatile jbyte* p, jbyte v) {
__asm__ volatile ( "xchgb (%2),%0"
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgb (%2),%0"
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
}
};
template<>
inline void OrderAccess::specialized_release_store_fence<jshort>(volatile jshort* p, jshort v) {
__asm__ volatile ( "xchgw (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgw (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};
template<>
inline void OrderAccess::specialized_release_store_fence<jint> (volatile jint* p, jint v) {
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};
#ifdef AMD64
template<>
inline void OrderAccess::specialized_release_store_fence<jlong> (volatile jlong* p, jlong v) {
__asm__ volatile ( "xchgq (%2), %0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgq (%2), %0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};
#endif // AMD64
template<>
inline void OrderAccess::specialized_release_store_fence<jfloat> (volatile jfloat* p, jfloat v) {
release_store_fence((volatile jint*)p, jint_cast(v));
}
template<>
inline void OrderAccess::specialized_release_store_fence<jdouble>(volatile jdouble* p, jdouble v) {
release_store_fence((volatile jlong*)p, jlong_cast(v));
}
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
#endif // OS_CPU_BSD_X86_VM_ORDERACCESS_BSD_X86_INLINE_HPP

View File

@ -87,7 +87,7 @@ static inline int m68k_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until success.
int prev = *ptr;
@ -148,7 +148,7 @@ static inline int arm_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until a __kernel_cmpxchg succeeds.
int prev = *ptr;
@ -159,20 +159,6 @@ static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
}
#endif // ARM
inline void Atomic::store(jint store_value, volatile jint* dest) {
#if !defined(ARM) && !defined(M68K)
__sync_synchronize();
#endif
*dest = store_value;
}
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) {
#if !defined(ARM) && !defined(M68K)
__sync_synchronize();
#endif
*dest = store_value;
}
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@ -207,42 +193,22 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return __sync_add_and_fetch(dest, add_value);
}
inline void Atomic::inc(volatile jint* dest) {
add(1, dest);
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
add_ptr(1, dest);
}
inline void Atomic::inc_ptr(volatile void* dest) {
add_ptr(1, dest);
}
inline void Atomic::dec(volatile jint* dest) {
add(-1, dest);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
add_ptr(-1, dest);
}
inline void Atomic::dec_ptr(volatile void* dest) {
add_ptr(-1, dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest);
#else
// __sync_lock_test_and_set is a bizarrely named atomic exchange
// operation. Note that some platforms only support this with the
// limitation that the only valid value to store is the immediate
// constant 1. There is a test for this in JNI_CreateJavaVM().
jint result = __sync_lock_test_and_set (dest, exchange_value);
T result = __sync_lock_test_and_set (dest, exchange_value);
// All atomic operations are expected to be full memory barriers
// (see atomic.hpp). However, __sync_lock_test_and_set is not
// a full memory barrier, but an acquire barrier. Hence, this added
@ -253,24 +219,14 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
#endif // ARM
}
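
The comment a few lines up notes that __sync_lock_test_and_set is only an acquire barrier, so the port follows it with __sync_synchronize to honour the "all atomics are full two-way barriers" contract. A minimal standalone sketch of that exchange-plus-full-barrier pattern, assuming a GCC-compatible compiler; it is an illustration, not this port's code.

#include <cassert>

// Atomic exchange upgraded to a full barrier: the builtin alone only
// guarantees acquire semantics, so a fence is issued afterwards.
template <typename T>
static T full_barrier_xchg(T new_value, T volatile* dest) {
  T old = __sync_lock_test_and_set(dest, new_value);
  __sync_synchronize();
  return old;
}

int main() {
  volatile int flag = 0;
  int prev = full_barrier_xchg(1, &flag);
  assert(prev == 0 && flag == 1);
  return 0;
}
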
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
volatile intptr_t* dest) {
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
#else
intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T result = __sync_lock_test_and_set (dest, exchange_value);
__sync_synchronize();
return result;
#endif // M68K
#endif // ARM
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void *) xchg_ptr((intptr_t) exchange_value,
(volatile intptr_t*) dest);
}
// No direct support for cmpxchg of bytes; emulate using int.
@ -305,18 +261,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
}
inline jlong Atomic::load(const volatile jlong* src) {
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
os::atomic_copy64(src, &dest);
return dest;
os::atomic_copy64(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
return PrimitiveConversions::cast<T>(dest);
}
inline void Atomic::store(jlong store_value, jlong* dest) {
os::atomic_copy64((volatile jlong*)&store_value, (volatile jlong*)dest);
}
inline void Atomic::store(jlong store_value, volatile jlong* dest) {
os::atomic_copy64((volatile jlong*)&store_value, dest);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
os::atomic_copy64(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}
#endif // OS_CPU_BSD_ZERO_VM_ATOMIC_BSD_ZERO_HPP
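
For ARM and m68k the 4-byte exchange above is funnelled through xchg_using_helper, which reuses a helper routine written for plain int for any type of the same size. The adapter below is a rough self-contained model of that idea; the stub body and names are illustrative, not HotSpot's definitions.

#include <cstring>

// A stub written for one concrete type; stands in for helpers such as
// arm_lock_test_and_set above (whose real body retries a kernel cmpxchg).
static int stub_lock_test_and_set(int new_value, volatile int* ptr) {
  int prev = *ptr;       // placeholder only, not actually atomic
  *ptr = new_value;
  return prev;
}

// Adapter in the spirit of xchg_using_helper: route any T of the stub's
// size through the stub, converting argument and result by bit pattern.
template <typename StubFn, typename T>
static T xchg_using(StubFn stub, T exchange_value, T volatile* dest) {
  static_assert(sizeof(T) == sizeof(int), "stub operates on int-sized values");
  int arg;
  std::memcpy(&arg, &exchange_value, sizeof(arg));
  int res = stub(arg, reinterpret_cast<volatile int*>(dest));
  T out;
  std::memcpy(&out, &res, sizeof(out));
  return out;
}

int main() {
  volatile unsigned v = 7u;
  unsigned old = xchg_using(stub_lock_test_and_set, 9u, &v);
  return (old == 7u && v == 9u) ? 0 : 1;
}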

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2009 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -74,6 +74,4 @@ inline void OrderAccess::acquire() { LIGHT_MEM_BARRIER; }
inline void OrderAccess::release() { LIGHT_MEM_BARRIER; }
inline void OrderAccess::fence() { FULL_MEM_BARRIER; }
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
#endif // OS_CPU_BSD_ZERO_VM_ORDERACCESS_BSD_ZERO_INLINE_HPP

View File

@ -34,19 +34,6 @@
#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@ -57,39 +44,16 @@ struct Atomic::PlatformAdd
}
};
inline void Atomic::inc(volatile jint* dest)
{
add(1, dest);
}
inline void Atomic::inc_ptr(volatile void* dest)
{
add_ptr(1, dest);
}
inline void Atomic::dec (volatile jint* dest)
{
add(-1, dest);
}
inline void Atomic::dec_ptr(volatile void* dest)
{
add_ptr(-1, dest);
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
{
jint res = __sync_lock_test_and_set (dest, exchange_value);
template<size_t byte_size>
template<typename T>
inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(byte_size == sizeof(T));
T res = __sync_lock_test_and_set(dest, exchange_value);
FULL_MEM_BARRIER;
return res;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest)
{
return (void *) xchg_ptr((intptr_t) exchange_value,
(volatile intptr_t*) dest);
}
template<size_t byte_size>
template<typename T>
inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
@ -107,26 +71,4 @@ inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
}
}
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline void Atomic::inc_ptr(volatile intptr_t* dest)
{
add_ptr(1, dest);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest)
{
add_ptr(-1, dest);
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
{
intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
FULL_MEM_BARRIER;
return res;
}
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
#endif // OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_HPP
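
This port implements exchange and compare-and-swap generically over the operand size on top of the GCC builtins; the compare-and-swap body is elided by the hunk above. The sketch below shows the usual shape of such a builtin-based compare-and-swap, pinned to sequentially consistent ordering, as an assumption rather than this port's exact code.

// Strong compare-and-swap that returns the previous value, built on the
// GCC __atomic builtin (memory orders fixed to seq_cst for simplicity).
template <typename T>
static T cmpxchg_seq_cst(T exchange_value, T volatile* dest, T compare_value) {
  T expected = compare_value;
  __atomic_compare_exchange_n(dest, &expected, exchange_value,
                              /*weak=*/false,
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return expected;  // old value whether or not the swap happened
}

int main() {
  volatile int v = 5;
  int ok  = cmpxchg_seq_cst(6, &v, 5);  // succeeds: v becomes 6, returns 5
  int rej = cmpxchg_seq_cst(9, &v, 5);  // fails: v stays 6, returns 6
  return (ok == 5 && rej == 6 && v == 6) ? 0 : 1;
}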

View File

@ -50,93 +50,28 @@ inline void OrderAccess::fence() {
FULL_MEM_BARRIER;
}
inline jbyte OrderAccess::load_acquire(const volatile jbyte* p)
{ jbyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jshort OrderAccess::load_acquire(const volatile jshort* p)
{ jshort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jint OrderAccess::load_acquire(const volatile jint* p)
{ jint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jlong OrderAccess::load_acquire(const volatile jlong* p)
{ jlong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jubyte OrderAccess::load_acquire(const volatile jubyte* p)
{ jubyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jushort OrderAccess::load_acquire(const volatile jushort* p)
{ jushort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline juint OrderAccess::load_acquire(const volatile juint* p)
{ juint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline julong OrderAccess::load_acquire(const volatile julong* p)
{ julong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jfloat OrderAccess::load_acquire(const volatile jfloat* p)
{ jfloat data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jdouble OrderAccess::load_acquire(const volatile jdouble* p)
{ jdouble data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline intptr_t OrderAccess::load_ptr_acquire(const volatile intptr_t* p)
{ intptr_t data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline void* OrderAccess::load_ptr_acquire(const volatile void* p)
{ void* data; __atomic_load((void* const volatile *)p, &data, __ATOMIC_ACQUIRE); return data; }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
};
inline void OrderAccess::release_store(volatile jbyte* p, jbyte v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jshort* p, jshort v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jint* p, jint v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jlong* p, jlong v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jubyte* p, jubyte v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jushort* p, jushort v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile juint* p, juint v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile julong* p, julong v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jfloat* p, jfloat v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jdouble* p, jdouble v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store_ptr(volatile void* p, void* v)
{ __atomic_store((void* volatile *)p, &v, __ATOMIC_RELEASE); }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); }
};
inline void OrderAccess::store_fence(jbyte* p, jbyte v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jshort* p, jshort v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jint* p, jint v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jlong* p, jlong v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jubyte* p, jubyte v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jushort* p, jushort v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(juint* p, juint v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(julong* p, julong v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jfloat* p, jfloat v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jdouble* p, jdouble v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_ptr_fence(void** p, void* v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const { release_store(p, v); fence(); }
};
#endif // OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP
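
The templated specializations above replace the long per-type lists with two building blocks on the GCC __atomic builtins: an acquire load and a release store (the RELEASE_X_FENCE flavour additionally calls fence() afterwards). A small standalone sketch of those two primitives, assuming a GCC or Clang toolchain; on AArch64 they typically lower to ldar and stlr.

// Acquire load: later memory accesses cannot be reordered before it.
template <typename T>
static T load_acquire(const volatile T* p) {
  T value;
  __atomic_load(p, &value, __ATOMIC_ACQUIRE);
  return value;
}

// Release store: earlier memory accesses cannot be reordered past it.
template <typename T>
static void release_store(volatile T* p, T value) {
  __atomic_store(p, &value, __ATOMIC_RELEASE);
}

int main() {
  volatile long ready = 0;
  release_store(&ready, 1L);
  return load_acquire(&ready) == 1 ? 0 : 1;
}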

View File

@ -44,39 +44,24 @@
* kernel source or kernel_user_helpers.txt in Linux Doc.
*/
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
#ifndef AARCH64
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
return PrimitiveConversions::cast<T>(
(*os::atomic_load_long_func)(reinterpret_cast<const volatile jlong*>(src)));
}
inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
inline jlong Atomic::load (const volatile jlong* src) {
assert(((intx)src & (sizeof(jlong)-1)) == 0, "Atomic load jlong mis-aligned");
#ifdef AARCH64
return *src;
#else
return (*os::atomic_load_long_func)(src);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
(*os::atomic_store_long_func)(
PrimitiveConversions::cast<jlong>(store_value), reinterpret_cast<volatile jlong*>(dest));
}
#endif
}
inline void Atomic::store (jlong value, volatile jlong* dest) {
assert(((intx)dest & (sizeof(jlong)-1)) == 0, "Atomic store jlong mis-aligned");
#ifdef AARCH64
*dest = value;
#else
(*os::atomic_store_long_func)(value, dest);
#endif
}
inline void Atomic::store (jlong value, jlong* dest) {
store(value, (volatile jlong*)dest);
}
// As per atomic.hpp all read-modify-write operations have to provide two-way
// barriers semantics. For AARCH64 we are using load-acquire-with-reservation and
@ -122,14 +107,6 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) co
#endif
}
inline void Atomic::inc(volatile jint* dest) {
Atomic::add(1, (volatile jint *)dest);
}
inline void Atomic::dec(volatile jint* dest) {
Atomic::add(-1, (volatile jint *)dest);
}
#ifdef AARCH64
template<>
template<typename I, typename D>
@ -149,28 +126,15 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
: "memory");
return val;
}
#endif // AARCH64
#endif
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
Atomic::add_ptr(1, dest);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
Atomic::add_ptr(-1, dest);
}
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
#ifdef AARCH64
jint old_val;
T old_val;
int tmp;
__asm__ volatile(
"1:\n\t"
@ -182,13 +146,17 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
: "memory");
return old_val;
#else
return (*os::atomic_xchg_func)(exchange_value, dest);
return xchg_using_helper<jint>(os::atomic_xchg_func, exchange_value, dest);
#endif
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
#ifdef AARCH64
intptr_t old_val;
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T old_val;
int tmp;
__asm__ volatile(
"1:\n\t"
@ -199,14 +167,8 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
: [new_val] "r" (exchange_value), [dest] "r" (dest)
: "memory");
return old_val;
#else
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
#endif
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
#endif // AARCH64
// The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering

View File

@ -33,7 +33,6 @@
// - we define the high level barriers below and use the general
// implementation in orderAccess.inline.hpp, with customizations
// on AARCH64 via the specialized_* template functions
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
// Memory Ordering on ARM is weak.
//
@ -131,91 +130,126 @@ inline void OrderAccess::fence() { dmb_sy(); }
#ifdef AARCH64
template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte>(const volatile jbyte* p) {
volatile jbyte result;
__asm__ volatile(
"ldarb %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
template<>
struct OrderAccess::PlatformOrderedLoad<1, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const {
volatile T result;
__asm__ volatile(
"ldarb %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
};
template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) {
volatile jshort result;
__asm__ volatile(
"ldarh %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
template<>
struct OrderAccess::PlatformOrderedLoad<2, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const {
volatile T result;
__asm__ volatile(
"ldarh %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
};
template<> inline jint OrderAccess::specialized_load_acquire<jint>(const volatile jint* p) {
volatile jint result;
__asm__ volatile(
"ldar %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
template<>
struct OrderAccess::PlatformOrderedLoad<4, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const {
volatile T result;
__asm__ volatile(
"ldar %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
};
template<> inline jfloat OrderAccess::specialized_load_acquire<jfloat>(const volatile jfloat* p) {
return jfloat_cast(specialized_load_acquire((const volatile jint*)p));
}
template<>
struct OrderAccess::PlatformOrderedLoad<8, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const {
volatile T result;
__asm__ volatile(
"ldar %[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
};
// This is implicit as jlong and intptr_t are both "long int"
//template<> inline jlong OrderAccess::specialized_load_acquire(const volatile jlong* p) {
// return (volatile jlong)specialized_load_acquire((const volatile intptr_t*)p);
//}
template<>
struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile(
"stlrb %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
};
template<> inline intptr_t OrderAccess::specialized_load_acquire<intptr_t>(const volatile intptr_t* p) {
volatile intptr_t result;
__asm__ volatile(
"ldar %[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
template<>
struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile(
"stlrh %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
};
template<> inline jdouble OrderAccess::specialized_load_acquire<jdouble>(const volatile jdouble* p) {
return jdouble_cast(specialized_load_acquire((const volatile intptr_t*)p));
}
template<>
struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile(
"stlr %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
};
template<>
struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile(
"stlr %[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
};
template<> inline void OrderAccess::specialized_release_store<jbyte>(volatile jbyte* p, jbyte v) {
__asm__ volatile(
"stlrb %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
template<> inline void OrderAccess::specialized_release_store<jshort>(volatile jshort* p, jshort v) {
__asm__ volatile(
"stlrh %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
template<> inline void OrderAccess::specialized_release_store<jint>(volatile jint* p, jint v) {
__asm__ volatile(
"stlr %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
template<> inline void OrderAccess::specialized_release_store<jlong>(volatile jlong* p, jlong v) {
__asm__ volatile(
"stlr %[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
#endif // AARCH64
#endif // OS_CPU_LINUX_ARM_VM_ORDERACCESS_LINUX_ARM_INLINE_HPP

View File

@ -32,22 +32,6 @@
// Implementation of class atomic
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
//
// machine barrier instructions:
//
@ -146,90 +130,14 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return result;
}
inline void Atomic::inc (volatile jint* dest) {
unsigned int temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
long temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
inline void Atomic::dec (volatile jint* dest) {
unsigned int temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
long temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).
unsigned int old_value;
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@ -257,15 +165,18 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
"memory"
);
return (jint) old_value;
return old_value;
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).
long old_value;
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@ -293,11 +204,7 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
"memory"
);
return (intptr_t) old_value;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
return old_value;
}
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {

View File

@ -80,10 +80,14 @@ inline void OrderAccess::acquire() { inlasm_lwsync(); }
inline void OrderAccess::release() { inlasm_lwsync(); }
inline void OrderAccess::fence() { inlasm_sync(); }
template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte> (const volatile jbyte* p) { register jbyte t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jint OrderAccess::specialized_load_acquire<jint> (const volatile jint* p) { register jint t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const volatile jlong* p) { register jlong t = load(p); inlasm_acquire_reg(t); return t; }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const { register T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
};
#undef inlasm_sync
#undef inlasm_lwsync
@ -91,6 +95,4 @@ template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const vol
#undef inlasm_isync
#undef inlasm_acquire_reg
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
#endif // OS_CPU_LINUX_PPC_VM_ORDERACCESS_LINUX_PPC_INLINE_HPP

View File

@ -53,20 +53,6 @@
// is an integer multiple of the data length. Furthermore, all stores are ordered:
// a store which occurs conceptually before another store becomes visible to other CPUs
// before the other store becomes visible.
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
//------------
// Atomic::add
@ -192,219 +178,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest) const {
}
//------------
// Atomic::inc
//------------
// These methods force the value in memory to be incremented (augmented by 1).
// Both, memory value and increment, are treated as 32bit signed binary integers.
// No overflow exceptions are recognized, and the condition code does not hold
// information about the value in memory.
//
// The value in memory is updated by using a compare-and-swap instruction. The
// instruction is retried as often as required.
inline void Atomic::inc(volatile jint* dest) {
unsigned int old, upd;
if (VM_Version::has_LoadAndALUAtomicV1()) {
// tty->print_cr("Atomic::inc called... dest @%p", dest);
__asm__ __volatile__ (
" LGHI 2,1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAA 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xf8 \n\t" // LAA minor opcode
" AGHI 2,1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
"0: LA %[upd],1(,%[old]) \n\t" // calc result
" CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}
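
The comment block above describes the fallback path: read the old value, compute old plus one, and retry a compare-and-swap until it sticks. The loop below is a compact, portable illustration of that scheme using a GCC builtin; the port itself emits CS/CSG (or the LAA/LAAG fast path where available) in inline assembly.

// CAS-retry increment, mirroring the scheme described above.
static inline int cas_increment(volatile int* dest) {
  int old_value, new_value;
  do {
    old_value = *dest;            // sample the current value
    new_value = old_value + 1;    // desired result
    // retry if another thread changed *dest in the meantime
  } while (__sync_val_compare_and_swap(dest, old_value, new_value) != old_value);
  return new_value;
}

int main() {
  volatile int counter = 41;
  return cas_increment(&counter) == 42 ? 0 : 1;
}
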
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
unsigned long old, upd;
if (VM_Version::has_LoadAndALUAtomicV1()) {
__asm__ __volatile__ (
" LGHI 2,1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAAG 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xe8 \n\t" // LAA minor opcode
" AGHI 2,1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
"0: LA %[upd],1(,%[old]) \n\t" // calc result
" CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
//------------
// Atomic::dec
//------------
// These methods force the value in memory to be decremented (augmented by -1).
// Both, memory value and decrement, are treated as 32bit signed binary integers.
// No overflow exceptions are recognized, and the condition code does not hold
// information about the value in memory.
//
// The value in memory is updated by using a compare-and-swap instruction. The
// instruction is retried as often as required.
inline void Atomic::dec(volatile jint* dest) {
unsigned int old, upd;
if (VM_Version::has_LoadAndALUAtomicV1()) {
__asm__ __volatile__ (
" LGHI 2,-1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAA 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xf8 \n\t" // LAA minor opcode
" AGHI 2,-1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
// LAY not supported by inline assembler
// "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
"0: LR %[upd],%[old] \n\t" // calc result
" AHI %[upd],-1 \n\t"
" CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
unsigned long old, upd;
if (VM_Version::has_LoadAndALUAtomicV1()) {
__asm__ __volatile__ (
" LGHI 2,-1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAAG 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xe8 \n\t" // LAA minor opcode
" AGHI 2,-1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
// LAY not supported by inline assembler
// "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
"0: LGR %[upd],%[old] \n\t" // calc result
" AGHI %[upd],-1 \n\t"
" CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
//-------------
// Atomic::xchg
//-------------
@ -421,8 +194,12 @@ inline void Atomic::dec_ptr(volatile void* dest) {
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.
inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
unsigned int old;
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
T old;
__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
@ -432,16 +209,20 @@ inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
: [old] "=&d" (old) // write-only, prev value irrelevant
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
: [upd] "d" (xchg_val) // read-only, value to be written to memory
: [upd] "d" (exchange_value) // read-only, value to be written to memory
//---< clobbered >---
: "cc", "memory"
);
return (jint)old;
return old;
}
inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
unsigned long old;
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T old;
__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
@ -451,16 +232,12 @@ inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
: [old] "=&d" (old) // write-only, init from memory
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
: [upd] "d" (xchg_val) // read-only, value to be written to memory
: [upd] "d" (exchange_value) // read-only, value to be written to memory
//---< clobbered >---
: "cc", "memory"
);
return (intptr_t)old;
}
inline void *Atomic::xchg_ptr(void *exchange_value, volatile void *dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
return old;
}
//----------------
@ -544,6 +321,4 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val,
return old;
}
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
#endif // OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP

View File

@ -74,10 +74,13 @@ inline void OrderAccess::acquire() { inlasm_zarch_acquire(); }
inline void OrderAccess::release() { inlasm_zarch_release(); }
inline void OrderAccess::fence() { inlasm_zarch_sync(); }
template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte> (const volatile jbyte* p) { register jbyte t = *p; inlasm_zarch_acquire(); return t; }
template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) { register jshort t = *p; inlasm_zarch_acquire(); return t; }
template<> inline jint OrderAccess::specialized_load_acquire<jint> (const volatile jint* p) { register jint t = *p; inlasm_zarch_acquire(); return t; }
template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const volatile jlong* p) { register jlong t = *p; inlasm_zarch_acquire(); return t; }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const { register T t = *p; inlasm_zarch_acquire(); return t; }
};
#undef inlasm_compiler_barrier
#undef inlasm_zarch_sync
@ -85,8 +88,4 @@ template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const vol
#undef inlasm_zarch_acquire
#undef inlasm_zarch_fence
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
#endif // OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -448,11 +448,17 @@ JVM_handle_linux_signal(int sig,
}
else { // thread->thread_state() != _thread_in_Java
if (sig == SIGILL && VM_Version::is_determine_features_test_running()) {
// SIGILL must be caused by VM_Version::determine_features().
if ((sig == SIGILL) && VM_Version::is_determine_features_test_running()) {
// SIGILL must be caused by VM_Version::determine_features()
// when attempting to execute a non-existing instruction.
//*(int *) (pc-6)=0; // Patch instruction to 0 to indicate that it causes a SIGILL.
// Flushing of icache is not necessary.
stub = pc; // Continue with next instruction.
} else if ((sig == SIGFPE) && VM_Version::is_determine_features_test_running()) {
// SIGFPE is known to be caused by trying to execute a vector instruction
// when the vector facility is installed, but operating system support is missing.
VM_Version::reset_has_VectorFacility();
stub = pc; // Continue with next instruction.
} else if (thread->thread_state() == _thread_in_vm &&
sig == SIGBUS && thread->doing_unsafe_access()) {
// We don't really need a stub here! Just set the pending exception and
@ -471,7 +477,7 @@ JVM_handle_linux_signal(int sig,
// Info->si_addr need not be the exact address, it is only
// guaranteed to be on the same page as the address that caused
// the SIGSEGV.
if ((sig == SIGSEGV) &&
if ((sig == SIGSEGV) && !UseMembar &&
(os::get_memory_serialize_page() ==
(address)((uintptr_t)info->si_addr & ~(os::vm_page_size()-1)))) {
return true;
@ -510,7 +516,7 @@ JVM_handle_linux_signal(int sig,
// Note: this should be combined with the trap_pc handling above,
// because it handles the same issue.
if (sig == SIGILL || sig == SIGFPE) {
pc = (address) info->si_addr;
pc = (address)info->si_addr;
}
VMError::report_and_die(t, sig, pc, info, ucVoid);
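
The SIGSEGV test above only needs the faulting address to lie somewhere on the memory-serialize page, so it masks the address down to its page base before comparing, and after this change it also requires UseMembar to be off. A tiny standalone sketch of that masking comparison with made-up values; the function name is illustrative.

#include <cstdint>

// True if addr lies on the page starting at page_base
// (page_size must be a power of two).
static bool on_serialize_page(uintptr_t addr, uintptr_t page_base,
                              uintptr_t page_size) {
  return (addr & ~(page_size - 1)) == page_base;
}

int main() {
  const uintptr_t page_base = 0x7f000000u;  // assumed page-aligned address
  const uintptr_t page_size = 4096u;
  bool hit  = on_serialize_page(page_base + 123u, page_base, page_size);
  bool miss = on_serialize_page(page_base + page_size, page_base, page_size);
  return (hit && !miss) ? 0 : 1;
}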

View File

@ -27,30 +27,6 @@
// Implementation of class atomic
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }
inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@ -103,9 +79,12 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return rv;
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
intptr_t rv = exchange_value;
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
T rv = exchange_value;
__asm__ volatile(
" swap [%2],%1\n\t"
: "=r" (rv)
@ -114,8 +93,12 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return rv;
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
intptr_t rv = exchange_value;
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T rv = exchange_value;
__asm__ volatile(
"1:\n\t"
" mov %1, %%o3\n\t"
@ -131,10 +114,6 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
return rv;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
// No direct support for cmpxchg of bytes; emulate using int.
template<>
struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
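
The hunks above replace the jint/intptr_t-specific Atomic::xchg and xchg_ptr bodies with PlatformXchg<4> and PlatformXchg<8> functors selected by operand size. A rough sketch of that size-based dispatch is shown below, using a GCC builtin in place of the SPARC inline assembly; atomic_xchg and this PlatformXchg are illustrative stand-ins, not the full HotSpot front end (which also handles pointer and conversion cases).

#include <cstddef>

template<size_t byte_size>
struct PlatformXchg;              // one specialization per supported operand size

template<>
struct PlatformXchg<4> {
  template<typename T>
  T operator()(T exchange_value, T volatile* dest) const {
    // Stand-in for the "swap [%2],%1" inline assembly in the diff.
    return __atomic_exchange_n(dest, exchange_value, __ATOMIC_SEQ_CST);
  }
};

template<typename T>
T atomic_xchg(T exchange_value, T volatile* dest) {
  // sizeof(T) picks the platform specialization at compile time.
  return PlatformXchg<sizeof(T)>()(exchange_value, dest);
}

Only the 4-byte case is sketched; the 8-byte specialization follows the same shape.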

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -48,6 +48,4 @@ inline void OrderAccess::fence() {
__asm__ volatile ("membar #StoreLoad" : : : "memory");
}
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
#endif // OS_CPU_LINUX_SPARC_VM_ORDERACCESS_LINUX_SPARC_INLINE_HPP

View File

@ -27,19 +27,6 @@
// Implementation of class atomic
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
@ -61,25 +48,11 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
inline void Atomic::inc (volatile jint* dest) {
__asm__ volatile ( "lock addl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
inline void Atomic::dec (volatile jint* dest) {
__asm__ volatile ( "lock subl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -87,10 +60,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return exchange_value;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@ -120,8 +89,6 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
}
#ifdef AMD64
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
template<>
template<typename I, typename D>
@ -136,21 +103,11 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ("lock addq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ("lock subq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
__asm__ __volatile__ ("xchgq (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -172,22 +129,8 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return exchange_value;
}
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
#else // !AMD64
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
inc((volatile jint*)dest);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
dec((volatile jint*)dest);
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
}
extern "C" {
// defined in linux_x86.s
jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong);
@ -204,18 +147,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return cmpxchg_using_helper<jlong>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
}
inline jlong Atomic::load(const volatile jlong* src) {
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
_Atomic_move_long(src, &dest);
return dest;
_Atomic_move_long(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
return PrimitiveConversions::cast<T>(dest);
}
inline void Atomic::store(jlong store_value, jlong* dest) {
_Atomic_move_long((volatile jlong*)&store_value, (volatile jlong*)dest);
}
inline void Atomic::store(jlong store_value, volatile jlong* dest) {
_Atomic_move_long((volatile jlong*)&store_value, dest);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
_Atomic_move_long(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}
#endif // AMD64
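
On 32-bit x86 the hunks above route 64-bit loads and stores through the _Atomic_move_long assembly helper and then convert with PrimitiveConversions::cast so that jlong and jdouble share one code path. Below is a small sketch of that "move as int64_t, then bit-cast to T" pattern; atomic_move_64 and load_64 are hypothetical names standing in for the helper and the PlatformLoad<8> specialization.

#include <cstdint>
#include <cstring>

// Placeholder declaration for an assembly/OS helper that copies 8 bytes atomically.
void atomic_move_64(const volatile int64_t* src, volatile int64_t* dst);

template<typename T>
T load_64(const volatile T* src) {
  static_assert(sizeof(T) == 8, "8-byte operands only");
  volatile int64_t tmp;
  atomic_move_64(reinterpret_cast<const volatile int64_t*>(src), &tmp);
  T result;
  std::memcpy(&result, const_cast<const int64_t*>(&tmp), sizeof(T));  // bit-cast, e.g. int64_t -> double
  return result;
}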

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -60,46 +60,57 @@ inline void OrderAccess::fence() {
}
template<>
inline void OrderAccess::specialized_release_store_fence<jbyte> (volatile jbyte* p, jbyte v) {
__asm__ volatile ( "xchgb (%2),%0"
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgb (%2),%0"
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
}
};
template<>
inline void OrderAccess::specialized_release_store_fence<jshort>(volatile jshort* p, jshort v) {
__asm__ volatile ( "xchgw (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgw (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};
template<>
inline void OrderAccess::specialized_release_store_fence<jint> (volatile jint* p, jint v) {
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};
#ifdef AMD64
template<>
inline void OrderAccess::specialized_release_store_fence<jlong> (volatile jlong* p, jlong v) {
__asm__ volatile ( "xchgq (%2), %0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgq (%2), %0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};
#endif // AMD64
template<>
inline void OrderAccess::specialized_release_store_fence<jfloat> (volatile jfloat* p, jfloat v) {
release_store_fence((volatile jint*)p, jint_cast(v));
}
template<>
inline void OrderAccess::specialized_release_store_fence<jdouble>(volatile jdouble* p, jdouble v) {
release_store_fence((volatile jlong*)p, jlong_cast(v));
}
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
#endif // OS_CPU_LINUX_X86_VM_ORDERACCESS_LINUX_X86_INLINE_HPP
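
The functors introduced above keep the usual x86 trick for release_store_fence: a memory-destination XCHG is implicitly LOCKed, so a single instruction performs both the store and a full fence, with no separate MFENCE needed. A minimal 4-byte sketch of that idea (illustrative only, not the HotSpot template machinery):

#include <cstdint>

inline void release_store_fence_u32(volatile uint32_t* p, uint32_t v) {
  // XCHG with a memory operand is implicitly locked on x86, so this is both
  // the store and a full memory barrier.
  __asm__ volatile("xchgl %0, %1"
                   : "+r"(v), "+m"(*p)
                   :
                   : "memory");
}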

View File

@ -87,7 +87,7 @@ static inline int m68k_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically write VALUE into `*PTR' and return the previous
contents of `*PTR'. */
static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until success.
int prev = *ptr;
@ -148,7 +148,7 @@ static inline int arm_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically write VALUE into `*PTR' and return the previous
contents of `*PTR'. */
static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until a __kernel_cmpxchg succeeds.
int prev = *ptr;
@ -159,14 +159,6 @@ static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
}
#endif // ARM
inline void Atomic::store(jint store_value, volatile jint* dest) {
*dest = store_value;
}
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) {
*dest = store_value;
}
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@ -201,42 +193,22 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return __sync_add_and_fetch(dest, add_value);
}
inline void Atomic::inc(volatile jint* dest) {
add(1, dest);
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
add_ptr(1, dest);
}
inline void Atomic::inc_ptr(volatile void* dest) {
add_ptr(1, dest);
}
inline void Atomic::dec(volatile jint* dest) {
add(-1, dest);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
add_ptr(-1, dest);
}
inline void Atomic::dec_ptr(volatile void* dest) {
add_ptr(-1, dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest);
#else
// __sync_lock_test_and_set is a bizarrely named atomic exchange
// operation. Note that some platforms only support this with the
// limitation that the only valid value to store is the immediate
// constant 1. There is a test for this in JNI_CreateJavaVM().
jint result = __sync_lock_test_and_set (dest, exchange_value);
T result = __sync_lock_test_and_set (dest, exchange_value);
// All atomic operations are expected to be full memory barriers
// (see atomic.hpp). However, __sync_lock_test_and_set is not
// a full memory barrier, but an acquire barrier. Hence, this added
@ -247,24 +219,14 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
#endif // ARM
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
volatile intptr_t* dest) {
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
#else
intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T result = __sync_lock_test_and_set (dest, exchange_value);
__sync_synchronize();
return result;
#endif // M68K
#endif // ARM
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void *) xchg_ptr((intptr_t) exchange_value,
(volatile intptr_t*) dest);
}
// No direct support for cmpxchg of bytes; emulate using int.
@ -299,18 +261,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
}
inline jlong Atomic::load(const volatile jlong* src) {
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
os::atomic_copy64(src, &dest);
return dest;
os::atomic_copy64(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
return PrimitiveConversions::cast<T>(dest);
}
inline void Atomic::store(jlong store_value, jlong* dest) {
os::atomic_copy64((volatile jlong*)&store_value, (volatile jlong*)dest);
}
inline void Atomic::store(jlong store_value, volatile jlong* dest) {
os::atomic_copy64((volatile jlong*)&store_value, dest);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
os::atomic_copy64(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}
#endif // OS_CPU_LINUX_ZERO_VM_ATOMIC_LINUX_ZERO_HPP
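
Two details in the zero-port hunks above are worth spelling out. First, xchg_using_helper adapts the int-based arm_lock_test_and_set / m68k_lock_test_and_set helpers (whose parameter order was swapped to match) to the templated 4-byte interface. Second, as the retained comment notes, __sync_lock_test_and_set is only an acquire barrier, so __sync_synchronize() is kept to restore full-barrier semantics. A rough sketch of the adapter shape for integral 4-byte types; the names here are illustrative, not the shared HotSpot template:

typedef int (*xchg_helper_fn)(int newval, volatile int* ptr);

template<typename T>
T xchg_using_int_helper(xchg_helper_fn helper, T exchange_value, T volatile* dest) {
  static_assert(sizeof(T) == sizeof(int), "helper operates on int-sized values");
  // Assumes an integral T; the real code converts more carefully.
  return static_cast<T>(helper(static_cast<int>(exchange_value),
                               reinterpret_cast<volatile int*>(dest)));
}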

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2009 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -56,8 +56,16 @@ typedef void (__kernel_dmb_t) (void);
#else // PPC
#ifdef ALPHA
#define LIGHT_MEM_BARRIER __sync_synchronize()
#else // ALPHA
#define LIGHT_MEM_BARRIER __asm __volatile ("":::"memory")
#endif // ALPHA
#endif // PPC
#endif // ARM
@ -75,6 +83,4 @@ inline void OrderAccess::release() { LIGHT_MEM_BARRIER; }
inline void OrderAccess::fence() { FULL_MEM_BARRIER; }
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
#endif // OS_CPU_LINUX_ZERO_VM_ORDERACCESS_LINUX_ZERO_INLINE_HPP
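
For reference, the two barrier flavors used above differ in strength: the empty asm with a "memory" clobber is a compiler-only barrier (it stops the compiler reordering or caching memory accesses across it but emits no instruction), while __sync_synchronize() additionally emits a hardware fence, which is presumably why the Alpha branch uses it even for the light barrier. A tiny illustrative pair:

static inline void compiler_barrier() {
  __asm__ __volatile__("" ::: "memory");  // blocks compiler reordering only
}

static inline void full_barrier() {
  __sync_synchronize();                   // compiler barrier plus a CPU fence
}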

View File

@ -27,41 +27,6 @@
// Implementation of class atomic
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }
inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }
inline void Atomic::store(jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store(jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
// This is the interface to the atomic instructions in solaris_sparc.il.
// It's very messy because we need to support v8 and these instructions
// are illegal there. When sparc v8 is dropped, we can drop out lots of
// this code. Also compiler2 does not support v8 so the conditional code
// omits the instruction set check.
extern "C" jint _Atomic_swap32(jint exchange_value, volatile jint* dest);
extern "C" intptr_t _Atomic_swap64(intptr_t exchange_value, volatile intptr_t* dest);
// Implement ADD using a CAS loop.
template<size_t byte_size>
struct Atomic::PlatformAdd VALUE_OBJ_CLASS_SPEC {
@ -78,16 +43,30 @@ struct Atomic::PlatformAdd VALUE_OBJ_CLASS_SPEC {
}
};
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
return _Atomic_swap32(exchange_value, dest);
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "swap [%2],%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
: "memory");
return exchange_value;
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return _Atomic_swap64(exchange_value, dest);
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T old_value = *dest;
while (true) {
T result = cmpxchg(exchange_value, dest, old_value);
if (result == old_value) break;
old_value = result;
}
return old_value;
}
// No direct support for cmpxchg of bytes; emulate using int.
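
Several of the files in this commit end with the same note: byte-sized cmpxchg is emulated with a 4-byte cmpxchg (the PlatformCmpxchg<1> : CmpxchgByteUsingInt specializations above). The idea is to CAS the aligned 32-bit word containing the byte, replacing only the target byte and retrying if a neighbouring byte changes underneath. Below is a self-contained sketch of that technique, assuming little-endian byte order and GCC's __sync_val_compare_and_swap; it is illustrative, not the shared HotSpot implementation.

#include <cstdint>

inline uint8_t cmpxchg_byte(uint8_t exchange, volatile uint8_t* dest, uint8_t compare) {
  uintptr_t addr = reinterpret_cast<uintptr_t>(dest);
  volatile uint32_t* word = reinterpret_cast<volatile uint32_t*>(addr & ~uintptr_t(3));
  unsigned shift = (addr & 3) * 8;                  // little-endian byte position
  uint32_t mask = uint32_t(0xff) << shift;

  uint32_t old_word = *word;
  for (;;) {
    uint8_t old_byte = uint8_t((old_word & mask) >> shift);
    if (old_byte != compare) {
      return old_byte;                              // byte differs; no store, report current value
    }
    uint32_t new_word = (old_word & ~mask) | (uint32_t(exchange) << shift);
    uint32_t prev = __sync_val_compare_and_swap(word, old_word, new_word);
    if (prev == old_word) {
      return compare;                               // CAS succeeded; previous byte was 'compare'
    }
    old_word = prev;                                // some byte changed; retry with the fresh word
  }
}

On a big-endian target the shift would be computed from (3 - (addr & 3)) instead.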

Some files were not shown because too many files have changed in this diff.