commit 947fc09db0
Merge
@@ -113,6 +113,7 @@ PLATFORM_MODULES += \
    jdk.dynalink \
    jdk.httpserver \
    jdk.incubator.httpclient \
    jdk.internal.vm.compiler.management \
    jdk.jsobject \
    jdk.localedata \
    jdk.naming.dns \
@@ -215,6 +216,7 @@ endif

ifeq ($(INCLUDE_GRAAL), false)
  MODULES_FILTER += jdk.internal.vm.compiler
  MODULES_FILTER += jdk.internal.vm.compiler.management
endif

################################################################################
@@ -1063,7 +1063,7 @@ var getJibProfilesDependencies = function (input, common) {
        jtreg: {
            server: "javare",
            revision: "4.2",
            build_number: "b08",
            build_number: "b09",
            checksum_file: "MD5_VALUES",
            file: "jtreg_bin-4.2.zip",
            environment_name: "JT_HOME",
@@ -54,15 +54,4 @@ $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java:

GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java

$(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat: \
    $(TOPDIR)/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat \
    $(VARDEPS_FILE) $(BUILD_TOOLS_JDK)
	$(MKDIR) -p $(@D)
	$(RM) $@ $@.tmp
	$(TOOL_GENCLASSLOADERMAP) -boot $(BOOT_MODULES_LIST) \
	    -platform $(PLATFORM_MODULES_LIST) -o $@.tmp $<
	$(MV) $@.tmp $@

GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat

################################################################################
@@ -47,6 +47,9 @@ endif
ifeq ($(call check-jvm-feature, zero), true)
  JVM_CFLAGS_FEATURES += -DZERO -DCC_INTERP -DZERO_LIBARCH='"$(OPENJDK_TARGET_CPU_LEGACY_LIB)"' $(LIBFFI_CFLAGS)
  JVM_LIBS_FEATURES += $(LIBFFI_LIBS)
  ifeq ($(OPENJDK_TARGET_CPU), sparcv9)
    BUILD_LIBJVM_EXTRA_FILES := $(TOPDIR)/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp
  endif
endif

ifeq ($(call check-jvm-feature, shark), true)
@@ -77,30 +77,22 @@ public class GenModuleLoaderMap {
            throw new IllegalArgumentException(source + " not exist");
        }

        boolean needsQuotes = outfile.toString().contains(".java.tmp");

        try (BufferedWriter bw = Files.newBufferedWriter(outfile, StandardCharsets.UTF_8);
             PrintWriter writer = new PrintWriter(bw)) {
            for (String line : Files.readAllLines(source)) {
                if (line.contains("@@BOOT_MODULE_NAMES@@")) {
                    line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules, needsQuotes);
                    line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules);
                } else if (line.contains("@@PLATFORM_MODULE_NAMES@@")) {
                    line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules, needsQuotes);
                    line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules);
                }
                writer.println(line);
            }
        }
    }

    private static String patch(String s, String tag, Stream<String> stream, boolean needsQuotes) {
        String mns = null;
        if (needsQuotes) {
            mns = stream.sorted()
                .collect(Collectors.joining("\",\n        \""));
        } else {
            mns = stream.sorted()
                .collect(Collectors.joining("\n"));
        }
    private static String patch(String s, String tag, Stream<String> stream) {
        String mns = stream.sorted()
            .collect(Collectors.joining("\",\n        \""));
        return s.replace(tag, mns);
    }
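
The unified patch() always emits the quoted, comma-separated form now, since the generated .java template is the only consumer left after the needsQuotes special case was dropped. A standalone sketch of the same sort-and-join transformation, written in C++ purely for illustration (join_quoted is a hypothetical name, not part of the commit):

#include <algorithm>
#include <string>
#include <vector>

// Sort module names, then join them as quoted, comma-separated Java source
// fragments -- the same shape patch() substitutes for @@BOOT_MODULE_NAMES@@.
std::string join_quoted(std::vector<std::string> names) {
  std::sort(names.begin(), names.end());
  std::string out;
  for (size_t i = 0; i < names.size(); ++i) {
    out += '"' + names[i] + '"';
    if (i + 1 < names.size()) out += ",\n        ";  // separator as in patch()
  }
  return out;
}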
@@ -59,6 +59,7 @@ BUILD_HOTSPOT_JTREG_NATIVE_SRC += \
    $(TOPDIR)/test/hotspot/jtreg/runtime/SameObject \
    $(TOPDIR)/test/hotspot/jtreg/runtime/BoolReturn \
    $(TOPDIR)/test/hotspot/jtreg/runtime/noClassDefFoundMsg \
    $(TOPDIR)/test/hotspot/jtreg/runtime/RedefineTests \
    $(TOPDIR)/test/hotspot/jtreg/compiler/floatingpoint/ \
    $(TOPDIR)/test/hotspot/jtreg/compiler/calls \
    $(TOPDIR)/test/hotspot/jtreg/serviceability/jvmti/GetOwnedMonitorInfo \
@@ -103,6 +104,7 @@ ifeq ($(TOOLCHAIN_TYPE), solstudio)
    BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAClassLoadPrepare := -lc
    BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAThreadStart := -lc
    BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libAllowedFunctions := -lc
    BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libRedefineDoubleDelete := -lc
endif

ifeq ($(OPENJDK_TARGET_OS), linux)
@@ -70,7 +70,7 @@
        </toolChain>
      </folderInfo>
      <sourceEntries>
        <entry excluding="cpu/vm/templateTable_x86_32.cpp|cpu/vm/templateInterpreter_x86_32.cpp|cpu/vm/stubRoutines_x86_32.cpp|cpu/vm/stubGenerator_x86_32.cpp|cpu/vm/sharedRuntime_x86_32.cpp|cpu/vm/jniFastGetField_x86_32.cpp|cpu/vm/interpreterRT_x86_32.cpp|cpu/vm/interpreter_x86_32.cpp|cpu/vm/interp_masm_x86_32.cpp|cpu/vm/vtableStubs_x86_32.cpp" flags="VALUE_WORKSPACE_PATH" kind="sourcePath" name=""/>
        <entry excluding="cpu/x86/templateTable_x86_32.cpp|cpu/x86/templateInterpreter_x86_32.cpp|cpu/x86/stubRoutines_x86_32.cpp|cpu/x86/stubGenerator_x86_32.cpp|cpu/x86/sharedRuntime_x86_32.cpp|cpu/x86/jniFastGetField_x86_32.cpp|cpu/x86/interpreterRT_x86_32.cpp|cpu/x86/interpreter_x86_32.cpp|cpu/x86/interp_masm_x86_32.cpp|cpu/x86/vtableStubs_x86_32.cpp" flags="VALUE_WORKSPACE_PATH" kind="sourcePath" name=""/>
      </sourceEntries>
    </configuration>
  </storageModule>
@@ -256,14 +256,10 @@ class HotSpotProject(mx.NativeProject):
        """

        roots = [
            'ASSEMBLY_EXCEPTION',
            'LICENSE',
            'README',
            'THIRD_PARTY_README',
            'agent',
            'make',
            'src',
            'test'
            'cpu',
            'os',
            'os_cpu',
            'share'
        ]

        for jvmVariant in _jdkJvmVariants:
@@ -605,6 +601,16 @@ def _get_openjdk_cpu():
def _get_openjdk_os_cpu():
    return _get_openjdk_os() + '-' + _get_openjdk_cpu()

def _get_jdk_dir():
    suiteParentDir = dirname(_suite.dir)
    # suiteParentDir is now something like: /some_prefix/jdk10-hs/open/src
    pathComponents = suiteParentDir.split(os.sep)
    for i in range(0, len(pathComponents)):
        if pathComponents[i] in ["open", "src"]:
            del pathComponents[i:]
            break
    return os.path.join(os.sep, *pathComponents)

def _get_jdk_build_dir(debugLevel=None):
    """
    Gets the directory into which the JDK is built. This directory contains
@@ -613,7 +619,7 @@ def _get_jdk_build_dir(debugLevel=None):
    if debugLevel is None:
        debugLevel = _vm.debugLevel
    name = '{}-{}-{}-{}'.format(_get_openjdk_os_cpu(), 'normal', _vm.jvmVariant, debugLevel)
    return join(dirname(_suite.dir), 'build', name)
    return join(_get_jdk_dir(), 'build', name)

_jvmci_bootclasspath_prepends = []
@@ -24,9 +24,7 @@ suite = {

    "defaultLicense" : "GPLv2-CPE",

    # This puts mx/ as a sibling of the JDK build configuration directories
    # (e.g., macosx-x86_64-normal-server-release).
    "outputRoot" : "../build/mx/hotspot",
    "outputRoot" : "../../build/mx/hotspot",

    # ------------- Libraries -------------

@@ -43,7 +41,7 @@ suite = {
    # ------------- JVMCI:Service -------------

    "jdk.vm.ci.services" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "javaCompliance" : "9",
      "workingSets" : "API,JVMCI",
@@ -52,7 +50,7 @@ suite = {
    # ------------- JVMCI:API -------------

    "jdk.vm.ci.common" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "checkstyle" : "jdk.vm.ci.services",
      "javaCompliance" : "9",
@@ -60,7 +58,7 @@ suite = {
    },

    "jdk.vm.ci.meta" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "checkstyle" : "jdk.vm.ci.services",
      "javaCompliance" : "9",
@@ -68,7 +66,7 @@ suite = {
    },

    "jdk.vm.ci.code" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "dependencies" : ["jdk.vm.ci.meta"],
      "checkstyle" : "jdk.vm.ci.services",
@@ -77,7 +75,7 @@ suite = {
    },

    "jdk.vm.ci.code.test" : {
      "subDir" : "test/compiler/jvmci",
      "subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
      "sourceDirs" : ["src"],
      "dependencies" : [
        "mx:JUNIT",
@@ -92,7 +90,7 @@ suite = {
    },

    "jdk.vm.ci.runtime" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "dependencies" : [
        "jdk.vm.ci.code",
@@ -104,7 +102,7 @@ suite = {
    },

    "jdk.vm.ci.runtime.test" : {
      "subDir" : "test/compiler/jvmci",
      "subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
      "sourceDirs" : ["src"],
      "dependencies" : [
        "mx:JUNIT",
@@ -119,7 +117,7 @@ suite = {
    # ------------- JVMCI:HotSpot -------------

    "jdk.vm.ci.aarch64" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "dependencies" : ["jdk.vm.ci.code"],
      "checkstyle" : "jdk.vm.ci.services",
@@ -128,7 +126,7 @@ suite = {
    },

    "jdk.vm.ci.amd64" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "dependencies" : ["jdk.vm.ci.code"],
      "checkstyle" : "jdk.vm.ci.services",
@@ -137,7 +135,7 @@ suite = {
    },

    "jdk.vm.ci.sparc" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "dependencies" : ["jdk.vm.ci.code"],
      "checkstyle" : "jdk.vm.ci.services",
@@ -146,7 +144,7 @@ suite = {
    },

    "jdk.vm.ci.hotspot" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "dependencies" : [
        "jdk.vm.ci.common",
@@ -163,7 +161,7 @@ suite = {
    },

    "jdk.vm.ci.hotspot.test" : {
      "subDir" : "test/compiler/jvmci",
      "subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
      "sourceDirs" : ["src"],
      "dependencies" : [
        "TESTNG",
@@ -175,7 +173,7 @@ suite = {
    },

    "jdk.vm.ci.hotspot.aarch64" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "dependencies" : [
        "jdk.vm.ci.aarch64",
@@ -187,7 +185,7 @@ suite = {
    },

    "jdk.vm.ci.hotspot.amd64" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "dependencies" : [
        "jdk.vm.ci.amd64",
@@ -199,7 +197,7 @@ suite = {
    },

    "jdk.vm.ci.hotspot.sparc" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "sourceDirs" : ["src"],
      "dependencies" : [
        "jdk.vm.ci.sparc",
@@ -221,12 +219,12 @@ suite = {
    # ------------- Distributions -------------

    "JVMCI_SERVICES" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "dependencies" : ["jdk.vm.ci.services"],
    },

    "JVMCI_API" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "dependencies" : [
        "jdk.vm.ci.runtime",
        "jdk.vm.ci.common",
@@ -240,7 +238,7 @@ suite = {
    },

    "JVMCI_HOTSPOT" : {
      "subDir" : "src/jdk.internal.vm.ci/share/classes",
      "subDir" : "../jdk.internal.vm.ci/share/classes",
      "dependencies" : [
        "jdk.vm.ci.hotspot.aarch64",
        "jdk.vm.ci.hotspot.amd64",
@@ -253,7 +251,7 @@ suite = {
    },

    "JVMCI_TEST" : {
      "subDir" : "test/compiler/jvmci",
      "subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
      "dependencies" : [
        "jdk.vm.ci.runtime.test",
      ],
@@ -2840,6 +2840,44 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
  bind(L_done);
}

// Code for BigInteger::mulAdd intrinsic
// out    = r0
// in     = r1
// offset = r2  (already out.length-offset)
// len    = r3
// k      = r4
//
// pseudo code from java implementation:
// carry = 0;
// offset = out.length-offset - 1;
// for (int j=len-1; j >= 0; j--) {
//     product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
//     out[offset--] = (int)product;
//     carry = product >>> 32;
// }
// return (int)carry;
void MacroAssembler::mul_add(Register out, Register in, Register offset,
                             Register len, Register k) {
  Label LOOP, END;
  // pre-loop
  cmp(len, zr); // cmp, not cbz/cbnz: to use the condition twice => fewer branches
  csel(out, zr, out, Assembler::EQ);
  br(Assembler::EQ, END);
  add(in, in, len, LSL, 2);         // in[j+1] address
  add(offset, out, offset, LSL, 2); // out[offset + 1] address
  mov(out, zr);                     // used to keep carry now
  BIND(LOOP);
  ldrw(rscratch1, Address(pre(in, -4)));
  madd(rscratch1, rscratch1, k, out);
  ldrw(rscratch2, Address(pre(offset, -4)));
  add(rscratch1, rscratch1, rscratch2);
  strw(rscratch1, Address(offset));
  lsr(out, rscratch1, 32);
  subs(len, len, 1);
  br(Assembler::NE, LOOP);
  BIND(END);
}
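
A scalar reference model of the stub above, derived directly from the pseudo code in the comment (illustrative C++, not JDK code; the out.length-offset bookkeeping is folded into the index j):

#include <cstdint>
#include <cstddef>

// Accumulate in[0..len) * k into out[0..len), walking from the most
// significant word down with a running 64-bit carry; returns the final carry.
static uint32_t mul_add_model(uint32_t* out, const uint32_t* in,
                              size_t len, uint32_t k) {
  uint64_t carry = 0;
  for (size_t j = len; j-- > 0; ) {
    uint64_t product = (uint64_t)in[j] * k + out[j] + carry;
    out[j] = (uint32_t)product;   // low 32 bits written back, as strw does
    carry  = product >> 32;       // high 32 bits carried to the next word
  }
  return (uint32_t)carry;
}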

/**
 * Emits code to update CRC-32 with a byte value according to constants in table
 *
@@ -3291,6 +3329,7 @@ void MacroAssembler::load_mirror(Register dst, Register method) {
  ldr(dst, Address(dst, ConstMethod::constants_offset()));
  ldr(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
  ldr(dst, Address(dst, mirror_offset));
  resolve_oop_handle(dst);
}

void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
@@ -1265,6 +1265,7 @@ public:
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
                       Register zlen, Register tmp1, Register tmp2, Register tmp3,
                       Register tmp4, Register tmp5, Register tmp6, Register tmp7);
  void mul_add(Register out, Register in, Register offs, Register len, Register k);
  // ISB may be needed because of a safepoint
  void maybe_isb() { isb(); }

@@ -3607,6 +3607,63 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  address generate_squareToLen() {
    // squareToLen algorithm for sizes 1..127 described in java code works
    // faster than multiply_to_len on some CPUs and slower on others, but
    // multiply_to_len shows a bit better overall results
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "squareToLen");
    address start = __ pc();

    const Register x    = r0;
    const Register xlen = r1;
    const Register z    = r2;
    const Register zlen = r3;
    const Register y    = r4; // == x
    const Register ylen = r5; // == xlen

    const Register tmp1 = r10;
    const Register tmp2 = r11;
    const Register tmp3 = r12;
    const Register tmp4 = r13;
    const Register tmp5 = r14;
    const Register tmp6 = r15;
    const Register tmp7 = r16;

    RegSet spilled_regs = RegSet::of(y, ylen);
    BLOCK_COMMENT("Entry:");
    __ enter();
    __ push(spilled_regs, sp);
    __ mov(y, x);
    __ mov(ylen, xlen);
    __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
    __ pop(spilled_regs, sp);
    __ leave();
    __ ret(lr);
    return start;
  }
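
The stub's whole trick is that squaring is multiplication with both operands aliased to x, which is why it only copies x/xlen into y/ylen and falls through to multiply_to_len. A self-contained scalar sketch of that delegation (illustrative C++, hypothetical names, not the JDK implementation):

#include <cstdint>
#include <vector>

// Schoolbook multiply of little-endian 32-bit limbs; result has
// x.size() + y.size() limbs.
static std::vector<uint32_t> multiply(const std::vector<uint32_t>& x,
                                      const std::vector<uint32_t>& y) {
  std::vector<uint32_t> z(x.size() + y.size(), 0);
  for (size_t i = 0; i < x.size(); ++i) {
    uint64_t carry = 0;
    for (size_t j = 0; j < y.size(); ++j) {
      uint64_t t = (uint64_t)x[i] * y[j] + z[i + j] + carry;
      z[i + j] = (uint32_t)t;
      carry = t >> 32;
    }
    z[i + y.size()] = (uint32_t)carry;
  }
  return z;
}

// squareToLen in miniature: alias both operands to x, reuse the multiplier.
static std::vector<uint32_t> square(const std::vector<uint32_t>& x) {
  return multiply(x, x);  // y == x, ylen == xlen, as in the stub
}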

  address generate_mulAdd() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "mulAdd");

    address start = __ pc();

    const Register out    = r0;
    const Register in     = r1;
    const Register offset = r2;
    const Register len    = r3;
    const Register k      = r4;

    BLOCK_COMMENT("Entry:");
    __ enter();
    __ mul_add(out, in, offset, len, k);
    __ leave();
    __ ret(lr);

    return start;
  }

  void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
                      FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                      FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
@@ -4913,6 +4970,14 @@ class StubGenerator: public StubCodeGenerator {
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
    }

    if (UseSquareToLenIntrinsic) {
      StubRoutines::_squareToLen = generate_squareToLen();
    }

    if (UseMulAddIntrinsic) {
      StubRoutines::_mulAdd = generate_mulAdd();
    }

    if (UseMontgomeryMultiplyIntrinsic) {
      StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
      MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
@@ -2297,6 +2297,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
                                 ConstantPoolCacheEntry::f1_offset())));
    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
    __ ldr(obj, Address(obj, mirror_offset));
    __ resolve_oop_handle(obj);
  }
}

@@ -340,6 +340,14 @@ void VM_Version::get_processor_features() {
    UseMultiplyToLenIntrinsic = true;
  }

  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }

  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }

  if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) {
    UseBarriersForVolatile = (_features & CPU_DMB_ATOMICS) != 0;
  }
@@ -2899,6 +2899,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp)
  ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
  ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
  ldr(mirror, Address(tmp, mirror_offset));
  resolve_oop_handle(mirror);
}

@@ -2963,6 +2963,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Rcache,
                          cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
    __ ldr(Robj, Address(Robj, mirror_offset));
    __ resolve_oop_handle(Robj);
  }
}
@@ -517,6 +517,9 @@ class Assembler : public AbstractAssembler {
    XXPERMDI_OPCODE= (60u << OPCODE_SHIFT | 10u << 3),
    XXMRGHW_OPCODE = (60u << OPCODE_SHIFT | 18u << 3),
    XXMRGLW_OPCODE = (60u << OPCODE_SHIFT | 50u << 3),
    XXSPLTW_OPCODE = (60u << OPCODE_SHIFT | 164u << 2),
    XXLXOR_OPCODE  = (60u << OPCODE_SHIFT | 154u << 3),
    XXLEQV_OPCODE  = (60u << OPCODE_SHIFT | 186u << 3),

    // Vector Permute and Formatting
    VPKPX_OPCODE   = (4u << OPCODE_SHIFT | 782u ),
@@ -1125,6 +1128,7 @@ class Assembler : public AbstractAssembler {
  static int vsplti_sim(int x)  { return opp_u_field(x, 15, 11); } // for vsplti* instructions
  static int vsldoi_shb(int x)  { return opp_u_field(x, 25, 22); } // for vsldoi instruction
  static int vcmp_rc(   int x)  { return opp_u_field(x, 21, 21); } // for vcmp* instructions
  static int xxsplt_uim(int x)  { return opp_u_field(x, 15, 14); } // for xxsplt* instructions

  //static int xo1(     int x)  { return opp_u_field(x, 29, 21); }// is contained in our opcodes
  //static int xo2(     int x)  { return opp_u_field(x, 30, 21); }// is contained in our opcodes
@@ -1308,6 +1312,7 @@ class Assembler : public AbstractAssembler {
  inline void li(   Register d, int si16);
  inline void lis(  Register d, int si16);
  inline void addir(Register d, int si16, Register a);
  inline void subi( Register d, Register a, int si16);

  static bool is_addi(int x) {
    return ADDI_OPCODE == (x & ADDI_OPCODE_MASK);
@@ -2154,6 +2159,11 @@ class Assembler : public AbstractAssembler {
  inline void xxpermdi( VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm);
  inline void xxmrghw(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
  inline void xxmrglw(  VectorSRegister d, VectorSRegister a, VectorSRegister b);
  inline void mtvsrd(   VectorSRegister d, Register a);
  inline void mtvsrwz(  VectorSRegister d, Register a);
  inline void xxspltw(  VectorSRegister d, VectorSRegister b, int ui2);
  inline void xxlxor(   VectorSRegister d, VectorSRegister a, VectorSRegister b);
  inline void xxleqv(   VectorSRegister d, VectorSRegister a, VectorSRegister b);

  // VSX Extended Mnemonics
  inline void xxspltd(  VectorSRegister d, VectorSRegister a, int x);
@@ -2174,7 +2184,8 @@ class Assembler : public AbstractAssembler {
  inline void vsbox(    VectorRegister d, VectorRegister a);

  // SHA (introduced with Power 8)
  // Not yet implemented.
  inline void vshasigmad(VectorRegister d, VectorRegister a, bool st, int six);
  inline void vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);

  // Vector Binary Polynomial Multiplication (introduced with Power 8)
  inline void vpmsumb(  VectorRegister d, VectorRegister a, VectorRegister b);
@@ -2285,6 +2296,11 @@ class Assembler : public AbstractAssembler {
  inline void lvsl(  VectorRegister d, Register s2);
  inline void lvsr(  VectorRegister d, Register s2);

  // Endianness specific concatenation of 2 loaded vectors.
  inline void load_perm(VectorRegister perm, Register addr);
  inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
  inline void vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm);

  // RegisterOrConstant versions.
  // These emitters choose between the versions using two registers and
  // those with register and immediate, depending on the content of roc.
@@ -164,6 +164,7 @@ inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32
inline void Assembler::li(   Register d, int si16)             { Assembler::addi_r0ok( d, R0, si16); }
inline void Assembler::lis(  Register d, int si16)             { Assembler::addis_r0ok(d, R0, si16); }
inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
inline void Assembler::subi( Register d, Register a, int si16) { Assembler::addi(d, a, -si16); }

// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
inline void Assembler::cmpi(  ConditionRegister f, int l, Register a, int si16)   { emit_int32( CMPI_OPCODE  | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
@@ -760,9 +761,14 @@ inline void Assembler::lvsr(  VectorRegister d, Register s1, Register s2) { emit
// Vector-Scalar (VSX) instructions.
inline void Assembler::lxvd2x(  VectorSRegister d, Register s1)              { emit_int32( LXVD2X_OPCODE  | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::lxvd2x(  VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE  | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1)              { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::mtvrd(   VectorRegister d, Register a)                { emit_int32( MTVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1)              { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra(0) | rb(s1)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::mtvsrd(  VectorSRegister d, Register a)               { emit_int32( MTVSRD_OPCODE  | vsrt(d) | ra(a)); }
inline void Assembler::mtvsrwz( VectorSRegister d, Register a)               { emit_int32( MTVSRWZ_OPCODE | vsrt(d) | ra(a)); }
inline void Assembler::xxspltw( VectorSRegister d, VectorSRegister b, int ui2) { emit_int32( XXSPLTW_OPCODE | vsrt(d) | vsrb(b) | xxsplt_uim(uimm(ui2,2))); }
inline void Assembler::xxlxor(  VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLXOR_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xxleqv(  VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLEQV_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::mtvrd(   VectorRegister d, Register a)                { emit_int32( MTVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrd(   Register a, VectorRegister d)                { emit_int32( MFVSRD_OPCODE  | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mtvrwz(  VectorRegister d, Register a)                { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrwz(  Register a, VectorRegister d)                { emit_int32( MFVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
@@ -925,7 +931,8 @@ inline void Assembler::vncipherlast(VectorRegister d, VectorRegister a, VectorRe
inline void Assembler::vsbox(       VectorRegister d, VectorRegister a)      { emit_int32( VSBOX_OPCODE  | vrt(d) | vra(a) ); }

// SHA (introduced with Power 8)
// Not yet implemented.
inline void Assembler::vshasigmad(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAD_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
inline void Assembler::vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAW_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }

// Vector Binary Polynomial Multiplication (introduced with Power 8)
inline void Assembler::vpmsumb(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPMSUMB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@@ -1034,6 +1041,30 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
inline void Assembler::lvsl(  VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::lvsr(  VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }

inline void Assembler::load_perm(VectorRegister perm, Register addr) {
#if defined(VM_LITTLE_ENDIAN)
  lvsr(perm, addr);
#else
  lvsl(perm, addr);
#endif
}

inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm) {
#if defined(VM_LITTLE_ENDIAN)
  vperm(first_dest, second, first_dest, perm);
#else
  vperm(first_dest, first_dest, second, perm);
#endif
}

inline void Assembler::vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm) {
#if defined(VM_LITTLE_ENDIAN)
  vperm(dest, second, first, perm);
#else
  vperm(dest, first, second, perm);
#endif
}
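
What load_perm plus vec_perm implement is an unaligned 16-byte load assembled from the two aligned quadwords that straddle the address; the lvsl/lvsr choice and the swapped vperm operand order hide the byte-order difference from call sites. A host-side model of the resulting window (illustrative C++, not VM code; unaligned_window is a hypothetical name):

#include <cstdint>
#include <cstddef>

// Select 16 consecutive bytes out of the 32-byte concatenation of the two
// aligned quadwords lo and hi, starting at offset = addr & 15. This is the
// effect the permute-vector dance achieves on both endiannesses.
static void unaligned_window(const uint8_t lo[16], const uint8_t hi[16],
                             size_t offset, uint8_t out[16]) {
  for (size_t i = 0; i < 16; ++i) {
    size_t src = offset + i;                      // index into lo ++ hi
    out[i] = (src < 16) ? lo[src] : hi[src - 16];
  }
}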

inline void Assembler::load_const(Register d, void* x, Register tmp) {
  load_const(d, (long)x, tmp);
}
@@ -32,7 +32,7 @@
// Sets the default values for platform dependent flags used by the runtime system.
// (see globals.hpp)

define_pd_global(bool, ShareVtableStubs, false); // Improves performance markedly for mtrt and compress.
define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.

@@ -103,6 +103,9 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
          "CPU Version: x for PowerX. Currently recognizes Power5 to " \
          "Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
                                                                       \
  product(bool, SuperwordUseVSX, false,                                \
          "Use Power8 VSX instructions for superword optimization.")   \
                                                                       \
  /* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \
  /* indirect call by a direct call.                                */ \
  product(bool, ReoptimizeCallSequences, true,                         \
@@ -129,7 +129,7 @@ void MacroAssembler::calculate_address_from_global_toc(Register dst, address add
  }
}

int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
address MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
  const int offset = MacroAssembler::offset_to_global_toc(addr);

  const address inst2_addr = a;
@@ -155,7 +155,7 @@ int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, addres
  assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
  set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
  set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
  return (int)((intptr_t)addr - (intptr_t)inst1_addr);
  return inst1_addr;
}

address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
@@ -201,7 +201,7 @@ address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr
//   clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
//   ori    rx = rx | const.lo
// Clrldi will be passed by.
int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
address MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
  assert(UseCompressedOops, "Should only patch compressed oops");

  const address inst2_addr = a;
@@ -227,7 +227,7 @@ int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop dat

  set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
  set_imm((int *)inst2_addr, (xd)); // unsigned int
  return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
  return inst1_addr;
}

// Get compressed oop or klass constant.
@@ -3382,6 +3382,7 @@ void MacroAssembler::load_mirror_from_const_method(Register mirror, Register con
  ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
  ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
  ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);
  resolve_oop_handle(mirror);
}

// Clear Array
@@ -5234,6 +5235,40 @@ void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
  bind(L_post_third_loop_done);
}   // multiply_128_x_128_loop

void MacroAssembler::muladd(Register out, Register in,
                            Register offset, Register len, Register k,
                            Register tmp1, Register tmp2, Register carry) {

  // Labels
  Label LOOP, SKIP;

  // Make sure length is positive.
  cmpdi  (CCR0,    len,     0);

  // Prepare variables
  subi   (offset,  offset,  4);
  li     (carry,   0);
  ble    (CCR0,    SKIP);

  mtctr  (len);
  subi   (len,     len,     1 );
  sldi   (len,     len,     2 );

  // Main loop
  bind(LOOP);
  lwzx   (tmp1,    len,     in );
  lwzx   (tmp2,    offset,  out );
  mulld  (tmp1,    tmp1,    k );
  add    (tmp2,    carry,   tmp2 );
  add    (tmp2,    tmp1,    tmp2 );
  stwx   (tmp2,    offset,  out );
  srdi   (carry,   tmp2,    32 );
  subi   (offset,  offset,  4 );
  subi   (len,     len,     4 );
  bdnz   (LOOP);
  bind(SKIP);
}

void MacroAssembler::multiply_to_len(Register x, Register xlen,
                                     Register y, Register ylen,
                                     Register z, Register zlen,
@@ -105,13 +105,15 @@ class MacroAssembler: public Assembler {
  };

  inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
  static int patch_calculate_address_from_global_toc_at(address a, address addr, address bound);
  // Returns address of first instruction in sequence.
  static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr);
  static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);

#ifdef _LP64
  // Patch narrow oop constant.
  inline static bool is_set_narrow_oop(address a, address bound);
  static int patch_set_narrow_oop(address a, address bound, narrowOop data);
  // Returns address of first instruction in sequence.
  static address patch_set_narrow_oop(address a, address bound, narrowOop data);
  static narrowOop get_narrow_oop(address a, address bound);
#endif
@@ -813,6 +815,8 @@ class MacroAssembler: public Assembler {
                               Register yz_idx, Register idx, Register carry,
                               Register product_high, Register product,
                               Register carry2, Register tmp);
  void muladd(Register out, Register in, Register offset, Register len, Register k,
              Register tmp1, Register tmp2, Register carry);
  void multiply_to_len(Register x, Register xlen,
                       Register y, Register ylen,
                       Register z, Register zlen,
@@ -862,6 +866,40 @@ class MacroAssembler: public Assembler {
  void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
                                  bool invertCRC);

  // SHA-2 auxiliary functions and public interfaces
 private:
  void sha256_deque(const VectorRegister src,
                    const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
  void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
  void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
  void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
                                const int total_ws, const Register k, const VectorRegister* kpws,
                                const int total_kpws);
  void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
                      const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
                      const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
                      const Register j, const Register k);
  void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
                               const VectorRegister c, const VectorRegister d, const VectorRegister e,
                               const VectorRegister f, const VectorRegister g, const VectorRegister h,
                               const Register hptr);

  void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
  void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
  void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
  void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
  void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
                      const VectorRegister w2, const VectorRegister w3,
                      const VectorRegister w4, const VectorRegister w5,
                      const VectorRegister w6, const VectorRegister w7,
                      const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
                      const VectorRegister vRb, const Register k);

 public:
  void sha256(bool multi_block);
  void sha512(bool multi_block);

  //
  // Debugging
  //
src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp (new file, 1136 lines; diff suppressed because it is too large)
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
@@ -221,13 +221,13 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
  // A calculation relative to the global TOC.
  if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
      (address)data) {
    const int invalidated_range =
      MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
    const address inst2_addr = addr;
    const address inst1_addr =
      MacroAssembler::patch_calculate_address_from_global_toc_at(inst2_addr, cb->content_begin(),
                                                                 (address)data);
    const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
    // FIXME:
    const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
    ICache::ppc64_flush_icache_bytes(start, range);
    assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
    const int range = inst2_addr - inst1_addr + BytesPerInstWord;
    ICache::ppc64_flush_icache_bytes(inst1_addr, range);
  }
  next_address = addr + 1 * BytesPerInstWord;
} else if (MacroAssembler::is_load_const_at(addr)) {
@@ -288,15 +288,15 @@ void NativeMovConstReg::set_data(intptr_t data) {
}

void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
  address addr = addr_at(0);
  address inst2_addr = addr_at(0);
  CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
  if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
  const int invalidated_range =
    MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
  const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
  // FIXME:
  const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
  ICache::ppc64_flush_icache_bytes(start, range);
  if (MacroAssembler::get_narrow_oop(inst2_addr, cb->content_begin()) == (long)data)
    return;
  const address inst1_addr =
    MacroAssembler::patch_set_narrow_oop(inst2_addr, cb->content_begin(), (long)data);
  assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
  const int range = inst2_addr - inst1_addr + BytesPerInstWord;
  ICache::ppc64_flush_icache_bytes(inst1_addr, range);
}
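
With the patchers returning the address of the first patched instruction instead of a signed offset, the invalidated range falls out of plain pointer arithmetic, replacing the old FIXME heuristics. A minimal sketch of that computation (illustrative C++; flush_range is a hypothetical name, BytesPerInstWord is 4 on fixed-width PPC):

#include <cassert>
#include <cstdint>

using address = uint8_t*;
const int BytesPerInstWord = 4;  // every PPC instruction is one 4-byte word

// Every byte from the first patched instruction through the anchor
// instruction (inclusive) must be flushed from the instruction cache.
static int flush_range(address inst1_addr, address inst2_addr) {
  assert(inst1_addr != nullptr && inst1_addr < inst2_addr);
  return (int)(inst2_addr - inst1_addr) + BytesPerInstWord;
}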

// Do not use an assertion here. Let clients decide whether they only
@@ -254,6 +254,73 @@ register %{
  reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
  reg_def SR_PPR(    SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg());     // v

// ----------------------------
// Vector-Scalar Registers
// ----------------------------
  reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
  reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
  reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
  reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
  reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
  reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
  reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
  reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
  reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
  reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
  reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
  reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
  reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
  reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
  reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
  reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
  reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
  reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
  reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
  reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
  reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
  reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
  reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
  reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
  reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
  reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
  reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
  reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
  reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
  reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
  reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
  reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
  reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
  reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
  reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
  reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
  reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
  reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
  reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
  reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
  reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
  reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
  reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
  reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
  reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
  reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
  reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
  reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
  reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
  reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
  reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
  reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
  reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
  reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
  reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
  reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
  reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
  reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
  reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
  reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
  reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
  reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
  reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
  reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);

// ----------------------------
// Specify priority of register selection within phases of register
@@ -385,6 +452,73 @@ alloc_class chunk2 (
);

alloc_class chunk3 (
  VSR0,
  VSR1,
  VSR2,
  VSR3,
  VSR4,
  VSR5,
  VSR6,
  VSR7,
  VSR8,
  VSR9,
  VSR10,
  VSR11,
  VSR12,
  VSR13,
  VSR14,
  VSR15,
  VSR16,
  VSR17,
  VSR18,
  VSR19,
  VSR20,
  VSR21,
  VSR22,
  VSR23,
  VSR24,
  VSR25,
  VSR26,
  VSR27,
  VSR28,
  VSR29,
  VSR30,
  VSR31,
  VSR32,
  VSR33,
  VSR34,
  VSR35,
  VSR36,
  VSR37,
  VSR38,
  VSR39,
  VSR40,
  VSR41,
  VSR42,
  VSR43,
  VSR44,
  VSR45,
  VSR46,
  VSR47,
  VSR48,
  VSR49,
  VSR50,
  VSR51,
  VSR52,
  VSR53,
  VSR54,
  VSR55,
  VSR56,
  VSR57,
  VSR58,
  VSR59,
  VSR60,
  VSR61,
  VSR62,
  VSR63
);

alloc_class chunk4 (
  // special registers
  // These registers are not allocated, but used for nodes generated by postalloc expand.
  SR_XER,
@@ -769,6 +903,45 @@ reg_class dbl_reg(
  F31, F31_H // nv!
);

// ----------------------------
// Vector-Scalar Register Class
// ----------------------------

reg_class vs_reg(
  VSR32,
  VSR33,
  VSR34,
  VSR35,
  VSR36,
  VSR37,
  VSR38,
  VSR39,
  VSR40,
  VSR41,
  VSR42,
  VSR43,
  VSR44,
  VSR45,
  VSR46,
  VSR47,
  VSR48,
  VSR49,
  VSR50,
  VSR51
//  VSR52,   // nv!
//  VSR53,   // nv!
//  VSR54,   // nv!
//  VSR55,   // nv!
//  VSR56,   // nv!
//  VSR57,   // nv!
//  VSR58,   // nv!
//  VSR59,   // nv!
//  VSR60,   // nv!
//  VSR61,   // nv!
//  VSR62,   // nv!
//  VSR63    // nv!
);

%}

//----------DEFINITION BLOCK---------------------------------------------------
@@ -1502,7 +1675,7 @@ static enum RC rc_class(OptoReg::Name reg) {
  if (reg < 64+64) return rc_float;

  // Between float regs & stack are the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");
  assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags");

  return rc_stack;
}
@@ -2048,14 +2221,24 @@ const bool Matcher::convL2FSupported(void) {

// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(MaxVectorSize == 8, "");
  return 8;
  if (SuperwordUseVSX) {
    assert(MaxVectorSize == 16, "");
    return 16;
  } else {
    assert(MaxVectorSize == 8, "");
    return 8;
  }
}

// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize == 8 && size == 8, "");
  return Op_RegL;
  if (SuperwordUseVSX) {
    assert(MaxVectorSize == 16 && size == 16, "");
    return Op_VecX;
  } else {
    assert(MaxVectorSize == 8 && size == 8, "");
    return Op_RegL;
  }
}
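
The two hooks must stay in lockstep: the superword vectorizer sizes its packs from vector_width_in_bytes() while the register allocator types them from vector_ideal_reg(). A toy model of that coupling (illustrative C++; names are hypothetical, not the matcher API):

#include <cassert>

// Width and register kind must be derived from the same flag, mirroring
// the asserts in the two Matcher hooks above.
enum IdealReg { RegL /* 8-byte long register */, VecX /* 16-byte VSX register */ };

static bool superword_use_vsx = false;  // models the SuperwordUseVSX flag

static int vector_width_in_bytes() { return superword_use_vsx ? 16 : 8; }

static IdealReg vector_ideal_reg(int size) {
  assert(size == vector_width_in_bytes());  // the invariant both asserts enforce
  return superword_use_vsx ? VecX : RegL;
}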

const uint Matcher::vector_shift_count_ideal_reg(int size) {
@@ -2075,7 +2258,7 @@ const int Matcher::min_vector_size(const BasicType bt) {

// PPC doesn't support misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return false;
  return !AlignVector; // can be changed by flag
}

// PPC AES support not yet implemented
@@ -2217,10 +2400,31 @@ const MachRegisterNumbers farg_reg[13] = {
  F13_num
};

const MachRegisterNumbers vsarg_reg[64] = {
  VSR0_num, VSR1_num, VSR2_num, VSR3_num,
  VSR4_num, VSR5_num, VSR6_num, VSR7_num,
  VSR8_num, VSR9_num, VSR10_num, VSR11_num,
  VSR12_num, VSR13_num, VSR14_num, VSR15_num,
  VSR16_num, VSR17_num, VSR18_num, VSR19_num,
  VSR20_num, VSR21_num, VSR22_num, VSR23_num,
  VSR24_num, VSR25_num, VSR26_num, VSR27_num,
  VSR28_num, VSR29_num, VSR30_num, VSR31_num,
  VSR32_num, VSR33_num, VSR34_num, VSR35_num,
  VSR36_num, VSR37_num, VSR38_num, VSR39_num,
  VSR40_num, VSR41_num, VSR42_num, VSR43_num,
  VSR44_num, VSR45_num, VSR46_num, VSR47_num,
  VSR48_num, VSR49_num, VSR50_num, VSR51_num,
  VSR52_num, VSR53_num, VSR54_num, VSR55_num,
  VSR56_num, VSR57_num, VSR58_num, VSR59_num,
  VSR60_num, VSR61_num, VSR62_num, VSR63_num
};

const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);

const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);

const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
@@ -2552,6 +2756,115 @@ loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immL
  return nodes;
}

typedef struct {
  loadConL_hiNode *_large_hi;
  loadConL_loNode *_large_lo;
  mtvsrdNode      *_moved;
  xxspltdNode     *_replicated;
  loadConLNode    *_small;
  MachNode        *_last;
} loadConLReplicatedNodesTuple;

loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
                                                                 vecXOper *dst, immI_0Oper *zero,
                                                                 OptoReg::Name reg_second, OptoReg::Name reg_first,
                                                                 OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
  loadConLReplicatedNodesTuple nodes;

  const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
  if (large_constant_pool) {
    // Create new nodes.
    loadConL_hiNode *m1 = new loadConL_hiNode();
    loadConL_loNode *m2 = new loadConL_loNode();
    mtvsrdNode *m3 = new mtvsrdNode();
    xxspltdNode *m4 = new xxspltdNode();

    // inputs for new nodes
    m1->add_req(NULL, toc);
    m2->add_req(NULL, m1);
    m3->add_req(NULL, m2);
    m4->add_req(NULL, m3);

    // operands for new nodes
    m1->_opnds[0] = new iRegLdstOper(); // dst
    m1->_opnds[1] = immSrc;             // src
    m1->_opnds[2] = new iRegPdstOper(); // toc

    m2->_opnds[0] = new iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;             // src
    m2->_opnds[2] = new iRegLdstOper(); // base

    m3->_opnds[0] = new vecXOper();     // dst
    m3->_opnds[1] = new iRegLdstOper(); // src

    m4->_opnds[0] = new vecXOper();     // dst
    m4->_opnds[1] = new vecXOper();     // src
    m4->_opnds[2] = zero;

    // Initialize ins_attrib TOC fields.
    m1->_const_toc_offset = -1;
    m2->_const_toc_offset_hi_node = m1;

    // Initialize ins_attrib instruction offset.
    m1->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m1->_idx, reg_second, reg_first);
    ra_->set_pair(m2->_idx, reg_second, reg_first);
    ra_->set1(m3->_idx, reg_second);
    ra_->set2(m3->_idx, reg_vec_first);
    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);

    // Create result.
    nodes._large_hi = m1;
    nodes._large_lo = m2;
    nodes._moved = m3;
    nodes._replicated = m4;
    nodes._small = NULL;
    nodes._last = nodes._replicated;
    assert(m2->bottom_type()->isa_long(), "must be long");
  } else {
    loadConLNode *m2 = new loadConLNode();
    mtvsrdNode *m3 = new mtvsrdNode();
    xxspltdNode *m4 = new xxspltdNode();

    // inputs for new nodes
    m2->add_req(NULL, toc);

    // operands for new nodes
    m2->_opnds[0] = new iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;             // src
    m2->_opnds[2] = new iRegPdstOper(); // toc

    m3->_opnds[0] = new vecXOper();     // dst
    m3->_opnds[1] = new iRegLdstOper(); // src

    m4->_opnds[0] = new vecXOper();     // dst
    m4->_opnds[1] = new vecXOper();     // src
    m4->_opnds[2] = zero;

    // Initialize ins_attrib instruction offset.
    m2->_cbuf_insts_offset = -1;
    ra_->set1(m3->_idx, reg_second);
    ra_->set2(m3->_idx, reg_vec_first);
    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, reg_second, reg_first);

    // Create result.
    nodes._large_hi = NULL;
    nodes._large_lo = NULL;
    nodes._small = m2;
    nodes._moved = m3;
    nodes._replicated = m4;
    nodes._last = nodes._replicated;
    assert(m2->bottom_type()->isa_long(), "must be long");
  }

  return nodes;
}

%} // source

encode %{
@@ -3212,6 +3525,27 @@ encode %{
    assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
  %}

  enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc) %{
    // Create new nodes.

    // Make an operand with the bit pattern to load as float.
    immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
    immI_0Oper *op_zero = new immI_0Oper(0);

    loadConLReplicatedNodesTuple loadConLNodes =
      loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
                                          OptoReg::Name(R20_H_num), OptoReg::Name(R20_num),
                                          OptoReg::Name(VSR11_num), OptoReg::Name(VSR10_num));

    // Push new nodes.
    if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
    if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
    if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
    if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }

    assert(nodes->length() >= 1, "must have created at least 1 node");
  %}

  // This enc_class is needed so that scheduler gets proper
  // input mapping for latency computation.
  enc_class enc_poll(immI dst, iRegLdst poll) %{
|
||||
//
|
||||
// Formats are generated automatically for constants and base registers.
|
||||
|
||||
operand vecX() %{
|
||||
constraint(ALLOC_IN_RC(vs_reg));
|
||||
match(VecX);
|
||||
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
//----------Simple Operands----------------------------------------------------
|
||||
// Immediate Operands
|
||||
|
||||
@@ -5372,6 +5714,20 @@ instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
  ins_pipe(pipe_class_memory);
%}

// Load Aligned Packed Byte
instruct loadV16(vecX dst, indirect mem) %{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(MEMORY_REF_COST);

  format %{ "LXVD2X      $dst, $mem \t// load 16-byte Vector" %}
  size(4);
  ins_encode %{
    __ lxvd2x($dst$$VectorSRegister, $mem$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
  match(Set dst (LoadRange mem));
@@ -6368,6 +6724,20 @@ instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
  ins_pipe(pipe_class_memory);
%}

// Store Packed Byte long register to memory
instruct storeV16(indirect mem, vecX src) %{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(MEMORY_REF_COST);

  format %{ "STXVD2X     $mem, $src \t// store 16-byte Vector" %}
  size(4);
  ins_encode %{
    __ stxvd2x($src$$VectorSRegister, $mem$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
  match(Set dst (StoreN dst src));
@@ -13239,6 +13609,26 @@ instruct storeS_reversed(iRegIsrc src, indirect mem) %{
  ins_pipe(pipe_class_default);
%}

instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
  effect(DEF temp1, USE src);

  size(4);
  ins_encode %{
    __ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
  effect(DEF dst, USE src, USE imm1);

  size(4);
  ins_encode %{
    __ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

//---------- Replicate Vector Instructions ------------------------------------

// Insrdi does replicate if src == dst.
@ -13318,6 +13708,46 @@ instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
|
||||
match(Set dst (ReplicateB src));
|
||||
predicate(n->as_Vector()->length() == 16);
|
||||
|
||||
expand %{
|
||||
iRegLdst tmpL;
|
||||
vecX tmpV;
|
||||
immI8 imm1 %{ (int) 1 %}
|
||||
moveReg(tmpL, src);
|
||||
repl56(tmpL);
|
||||
repl48(tmpL);
|
||||
mtvsrwz(tmpV, tmpL);
|
||||
xxspltw(dst, tmpV, imm1);
|
||||
%}
|
||||
%}
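
A hedged note on the expansion above: moveReg/repl56/repl48 smear the low byte of src across a 64-bit GPR, mtvsrwz moves that into a VSX register, and xxspltw splats one word across all four word lanes. The net value it materializes, in plain C++ (illustrative only):

#include <cstdint>

// What repl16B_reg_Ex ultimately produces: src's low byte in all 16 lanes.
static void replicate16B(uint8_t dst[16], int src) {
  for (int lane = 0; lane < 16; lane++) {
    dst[lane] = (uint8_t)src;   // same byte pattern in every lane
  }
}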

instruct repl16B_immI0(vecX dst, immI_0 zero) %{
  match(Set dst (ReplicateB zero));
  predicate(n->as_Vector()->length() == 16);

  format %{ "XXLXOR $dst, $zero \t// replicate16B" %}
  size(4);
  ins_encode %{
    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateB src));
  predicate(n->as_Vector()->length() == 16);

  format %{ "XXLEQV $dst, $src \t// replicate16B" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (ReplicateS src));
  predicate(n->as_Vector()->length() == 4);
@ -13352,6 +13782,46 @@ instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
  ins_pipe(pipe_class_default);
%}

instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
  match(Set dst (ReplicateS src));
  predicate(n->as_Vector()->length() == 8);

  expand %{
    iRegLdst tmpL;
    vecX tmpV;
    immI8 zero %{ (int) 0 %}
    moveReg(tmpL, src);
    repl48(tmpL);
    repl32(tmpL);
    mtvsrd(tmpV, tmpL);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}

instruct repl8S_immI0(vecX dst, immI_0 zero) %{
  match(Set dst (ReplicateS zero));
  predicate(n->as_Vector()->length() == 8);

  format %{ "XXLXOR $dst, $zero \t// replicate8S" %}
  size(4);
  ins_encode %{
    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateS src));
  predicate(n->as_Vector()->length() == 8);

  format %{ "XXLEQV $dst, $src \t// replicate16B" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
  match(Set dst (ReplicateI src));
  predicate(n->as_Vector()->length() == 2);
@ -13386,6 +13856,46 @@ instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
  ins_pipe(pipe_class_default);
%}

instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
  match(Set dst (ReplicateI src));
  predicate(n->as_Vector()->length() == 4);
  ins_cost(2 * DEFAULT_COST);

  expand %{
    iRegLdst tmpL;
    vecX tmpV;
    immI8 zero %{ (int) 0 %}
    moveReg(tmpL, src);
    repl32(tmpL);
    mtvsrd(tmpV, tmpL);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}

instruct repl4I_immI0(vecX dst, immI_0 zero) %{
  match(Set dst (ReplicateI zero));
  predicate(n->as_Vector()->length() == 4);

  format %{ "XXLXOR $dst, $zero \t// replicate4I" %}
  size(4);
  ins_encode %{
    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateI src));
  predicate(n->as_Vector()->length() == 4);

  format %{ "XXLEQV $dst, $dst, $dst \t// replicate4I" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// Move float to int register via stack, replicate.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
  match(Set dst (ReplicateF src));
@ -13484,6 +13994,154 @@ instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
%}


instruct repl4F_reg_Ex(vecX dst, regF src) %{
  match(Set dst (ReplicateF src));
  predicate(n->as_Vector()->length() == 4);
  ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
  expand %{
    stackSlotL tmpS;
    iRegIdst tmpI;
    iRegLdst tmpL;
    vecX tmpV;
    immI8 zero %{ (int) 0 %}

    moveF2I_reg_stack(tmpS, src);   // Move float to stack.
    moveF2I_stack_reg(tmpI, tmpS);  // Move stack to int reg.
    moveReg(tmpL, tmpI);            // Move int to long reg.
    repl32(tmpL);                   // Replicate bitpattern.
    mtvsrd(tmpV, tmpL);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}

instruct repl4F_immF_Ex(vecX dst, immF src) %{
  match(Set dst (ReplicateF src));
  predicate(n->as_Vector()->length() == 4);
  ins_cost(10 * DEFAULT_COST);

  postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase) );
%}

instruct repl4F_immF0(vecX dst, immF_0 zero) %{
  match(Set dst (ReplicateF zero));
  predicate(n->as_Vector()->length() == 4);

  format %{ "XXLXOR $dst, $zero \t// replicate4F" %}
  ins_encode %{
    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl2D_reg_Ex(vecX dst, regD src) %{
  match(Set dst (ReplicateD src));
  predicate(n->as_Vector()->length() == 2);
  expand %{
    stackSlotL tmpS;
    iRegLdst tmpL;
    iRegLdst tmp;
    vecX tmpV;
    immI8 zero %{ (int) 0 %}
    moveD2L_reg_stack(tmpS, src);
    moveD2L_stack_reg(tmpL, tmpS);
    mtvsrd(tmpV, tmpL);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}

instruct repl2D_immI0(vecX dst, immI_0 zero) %{
  match(Set dst (ReplicateD zero));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLXOR $dst, $zero \t// replicate2D" %}
  size(4);
  ins_encode %{
    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl2D_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateD src));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLEQV $dst, $src \t// replicate16B" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct mtvsrd(vecX dst, iRegLsrc src) %{
  predicate(false);
  effect(DEF dst, USE src);

  format %{ "MTVSRD $dst, $src \t// Move to 16-byte register"%}
  size(4);
  ins_encode %{
    __ mtvsrd($dst$$VectorSRegister, $src$$Register);
  %}
  ins_pipe(pipe_class_default);
%}

instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
  effect(DEF dst, USE src, USE zero);

  format %{ "XXSPLATD $dst, $src, $zero \t// Permute 16-byte register"%}
  size(4);
  ins_encode %{
    __ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
  effect(DEF dst, USE src1, USE src2, USE zero);

  format %{ "XXPERMDI $dst, $src1, $src2, $zero \t// Permute 16-byte register"%}
  size(4);
  ins_encode %{
    __ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
  match(Set dst (ReplicateL src));
  predicate(n->as_Vector()->length() == 2);
  expand %{
    vecX tmpV;
    immI8 zero %{ (int) 0 %}
    mtvsrd(tmpV, src);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}

instruct repl2L_immI0(vecX dst, immI_0 zero) %{
  match(Set dst (ReplicateL zero));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLXOR $dst, $zero \t// replicate2L" %}
  size(4);
  ins_encode %{
    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateL src));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLEQV $dst, $src \t// replicate16B" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}

// ============================================================================
// Safepoint Instruction

@ -31,3 +31,5 @@
REGISTER_DEFINITION(Register, noreg);

REGISTER_DEFINITION(FloatRegister, fnoreg);

REGISTER_DEFINITION(VectorSRegister, vsnoreg);

@ -677,7 +677,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
  * 2 // register halves
  + ConditionRegisterImpl::number_of_registers // condition code registers
  + SpecialRegisterImpl::number_of_registers // special registers
  + VectorRegisterImpl::number_of_registers // VSX registers
  + VectorSRegisterImpl::number_of_registers // VSX registers
  };

  static const int max_gpr;

@ -479,8 +479,8 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_siz

// Is vector's size (in bytes) bigger than a size saved by default?
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8 on PPC64.
  assert(size <= 8, "%d bytes vectors are not supported", size);
  // Note, MaxVectorSize == 8/16 on PPC64.
  assert(size <= (SuperwordUseVSX ? 16 : 8), "%d bytes vectors are not supported", size);
  return size > 8;
}

@ -2234,9 +2234,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
  __ release();
  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
  __ stw(R0, thread_(thread_state));
  if (UseMembar) {
    __ fence();
  }


  // The JNI call
@ -2393,9 +2390,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
  __ release();
  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
  __ stw(R0, thread_(thread_state));
  if (UseMembar) {
    __ fence();
  }
  __ bind(after_transition);

  // Reguard any pages if necessary.

@ -2667,7 +2667,7 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  // Arguments for generated stub (little endian only):
  // Arguments for generated stub:
  //   R3_ARG1 - source byte array address
  //   R4_ARG2 - destination byte array address
  //   R5_ARG3 - round key array
@ -2686,7 +2686,6 @@ class StubGenerator: public StubCodeGenerator {
    Register keylen = R8;
    Register temp = R9;
    Register keypos = R10;
    Register hex = R11;
    Register fifteen = R12;

    VectorRegister vRet = VR0;
@ -2706,164 +2705,170 @@ class StubGenerator: public StubCodeGenerator {
    VectorRegister vTmp3 = VR11;
    VectorRegister vTmp4 = VR12;

    VectorRegister vLow = VR13;
    VectorRegister vHigh = VR14;

    __ li (hex, 16);
    __ li (fifteen, 15);
    __ vspltisb (fSplt, 0x0f);

    // load unaligned from[0-15] to vsRet
    __ lvx (vRet, from);
    __ lvx (vTmp1, fifteen, from);
    __ lvsl (fromPerm, from);
#ifdef VM_LITTLE_ENDIAN
    __ vspltisb (fSplt, 0x0f);
    __ vxor (fromPerm, fromPerm, fSplt);
#endif
    __ vperm (vRet, vRet, vTmp1, fromPerm);

    // load keylen (44 or 52 or 60)
    __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);

    // to load keys
    __ lvsr (keyPerm, key);
    __ vxor (vTmp2, vTmp2, vTmp2);
    __ load_perm (keyPerm, key);
#ifdef VM_LITTLE_ENDIAN
    __ vspltisb (vTmp2, -16);
    __ vrld (keyPerm, keyPerm, vTmp2);
    __ vrld (keyPerm, keyPerm, vTmp2);
    __ vsldoi (keyPerm, keyPerm, keyPerm, 8);
#endif

    // load the 1st round key to vKey1
    __ li (keypos, 0);
    // load the 1st round key to vTmp1
    __ lvx (vTmp1, key);
    __ li (keypos, 16);
    __ lvx (vKey1, keypos, key);
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey1, vTmp1, vKey1, keyPerm);
    __ vec_perm (vTmp1, vKey1, keyPerm);

    // 1st round
    __ vxor (vRet, vRet, vKey1);
    __ vxor (vRet, vRet, vTmp1);

    // load the 2nd round key to vKey1
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
    __ li (keypos, 32);
    __ lvx (vKey2, keypos, key);
    __ vec_perm (vKey1, vKey2, keyPerm);

    // load the 3rd round key to vKey2
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
    __ li (keypos, 48);
    __ lvx (vKey3, keypos, key);
    __ vec_perm (vKey2, vKey3, keyPerm);

    // load the 4th round key to vKey3
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey3, vTmp2, vTmp1, keyPerm);
    __ li (keypos, 64);
    __ lvx (vKey4, keypos, key);
    __ vec_perm (vKey3, vKey4, keyPerm);

    // load the 5th round key to vKey4
    __ addi (keypos, keypos, 16);
    __ li (keypos, 80);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey4, vTmp1, vTmp2, keyPerm);
    __ vec_perm (vKey4, vTmp1, keyPerm);

    // 2nd - 5th rounds
    __ vcipher (vRet, vRet, vKey1);
    __ vcipher (vRet, vRet, vKey2);
    __ vcipher (vRet, vRet, vKey3);
    __ vcipher (vRet, vRet, vKey4);
    __ vcipher (vRet, vRet, vKey1);
    __ vcipher (vRet, vRet, vKey2);
    __ vcipher (vRet, vRet, vKey3);
    __ vcipher (vRet, vRet, vKey4);

    // load the 6th round key to vKey1
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
    __ li (keypos, 96);
    __ lvx (vKey2, keypos, key);
    __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);

    // load the 7th round key to vKey2
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
    __ li (keypos, 112);
    __ lvx (vKey3, keypos, key);
    __ vec_perm (vKey2, vKey3, keyPerm);

    // load the 8th round key to vKey3
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey3, vTmp2, vTmp1, keyPerm);
    __ li (keypos, 128);
    __ lvx (vKey4, keypos, key);
    __ vec_perm (vKey3, vKey4, keyPerm);

    // load the 9th round key to vKey4
    __ addi (keypos, keypos, 16);
    __ li (keypos, 144);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey4, vTmp1, vTmp2, keyPerm);
    __ vec_perm (vKey4, vTmp1, keyPerm);

    // 6th - 9th rounds
    __ vcipher (vRet, vRet, vKey1);
    __ vcipher (vRet, vRet, vKey2);
    __ vcipher (vRet, vRet, vKey3);
    __ vcipher (vRet, vRet, vKey4);
    __ vcipher (vRet, vRet, vKey1);
    __ vcipher (vRet, vRet, vKey2);
    __ vcipher (vRet, vRet, vKey3);
    __ vcipher (vRet, vRet, vKey4);

    // load the 10th round key to vKey1
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
    __ li (keypos, 160);
    __ lvx (vKey2, keypos, key);
    __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);

    // load the 11th round key to vKey2
    __ addi (keypos, keypos, 16);
    __ li (keypos, 176);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
    __ vec_perm (vKey2, vTmp1, keyPerm);

    // if all round keys are loaded, skip next 4 rounds
    __ cmpwi (CCR0, keylen, 44);
    __ beq (CCR0, L_doLast);

    // 10th - 11th rounds
    __ vcipher (vRet, vRet, vKey1);
    __ vcipher (vRet, vRet, vKey2);
    __ vcipher (vRet, vRet, vKey1);
    __ vcipher (vRet, vRet, vKey2);

    // load the 12th round key to vKey1
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
    __ li (keypos, 192);
    __ lvx (vKey2, keypos, key);
    __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);

    // load the 13th round key to vKey2
    __ addi (keypos, keypos, 16);
    __ li (keypos, 208);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
    __ vec_perm (vKey2, vTmp1, keyPerm);

    // if all round keys are loaded, skip next 2 rounds
    __ cmpwi (CCR0, keylen, 52);
    __ beq (CCR0, L_doLast);

    // 12th - 13th rounds
    __ vcipher (vRet, vRet, vKey1);
    __ vcipher (vRet, vRet, vKey2);
    __ vcipher (vRet, vRet, vKey1);
    __ vcipher (vRet, vRet, vKey2);

    // load the 14th round key to vKey1
    __ addi (keypos, keypos, 16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
    __ li (keypos, 224);
    __ lvx (vKey2, keypos, key);
    __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);

    // load the 15th round key to vKey2
    __ addi (keypos, keypos, 16);
    __ li (keypos, 240);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
    __ vec_perm (vKey2, vTmp1, keyPerm);

    __ bind(L_doLast);

    // last two rounds
    __ vcipher (vRet, vRet, vKey1);
    __ vcipherlast (vRet, vRet, vKey2);
    __ vcipher (vRet, vRet, vKey1);
    __ vcipherlast (vRet, vRet, vKey2);

    __ neg (temp, to);
    __ lvsr (toPerm, temp);
    __ vspltisb (vTmp2, -1);
    __ vxor (vTmp1, vTmp1, vTmp1);
    __ vperm (vTmp2, vTmp2, vTmp1, toPerm);
    __ vxor (toPerm, toPerm, fSplt);
    // store result (unaligned)
#ifdef VM_LITTLE_ENDIAN
    __ lvsl (toPerm, to);
#else
    __ lvsr (toPerm, to);
#endif
    __ vspltisb (vTmp3, -1);
    __ vspltisb (vTmp4, 0);
    __ lvx (vTmp1, to);
    __ vperm (vRet, vRet, vRet, toPerm);
    __ vsel (vTmp1, vTmp1, vRet, vTmp2);
    __ lvx (vTmp4, fifteen, to);
    __ lvx (vTmp2, fifteen, to);
#ifdef VM_LITTLE_ENDIAN
    __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
    __ vxor (toPerm, toPerm, fSplt); // swap bytes
#else
    __ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
#endif
    __ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
    __ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
    __ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
    __ stvx (vTmp2, fifteen, to); // store this one first (may alias)
    __ stvx (vTmp1, to);
    __ vsel (vRet, vRet, vTmp4, vTmp2);
    __ stvx (vRet, fifteen, to);

    __ blr();
    return start;
  }

  // Arguments for generated stub (little endian only):
  // Arguments for generated stub:
  //   R3_ARG1 - source byte array address
  //   R4_ARG2 - destination byte array address
  //   R5_ARG3 - K (key) in little endian int array
@ -2885,7 +2890,6 @@ class StubGenerator: public StubCodeGenerator {
    Register keylen = R8;
    Register temp = R9;
    Register keypos = R10;
    Register hex = R11;
    Register fifteen = R12;

    VectorRegister vRet = VR0;
@ -2906,30 +2910,30 @@ class StubGenerator: public StubCodeGenerator {
    VectorRegister vTmp3 = VR12;
    VectorRegister vTmp4 = VR13;

    VectorRegister vLow = VR14;
    VectorRegister vHigh = VR15;

    __ li (hex, 16);
    __ li (fifteen, 15);
    __ vspltisb (fSplt, 0x0f);

    // load unaligned from[0-15] to vsRet
    __ lvx (vRet, from);
    __ lvx (vTmp1, fifteen, from);
    __ lvsl (fromPerm, from);
#ifdef VM_LITTLE_ENDIAN
    __ vspltisb (fSplt, 0x0f);
    __ vxor (fromPerm, fromPerm, fSplt);
#endif
    __ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]

    // load keylen (44 or 52 or 60)
    __ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);

    // to load keys
    __ lvsr (keyPerm, key);
    __ load_perm (keyPerm, key);
#ifdef VM_LITTLE_ENDIAN
    __ vxor (vTmp2, vTmp2, vTmp2);
    __ vspltisb (vTmp2, -16);
    __ vrld (keyPerm, keyPerm, vTmp2);
    __ vrld (keyPerm, keyPerm, vTmp2);
    __ vsldoi (keyPerm, keyPerm, keyPerm, 8);
#endif

    __ cmpwi (CCR0, keylen, 44);
    __ beq (CCR0, L_do44);
@ -2937,32 +2941,32 @@ class StubGenerator: public StubCodeGenerator {
    __ cmpwi (CCR0, keylen, 52);
    __ beq (CCR0, L_do52);

    // load the 15th round key to vKey11
    // load the 15th round key to vKey1
    __ li (keypos, 240);
    __ lvx (vKey1, keypos, key);
    __ li (keypos, 224);
    __ lvx (vKey2, keypos, key);
    __ vec_perm (vKey1, vKey2, vKey1, keyPerm);

    // load the 14th round key to vKey2
    __ li (keypos, 208);
    __ lvx (vKey3, keypos, key);
    __ vec_perm (vKey2, vKey3, vKey2, keyPerm);

    // load the 13th round key to vKey3
    __ li (keypos, 192);
    __ lvx (vKey4, keypos, key);
    __ vec_perm (vKey3, vKey4, vKey3, keyPerm);

    // load the 12th round key to vKey4
    __ li (keypos, 176);
    __ lvx (vKey5, keypos, key);
    __ vec_perm (vKey4, vKey5, vKey4, keyPerm);

    // load the 11th round key to vKey5
    __ li (keypos, 160);
    __ lvx (vTmp1, keypos, key);
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey1, vTmp1, vTmp2, keyPerm);

    // load the 14th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey2, vTmp2, vTmp1, keyPerm);

    // load the 13th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey3, vTmp1, vTmp2, keyPerm);

    // load the 12th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey4, vTmp2, vTmp1, keyPerm);

    // load the 11th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey5, vTmp1, vTmp2, keyPerm);
    __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);

    // 1st - 5th rounds
    __ vxor (vRet, vRet, vKey1);
@ -2975,22 +2979,22 @@ class StubGenerator: public StubCodeGenerator {

    __ bind (L_do52);

    // load the 13th round key to vKey11
    // load the 13th round key to vKey1
    __ li (keypos, 208);
    __ lvx (vTmp1, keypos, key);
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
    __ lvx (vKey1, keypos, key);
    __ li (keypos, 192);
    __ lvx (vKey2, keypos, key);
    __ vec_perm (vKey1, vKey2, vKey1, keyPerm);

    // load the 12th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey2, vTmp2, vTmp1, keyPerm);
    // load the 12th round key to vKey2
    __ li (keypos, 176);
    __ lvx (vKey3, keypos, key);
    __ vec_perm (vKey2, vKey3, vKey2, keyPerm);

    // load the 11th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey3, vTmp1, vTmp2, keyPerm);
    // load the 11th round key to vKey3
    __ li (keypos, 160);
    __ lvx (vTmp1, keypos, key);
    __ vec_perm (vKey3, vTmp1, vKey3, keyPerm);

    // 1st - 3rd rounds
    __ vxor (vRet, vRet, vKey1);
@ -3001,42 +3005,42 @@ class StubGenerator: public StubCodeGenerator {

    __ bind (L_do44);

    // load the 11th round key to vKey11
    // load the 11th round key to vKey1
    __ li (keypos, 176);
    __ lvx (vKey1, keypos, key);
    __ li (keypos, 160);
    __ lvx (vTmp1, keypos, key);
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
    __ vec_perm (vKey1, vTmp1, vKey1, keyPerm);

    // 1st round
    __ vxor (vRet, vRet, vKey1);

    __ bind (L_doLast);

    // load the 10th round key to vKey10
    __ addi (keypos, keypos, -16);
    // load the 10th round key to vKey1
    __ li (keypos, 144);
    __ lvx (vKey2, keypos, key);
    __ vec_perm (vKey1, vKey2, vTmp1, keyPerm);

    // load the 9th round key to vKey2
    __ li (keypos, 128);
    __ lvx (vKey3, keypos, key);
    __ vec_perm (vKey2, vKey3, vKey2, keyPerm);

    // load the 8th round key to vKey3
    __ li (keypos, 112);
    __ lvx (vKey4, keypos, key);
    __ vec_perm (vKey3, vKey4, vKey3, keyPerm);

    // load the 7th round key to vKey4
    __ li (keypos, 96);
    __ lvx (vKey5, keypos, key);
    __ vec_perm (vKey4, vKey5, vKey4, keyPerm);

    // load the 6th round key to vKey5
    __ li (keypos, 80);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey1, vTmp2, vTmp1, keyPerm);

    // load the 9th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey2, vTmp1, vTmp2, keyPerm);

    // load the 8th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey3, vTmp2, vTmp1, keyPerm);

    // load the 7th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey4, vTmp1, vTmp2, keyPerm);

    // load the 6th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey5, vTmp2, vTmp1, keyPerm);
    __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);

    // last 10th - 6th rounds
    __ vncipher (vRet, vRet, vKey1);
@ -3045,30 +3049,29@@ class StubGenerator: public StubCodeGenerator {
    __ vncipher (vRet, vRet, vKey4);
    __ vncipher (vRet, vRet, vKey5);

    // load the 5th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
    // load the 5th round key to vKey1
    __ li (keypos, 64);
    __ lvx (vKey2, keypos, key);
    __ vec_perm (vKey1, vKey2, vTmp1, keyPerm);

    // load the 4th round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey2, vTmp2, vTmp1, keyPerm);
    // load the 4th round key to vKey2
    __ li (keypos, 48);
    __ lvx (vKey3, keypos, key);
    __ vec_perm (vKey2, vKey3, vKey2, keyPerm);

    // load the 3rd round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey3, vTmp1, vTmp2, keyPerm);
    // load the 3rd round key to vKey3
    __ li (keypos, 32);
    __ lvx (vKey4, keypos, key);
    __ vec_perm (vKey3, vKey4, vKey3, keyPerm);

    // load the 2nd round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp1, keypos, key);
    __ vperm (vKey4, vTmp2, vTmp1, keyPerm);
    // load the 2nd round key to vKey4
    __ li (keypos, 16);
    __ lvx (vKey5, keypos, key);
    __ vec_perm (vKey4, vKey5, vKey4, keyPerm);

    // load the 1st round key to vKey10
    __ addi (keypos, keypos, -16);
    __ lvx (vTmp2, keypos, key);
    __ vperm (vKey5, vTmp1, vTmp2, keyPerm);
    // load the 1st round key to vKey5
    __ lvx (vTmp1, key);
    __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);

    // last 5th - 1th rounds
    __ vncipher (vRet, vRet, vKey1);
@ -3077,24 +3080,54 @@ class StubGenerator: public StubCodeGenerator {
    __ vncipher (vRet, vRet, vKey4);
    __ vncipherlast (vRet, vRet, vKey5);

    __ neg (temp, to);
    __ lvsr (toPerm, temp);
    __ vspltisb (vTmp2, -1);
    __ vxor (vTmp1, vTmp1, vTmp1);
    __ vperm (vTmp2, vTmp2, vTmp1, toPerm);
    __ vxor (toPerm, toPerm, fSplt);
    // store result (unaligned)
#ifdef VM_LITTLE_ENDIAN
    __ lvsl (toPerm, to);
#else
    __ lvsr (toPerm, to);
#endif
    __ vspltisb (vTmp3, -1);
    __ vspltisb (vTmp4, 0);
    __ lvx (vTmp1, to);
    __ vperm (vRet, vRet, vRet, toPerm);
    __ vsel (vTmp1, vTmp1, vRet, vTmp2);
    __ lvx (vTmp4, fifteen, to);
    __ lvx (vTmp2, fifteen, to);
#ifdef VM_LITTLE_ENDIAN
    __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
    __ vxor (toPerm, toPerm, fSplt); // swap bytes
#else
    __ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
#endif
    __ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
    __ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
    __ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
    __ stvx (vTmp2, fifteen, to); // store this one first (may alias)
    __ stvx (vTmp1, to);
    __ vsel (vRet, vRet, vTmp4, vTmp2);
    __ stvx (vRet, fifteen, to);

    __ blr();
    return start;
  }
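
Both AES stubs end with the same unaligned-store idiom: the 16-byte result is rotated into place (vperm), merged with the bytes already in memory under a select mask (vsel), and written back with two aligned stvx stores, the possibly-aliasing high block first. A hedged scalar rendering of that idiom (a sketch of my reading of the sequence, not the stub's actual code):

#include <cstdint>
#include <cstring>

// Read-modify-write store of a 16-byte result to an arbitrary address,
// touching only whole 16-byte-aligned blocks, as the lvx/vsel/stvx
// sequences above do. Purely illustrative.
static void store16_unaligned_rmw(uint8_t* to, const uint8_t result[16]) {
  const uintptr_t off = (uintptr_t)to & 15;
  if (off == 0) {                          // already aligned: one plain store
    std::memcpy(to, result, 16);
    return;
  }
  uint8_t* lo = to - off;                  // aligned block holding the first byte
  uint8_t block[32];
  std::memcpy(block, lo, 32);              // load both aligned blocks (lvx, lvx)
  std::memcpy(block + off, result, 16);    // splice in the result (vperm + vsel)
  std::memcpy(lo + 16, block + 16, 16);    // store high block first (may alias)
  std::memcpy(lo, block, 16);              // then the low block (stvx)
}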

  address generate_sha256_implCompress(bool multi_block, const char *name) {
    assert(UseSHA, "need SHA instructions");
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ function_entry();

    __ sha256 (multi_block);

    __ blr();
    return start;
  }

  address generate_sha512_implCompress(bool multi_block, const char *name) {
    assert(UseSHA, "need SHA instructions");
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ function_entry();

    __ sha512 (multi_block);

    __ blr();
    return start;
  }

  void generate_arraycopy_stubs() {
    // Note: the disjoint stubs must be generated first, some of
    // the conjoint stubs use them.
@ -3306,6 +3339,267 @@ class StubGenerator: public StubCodeGenerator {
    BLOCK_COMMENT("} Stub body");
  }

  /**
   * Arguments:
   *
   * Input:
   *   R3_ARG1 - out address
   *   R4_ARG2 - in address
   *   R5_ARG3 - offset
   *   R6_ARG4 - len
   *   R7_ARG5 - k
   * Output:
   *   R3_RET - carry
   */
  address generate_mulAdd() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "mulAdd");

    address start = __ function_entry();

    // C2 does not sign extend signed parameters to full 64 bits registers:
    __ rldic (R5_ARG3, R5_ARG3, 2, 32); // always positive
    __ clrldi(R6_ARG4, R6_ARG4, 32); // force zero bits on higher word
    __ clrldi(R7_ARG5, R7_ARG5, 32); // force zero bits on higher word

    __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10);

    // Moves output carry to return register
    __ mr (R3_RET, R10);

    __ blr();

    return start;
  }
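
For reference, the contract this stub implements is the classic multiply-accumulate primitive from java.math.BigInteger. A hedged scalar version, using little-endian 32-bit digits for readability (the Java arrays the real stub operates on are big-endian):

#include <cstdint>

// out[0..len-1] += in[0..len-1] * k; the carry out of the top digit is
// returned rather than stored, matching the stub's R3_RET result.
static uint32_t mul_add(uint32_t* out, const uint32_t* in, int len, uint32_t k) {
  uint64_t carry = 0;
  for (int j = 0; j < len; j++) {
    uint64_t t = (uint64_t)in[j] * k + out[j] + carry;  // digit product + old digit + carry
    out[j] = (uint32_t)t;
    carry  = t >> 32;
  }
  return (uint32_t)carry;
}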

  /**
   * Arguments:
   *
   * Input:
   *   R3_ARG1 - in address
   *   R4_ARG2 - in length
   *   R5_ARG3 - out address
   *   R6_ARG4 - out length
   */
  address generate_squareToLen() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "squareToLen");

    address start = __ function_entry();

    // args - higher word is cleaned (unsignedly) due to int to long casting
    const Register in = R3_ARG1;
    const Register in_len = R4_ARG2;
    __ clrldi(in_len, in_len, 32);
    const Register out = R5_ARG3;
    const Register out_len = R6_ARG4;
    __ clrldi(out_len, out_len, 32);

    // output
    const Register ret = R3_RET;

    // temporaries
    const Register lplw_s = R7;
    const Register in_aux = R8;
    const Register out_aux = R9;
    const Register piece = R10;
    const Register product = R14;
    const Register lplw = R15;
    const Register i_minus1 = R16;
    const Register carry = R17;
    const Register offset = R18;
    const Register off_aux = R19;
    const Register t = R20;
    const Register mlen = R21;
    const Register len = R22;
    const Register a = R23;
    const Register b = R24;
    const Register i = R25;
    const Register c = R26;
    const Register cs = R27;

    // Labels
    Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
    Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;

    // Save non-volatile regs (frameless).
    int current_offs = -8;
    __ std(R28, current_offs, R1_SP); current_offs -= 8;
    __ std(R27, current_offs, R1_SP); current_offs -= 8;
    __ std(R26, current_offs, R1_SP); current_offs -= 8;
    __ std(R25, current_offs, R1_SP); current_offs -= 8;
    __ std(R24, current_offs, R1_SP); current_offs -= 8;
    __ std(R23, current_offs, R1_SP); current_offs -= 8;
    __ std(R22, current_offs, R1_SP); current_offs -= 8;
    __ std(R21, current_offs, R1_SP); current_offs -= 8;
    __ std(R20, current_offs, R1_SP); current_offs -= 8;
    __ std(R19, current_offs, R1_SP); current_offs -= 8;
    __ std(R18, current_offs, R1_SP); current_offs -= 8;
    __ std(R17, current_offs, R1_SP); current_offs -= 8;
    __ std(R16, current_offs, R1_SP); current_offs -= 8;
    __ std(R15, current_offs, R1_SP); current_offs -= 8;
    __ std(R14, current_offs, R1_SP);

    // Store the squares, right shifted one bit (i.e., divided by 2)
    __ subi (out_aux, out, 8);
    __ subi (in_aux, in, 4);
    __ cmpwi (CCR0, in_len, 0);
    // Initialize lplw outside of the loop
    __ xorr (lplw, lplw, lplw);
    __ ble (CCR0, SKIP_LOOP_SQUARE); // in_len <= 0
    __ mtctr (in_len);

    __ bind(LOOP_SQUARE);
    __ lwzu (piece, 4, in_aux);
    __ mulld (product, piece, piece);
    // shift left 63 bits and only keep the MSB
    __ rldic (lplw_s, lplw, 63, 0);
    __ mr (lplw, product);
    // shift right 1 bit without sign extension
    __ srdi (product, product, 1);
    // join them to the same register and store it
    __ orr (product, lplw_s, product);
#ifdef VM_LITTLE_ENDIAN
    // Swap low and high words for little endian
    __ rldicl (product, product, 32, 0);
#endif
    __ stdu (product, 8, out_aux);
    __ bdnz (LOOP_SQUARE);

    __ bind(SKIP_LOOP_SQUARE);

    // Add in off-diagonal sums
    __ cmpwi (CCR0, in_len, 0);
    __ ble (CCR0, SKIP_DIAGONAL_SUM);
    // Avoid CTR usage here in order to use it at mulAdd
    __ subi (i_minus1, in_len, 1);
    __ li (offset, 4);

    __ bind(LOOP_DIAGONAL_SUM);

    __ sldi (off_aux, out_len, 2);
    __ sub (off_aux, off_aux, offset);

    __ mr (len, i_minus1);
    __ sldi (mlen, i_minus1, 2);
    __ lwzx (t, in, mlen);

    __ muladd (out, in, off_aux, len, t, a, b, carry);

    // begin<addOne>
    // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
    __ addi (mlen, mlen, 4);
    __ sldi (a, out_len, 2);
    __ subi (a, a, 4);
    __ sub (a, a, mlen);
    __ subi (off_aux, offset, 4);
    __ sub (off_aux, a, off_aux);

    __ lwzx (b, off_aux, out);
    __ add (b, b, carry);
    __ stwx (b, off_aux, out);

    // if (((uint64_t)s >> 32) != 0) {
    __ srdi_ (a, b, 32);
    __ beq (CCR0, SKIP_ADDONE);

    // while (--mlen >= 0) {
    __ bind(LOOP_ADDONE);
    __ subi (mlen, mlen, 4);
    __ cmpwi (CCR0, mlen, 0);
    __ beq (CCR0, SKIP_ADDONE);

    // if (--offset_aux < 0) { // Carry out of number
    __ subi (off_aux, off_aux, 4);
    __ cmpwi (CCR0, off_aux, 0);
    __ blt (CCR0, SKIP_ADDONE);

    // } else {
    __ lwzx (b, off_aux, out);
    __ addi (b, b, 1);
    __ stwx (b, off_aux, out);
    __ cmpwi (CCR0, b, 0);
    __ bne (CCR0, SKIP_ADDONE);
    __ b (LOOP_ADDONE);

    __ bind(SKIP_ADDONE);
    // } } } end<addOne>

    __ addi (offset, offset, 8);
    __ subi (i_minus1, i_minus1, 1);
    __ cmpwi (CCR0, i_minus1, 0);
    __ bge (CCR0, LOOP_DIAGONAL_SUM);

    __ bind(SKIP_DIAGONAL_SUM);

    // Shift back up and set low bit
    // Shifts 1 bit left up to len positions. Assumes no leading zeros
    // begin<primitiveLeftShift>
    __ cmpwi (CCR0, out_len, 0);
    __ ble (CCR0, SKIP_LSHIFT);
    __ li (i, 0);
    __ lwz (c, 0, out);
    __ subi (b, out_len, 1);
    __ mtctr (b);

    __ bind(LOOP_LSHIFT);
    __ mr (b, c);
    __ addi (cs, i, 4);
    __ lwzx (c, out, cs);

    __ sldi (b, b, 1);
    __ srwi (cs, c, 31);
    __ orr (b, b, cs);
    __ stwx (b, i, out);

    __ addi (i, i, 4);
    __ bdnz (LOOP_LSHIFT);

    __ sldi (c, out_len, 2);
    __ subi (c, c, 4);
    __ lwzx (b, out, c);
    __ sldi (b, b, 1);
    __ stwx (b, out, c);

    __ bind(SKIP_LSHIFT);
    // end<primitiveLeftShift>

    // Set low bit
    __ sldi (i, in_len, 2);
    __ subi (i, i, 4);
    __ lwzx (i, in, i);
    __ sldi (c, out_len, 2);
    __ subi (c, c, 4);
    __ lwzx (b, out, c);

    __ andi (i, i, 1);
    __ orr (i, b, i);

    __ stwx (i, out, c);

    // Restore non-volatile regs.
    current_offs = -8;
    __ ld(R28, current_offs, R1_SP); current_offs -= 8;
    __ ld(R27, current_offs, R1_SP); current_offs -= 8;
    __ ld(R26, current_offs, R1_SP); current_offs -= 8;
    __ ld(R25, current_offs, R1_SP); current_offs -= 8;
    __ ld(R24, current_offs, R1_SP); current_offs -= 8;
    __ ld(R23, current_offs, R1_SP); current_offs -= 8;
    __ ld(R22, current_offs, R1_SP); current_offs -= 8;
    __ ld(R21, current_offs, R1_SP); current_offs -= 8;
    __ ld(R20, current_offs, R1_SP); current_offs -= 8;
    __ ld(R19, current_offs, R1_SP); current_offs -= 8;
    __ ld(R18, current_offs, R1_SP); current_offs -= 8;
    __ ld(R17, current_offs, R1_SP); current_offs -= 8;
    __ ld(R16, current_offs, R1_SP); current_offs -= 8;
    __ ld(R15, current_offs, R1_SP); current_offs -= 8;
    __ ld(R14, current_offs, R1_SP);

    __ mr(ret, out);
    __ blr();

    return start;
  }
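
The stub above follows the BigInteger squaring scheme: store the (shifted) squares of each digit, add in the off-diagonal products via mulAdd/addOne, then shift everything left one bit and set the low bit. A hedged scalar equivalent, again with little-endian digits for clarity (the stub works on big-endian Java int arrays):

#include <cstdint>
#include <cstring>

// Square an n-digit number into 2n digits using the same three phases as
// the stub: off-diagonal products accumulated once, doubled by a 1-bit
// left shift, plus the diagonal squares. Illustrative only.
static void square_to_len(const uint32_t* in, int n, uint32_t* out) {
  std::memset(out, 0, sizeof(uint32_t) * 2 * n);

  // Off-diagonal products in[i]*in[j], i < j, each accumulated once.
  for (int i = 0; i < n; i++) {
    uint64_t carry = 0;
    for (int j = i + 1; j < n; j++) {
      uint64_t t = (uint64_t)in[i] * in[j] + out[i + j] + carry;
      out[i + j] = (uint32_t)t;
      carry = t >> 32;
    }
    out[i + n] = (uint32_t)carry;   // first write to this digit position
  }

  // Double them: shift the whole accumulator left by one bit.
  uint32_t top = 0;
  for (int k = 0; k < 2 * n; k++) {
    uint32_t next = out[k] >> 31;
    out[k] = (out[k] << 1) | top;
    top = next;
  }

  // Add the diagonal squares in[i]^2 at digit position 2*i.
  uint64_t carry = 0;
  for (int i = 0; i < n; i++) {
    uint64_t t = (uint64_t)in[i] * in[i] + out[2 * i] + carry;
    out[2 * i] = (uint32_t)t;
    t = (t >> 32) + out[2 * i + 1];
    out[2 * i + 1] = (uint32_t)t;
    carry = t >> 32;
  }
}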
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
@ -3500,6 +3794,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
}
|
||||
#endif
|
||||
|
||||
if (UseSquareToLenIntrinsic) {
|
||||
StubRoutines::_squareToLen = generate_squareToLen();
|
||||
}
|
||||
if (UseMulAddIntrinsic) {
|
||||
StubRoutines::_mulAdd = generate_mulAdd();
|
||||
}
|
||||
if (UseMontgomeryMultiplyIntrinsic) {
|
||||
StubRoutines::_montgomeryMultiply
|
||||
= CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
|
||||
@ -3514,6 +3814,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
|
||||
}
|
||||
|
||||
if (UseSHA256Intrinsics) {
|
||||
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
|
||||
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
|
||||
}
|
||||
if (UseSHA512Intrinsics) {
|
||||
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
|
||||
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -34,7 +34,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
|
||||
|
||||
enum platform_dependent_constants {
|
||||
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 20000 // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
|
||||
};
|
||||
|
||||
// CRC32 Intrinsics.
|
||||
|
@ -1470,10 +1470,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
|
||||
// TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
|
||||
__ stw(R0, thread_(thread_state));
|
||||
|
||||
if (UseMembar) {
|
||||
__ fence();
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Call the native method. Argument registers must not have been
|
||||
// overwritten since "__ call_stub(signature_handler);" (except for
|
||||
@ -1594,9 +1590,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
|
||||
__ li(R0/*thread_state*/, _thread_in_Java);
|
||||
__ release();
|
||||
__ stw(R0/*thread_state*/, thread_(thread_state));
|
||||
if (UseMembar) {
|
||||
__ fence();
|
||||
}
|
||||
|
||||
if (CheckJNICalls) {
|
||||
// clear_pending_jni_exception_check
|
||||
|
@ -2224,6 +2224,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
|
||||
if (is_static) {
|
||||
__ ld(Robj, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f1_offset()), Rcache);
|
||||
__ ld(Robj, in_bytes(Klass::java_mirror_offset()), Robj);
|
||||
__ resolve_oop_handle(Robj);
|
||||
// Acquire not needed here. Following access has an address dependency on this value.
|
||||
}
|
||||
}
|
||||
|
@ -107,13 +107,23 @@ void VM_Version::initialize() {
|
||||
// TODO: PPC port PdScheduling::power6SectorSize = 0x20;
|
||||
}
|
||||
|
||||
MaxVectorSize = 8;
|
||||
if (PowerArchitecturePPC64 >= 8) {
|
||||
if (FLAG_IS_DEFAULT(SuperwordUseVSX)) {
|
||||
FLAG_SET_ERGO(bool, SuperwordUseVSX, true);
|
||||
}
|
||||
} else {
|
||||
if (SuperwordUseVSX) {
|
||||
warning("SuperwordUseVSX specified, but needs at least Power8.");
|
||||
FLAG_SET_DEFAULT(SuperwordUseVSX, false);
|
||||
}
|
||||
}
|
||||
MaxVectorSize = SuperwordUseVSX ? 16 : 8;
|
||||
#endif
|
||||
|
||||
// Create and print feature-string.
|
||||
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
|
||||
jio_snprintf(buf, sizeof(buf),
|
||||
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
(has_fsqrt() ? " fsqrt" : ""),
|
||||
(has_isel() ? " isel" : ""),
|
||||
(has_lxarxeh() ? " lxarxeh" : ""),
|
||||
@ -130,7 +140,8 @@ void VM_Version::initialize() {
|
||||
(has_mfdscr() ? " mfdscr" : ""),
|
||||
(has_vsx() ? " vsx" : ""),
|
||||
(has_ldbrx() ? " ldbrx" : ""),
|
||||
(has_stdbrx() ? " stdbrx" : "")
|
||||
(has_stdbrx() ? " stdbrx" : ""),
|
||||
(has_vshasig() ? " sha" : "")
|
||||
// Make sure number of %s matches num_features!
|
||||
);
|
||||
_features_string = os::strdup(buf);
|
||||
@ -200,7 +211,6 @@ void VM_Version::initialize() {
|
||||
}
|
||||
|
||||
// The AES intrinsic stubs require AES instruction support.
|
||||
#if defined(VM_LITTLE_ENDIAN)
|
||||
if (has_vcipher()) {
|
||||
if (FLAG_IS_DEFAULT(UseAES)) {
|
||||
UseAES = true;
|
||||
@ -221,18 +231,6 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
||||
}
|
||||
|
||||
#else
|
||||
if (UseAES) {
|
||||
warning("AES instructions are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseAES, false);
|
||||
}
|
||||
if (UseAESIntrinsics) {
|
||||
if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
|
||||
warning("AES intrinsics are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (UseAESCTRIntrinsics) {
|
||||
warning("AES/CTR intrinsics are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
|
||||
@ -247,17 +245,49 @@ void VM_Version::initialize() {
|
||||
FLAG_SET_DEFAULT(UseFMA, true);
|
||||
}
|
||||
|
||||
if (UseSHA) {
|
||||
warning("SHA instructions are not available on this CPU");
|
||||
if (has_vshasig()) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA)) {
|
||||
UseSHA = true;
|
||||
}
|
||||
} else if (UseSHA) {
|
||||
if (!FLAG_IS_DEFAULT(UseSHA))
|
||||
warning("SHA instructions are not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UseSHA, false);
|
||||
}
|
||||
if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
|
||||
warning("SHA intrinsics are not available on this CPU");
|
||||
|
||||
if (UseSHA1Intrinsics) {
|
||||
warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA && has_vshasig()) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
|
||||
}
|
||||
} else if (UseSHA256Intrinsics) {
|
||||
warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
|
||||
}
|
||||
|
||||
if (UseSHA && has_vshasig()) {
|
||||
if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
|
||||
}
|
||||
} else if (UseSHA512Intrinsics) {
|
||||
warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
|
||||
}
|
||||
|
||||
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
|
||||
FLAG_SET_DEFAULT(UseSHA, false);
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
|
||||
UseSquareToLenIntrinsic = true;
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
|
||||
UseMulAddIntrinsic = true;
|
||||
}
|
||||
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
|
||||
UseMultiplyToLenIntrinsic = true;
|
||||
}
|
||||
@ -657,6 +687,7 @@ void VM_Version::determine_features() {
|
||||
a->lxvd2x(VSR0, R3_ARG1); // code[14] -> vsx
|
||||
a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> ldbrx
|
||||
a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[16] -> stdbrx
|
||||
a->vshasigmaw(VR0, VR1, 1, 0xF); // code[17] -> vshasig
|
||||
a->blr();
|
||||
|
||||
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
|
||||
@ -708,6 +739,7 @@ void VM_Version::determine_features() {
|
||||
if (code[feature_cntr++]) features |= vsx_m;
|
||||
if (code[feature_cntr++]) features |= ldbrx_m;
|
||||
if (code[feature_cntr++]) features |= stdbrx_m;
|
||||
if (code[feature_cntr++]) features |= vshasig_m;
|
||||
|
||||
// Print the detection code.
|
||||
if (PrintAssembly) {
|
||||
|
@ -49,6 +49,7 @@ protected:
|
||||
vsx,
|
||||
ldbrx,
|
||||
stdbrx,
|
||||
vshasig,
|
||||
num_features // last entry to count features
|
||||
};
|
||||
enum Feature_Flag_Set {
|
||||
@ -64,6 +65,7 @@ protected:
|
||||
vand_m = (1 << vand ),
|
||||
lqarx_m = (1 << lqarx ),
|
||||
vcipher_m = (1 << vcipher),
|
||||
vshasig_m = (1 << vshasig),
|
||||
vpmsumb_m = (1 << vpmsumb),
|
||||
tcheck_m = (1 << tcheck ),
|
||||
mfdscr_m = (1 << mfdscr ),
|
||||
@ -106,6 +108,7 @@ public:
|
||||
static bool has_vsx() { return (_features & vsx_m) != 0; }
|
||||
static bool has_ldbrx() { return (_features & ldbrx_m) != 0; }
|
||||
static bool has_stdbrx() { return (_features & stdbrx_m) != 0; }
|
||||
static bool has_vshasig() { return (_features & vshasig_m) != 0; }
|
||||
static bool has_mtfprd() { return has_vpmsumb(); } // alias for P8
|
||||
|
||||
// Assembler testing
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -250,7 +250,6 @@ class Address VALUE_OBJ_CLASS_SPEC {
|
||||
bool is_RSform() { return has_base() && !has_index() && is_disp12(); }
|
||||
bool is_RSYform() { return has_base() && !has_index() && is_disp20(); }
|
||||
bool is_RXform() { return has_base() && has_index() && is_disp12(); }
|
||||
bool is_RXEform() { return has_base() && has_index() && is_disp12(); }
|
||||
bool is_RXYform() { return has_base() && has_index() && is_disp20(); }
|
||||
|
||||
bool uses(Register r) { return _base == r || _index == r; };
|
||||
@ -1093,7 +1092,201 @@ class Assembler : public AbstractAssembler {
|
||||
#define TRTT_ZOPC (unsigned int)(0xb9 << 24 | 0x90 << 16)
|
||||
|
||||
|
||||
// Miscellaneous Operations
|
||||
//---------------------------
|
||||
//-- Vector Instructions --
|
||||
//---------------------------
|
||||
|
||||
//---< Vector Support Instructions >---
|
||||
|
||||
//--- Load (memory) ---
|
||||
|
||||
#define VLM_ZOPC (unsigned long)(0xe7L << 40 | 0x36L << 0) // load full vreg range (n * 128 bit)
|
||||
#define VL_ZOPC (unsigned long)(0xe7L << 40 | 0x06L << 0) // load full vreg (128 bit)
|
||||
#define VLEB_ZOPC (unsigned long)(0xe7L << 40 | 0x00L << 0) // load vreg element (8 bit)
|
||||
#define VLEH_ZOPC (unsigned long)(0xe7L << 40 | 0x01L << 0) // load vreg element (16 bit)
|
||||
#define VLEF_ZOPC (unsigned long)(0xe7L << 40 | 0x03L << 0) // load vreg element (32 bit)
|
||||
#define VLEG_ZOPC (unsigned long)(0xe7L << 40 | 0x02L << 0) // load vreg element (64 bit)
|
||||
|
||||
#define VLREP_ZOPC (unsigned long)(0xe7L << 40 | 0x05L << 0) // load and replicate into all vector elements
|
||||
#define VLLEZ_ZOPC (unsigned long)(0xe7L << 40 | 0x04L << 0) // load logical element and zero.
|
||||
|
||||
// vector register gather
|
||||
#define VGEF_ZOPC (unsigned long)(0xe7L << 40 | 0x13L << 0) // gather element (32 bit), V1(M3) = [D2(V2(M3),B2)]
|
||||
#define VGEG_ZOPC (unsigned long)(0xe7L << 40 | 0x12L << 0) // gather element (64 bit), V1(M3) = [D2(V2(M3),B2)]
|
||||
// vector register scatter
|
||||
#define VSCEF_ZOPC (unsigned long)(0xe7L << 40 | 0x1bL << 0) // vector scatter element FW
|
||||
#define VSCEG_ZOPC (unsigned long)(0xe7L << 40 | 0x1aL << 0) // vector scatter element DW
|
||||
|
||||
#define VLBB_ZOPC (unsigned long)(0xe7L << 40 | 0x07L << 0) // load vreg to block boundary (load to alignment).
|
||||
#define VLL_ZOPC (unsigned long)(0xe7L << 40 | 0x37L << 0) // load vreg with length.
|
||||
|
||||
//--- Load (register) ---
|
||||
|
||||
#define VLR_ZOPC (unsigned long)(0xe7L << 40 | 0x56L << 0) // copy full vreg (128 bit)
|
||||
#define VLGV_ZOPC (unsigned long)(0xe7L << 40 | 0x21L << 0) // copy vreg element -> GR
|
||||
#define VLVG_ZOPC (unsigned long)(0xe7L << 40 | 0x22L << 0) // copy GR -> vreg element
|
||||
#define VLVGP_ZOPC (unsigned long)(0xe7L << 40 | 0x62L << 0) // copy GR2, GR3 (disjoint pair) -> vreg
|
||||
|
||||
// vector register pack: cut in half the size the source vector elements
|
||||
#define VPK_ZOPC (unsigned long)(0xe7L << 40 | 0x94L << 0) // just cut
|
||||
#define VPKS_ZOPC (unsigned long)(0xe7L << 40 | 0x97L << 0) // saturate as signed values
|
||||
#define VPKLS_ZOPC (unsigned long)(0xe7L << 40 | 0x95L << 0) // saturate as unsigned values
|
||||
|
||||
// vector register unpack: double in size the source vector elements
|
||||
#define VUPH_ZOPC (unsigned long)(0xe7L << 40 | 0xd7L << 0) // signed, left half of the source vector elements
|
||||
#define VUPLH_ZOPC (unsigned long)(0xe7L << 40 | 0xd5L << 0) // unsigned, left half of the source vector elements
|
||||
#define VUPL_ZOPC (unsigned long)(0xe7L << 40 | 0xd6L << 0) // signed, right half of the source vector elements
|
||||
#define VUPLL_ZOPC (unsigned long)(0xe7L << 40 | 0xd4L << 0) // unsigned, right half of the source vector element
|
||||
|
||||
// vector register merge
|
||||
#define VMRH_ZOPC (unsigned long)(0xe7L << 40 | 0x61L << 0) // register merge high (left half of source registers)
|
||||
#define VMRL_ZOPC (unsigned long)(0xe7L << 40 | 0x60L << 0) // register merge low (right half of source registers)
|
||||
|
||||
// vector register permute
|
||||
#define VPERM_ZOPC (unsigned long)(0xe7L << 40 | 0x8cL << 0) // vector permute
|
||||
#define VPDI_ZOPC (unsigned long)(0xe7L << 40 | 0x84L << 0) // vector permute DW immediate
|
||||
|
||||
// vector register replicate
|
||||
#define VREP_ZOPC (unsigned long)(0xe7L << 40 | 0x4dL << 0) // vector replicate
|
||||
#define VREPI_ZOPC (unsigned long)(0xe7L << 40 | 0x45L << 0) // vector replicate immediate
|
||||
#define VSEL_ZOPC (unsigned long)(0xe7L << 40 | 0x8dL << 0) // vector select
|
||||
|
||||
#define VSEG_ZOPC (unsigned long)(0xe7L << 40 | 0x5fL << 0) // vector sign-extend to DW (rightmost element in each DW).
|
||||
|
||||
//--- Load (immediate) ---
|
||||
|
||||
#define VLEIB_ZOPC (unsigned long)(0xe7L << 40 | 0x40L << 0) // load vreg element (16 bit imm to 8 bit)
|
||||
#define VLEIH_ZOPC (unsigned long)(0xe7L << 40 | 0x41L << 0) // load vreg element (16 bit imm to 16 bit)
|
||||
#define VLEIF_ZOPC (unsigned long)(0xe7L << 40 | 0x43L << 0) // load vreg element (16 bit imm to 32 bit)
|
||||
#define VLEIG_ZOPC (unsigned long)(0xe7L << 40 | 0x42L << 0) // load vreg element (16 bit imm to 64 bit)
|
||||
|
||||
//--- Store ---
|
||||
|
||||
#define VSTM_ZOPC (unsigned long)(0xe7L << 40 | 0x3eL << 0) // store full vreg range (n * 128 bit)
|
||||
#define VST_ZOPC (unsigned long)(0xe7L << 40 | 0x0eL << 0) // store full vreg (128 bit)
|
||||
#define VSTEB_ZOPC (unsigned long)(0xe7L << 40 | 0x08L << 0) // store vreg element (8 bit)
|
||||
#define VSTEH_ZOPC (unsigned long)(0xe7L << 40 | 0x09L << 0) // store vreg element (16 bit)
|
||||
#define VSTEF_ZOPC (unsigned long)(0xe7L << 40 | 0x0bL << 0) // store vreg element (32 bit)
|
||||
#define VSTEG_ZOPC (unsigned long)(0xe7L << 40 | 0x0aL << 0) // store vreg element (64 bit)
|
||||
#define VSTL_ZOPC (unsigned long)(0xe7L << 40 | 0x3fL << 0) // store vreg with length.
|
||||
|
||||
//--- Misc ---
|
||||
|
||||
#define VGM_ZOPC (unsigned long)(0xe7L << 40 | 0x46L << 0) // generate bit mask, [start..end] = '1', else '0'
|
||||
#define VGBM_ZOPC (unsigned long)(0xe7L << 40 | 0x44L << 0) // generate byte mask, bits(imm16) -> bytes

//---< Vector Arithmetic Instructions >---

// Load
#define VLC_ZOPC (unsigned long)(0xe7L << 40 | 0xdeL << 0) // V1 := -V2, element size = 2**m
#define VLP_ZOPC (unsigned long)(0xe7L << 40 | 0xdfL << 0) // V1 := |V2|, element size = 2**m

// ADD
#define VA_ZOPC (unsigned long)(0xe7L << 40 | 0xf3L << 0) // V1 := V2 + V3, element size = 2**m
#define VACC_ZOPC (unsigned long)(0xe7L << 40 | 0xf1L << 0) // V1 := carry(V2 + V3), element size = 2**m

// SUB
#define VS_ZOPC (unsigned long)(0xe7L << 40 | 0xf7L << 0) // V1 := V2 - V3, element size = 2**m
#define VSCBI_ZOPC (unsigned long)(0xe7L << 40 | 0xf5L << 0) // V1 := borrow(V2 - V3), element size = 2**m

// MUL
#define VML_ZOPC (unsigned long)(0xe7L << 40 | 0xa2L << 0) // V1 := V2 * V3, element size = 2**m
#define VMH_ZOPC (unsigned long)(0xe7L << 40 | 0xa3L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLH_ZOPC (unsigned long)(0xe7L << 40 | 0xa1L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
#define VME_ZOPC (unsigned long)(0xe7L << 40 | 0xa6L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLE_ZOPC (unsigned long)(0xe7L << 40 | 0xa4L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
#define VMO_ZOPC (unsigned long)(0xe7L << 40 | 0xa7L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLO_ZOPC (unsigned long)(0xe7L << 40 | 0xa5L << 0) // V1 := V2 * V3, element size = 2**m, unsigned

// MUL & ADD
#define VMAL_ZOPC (unsigned long)(0xe7L << 40 | 0xaaL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMAH_ZOPC (unsigned long)(0xe7L << 40 | 0xabL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALH_ZOPC (unsigned long)(0xe7L << 40 | 0xa9L << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
#define VMAE_ZOPC (unsigned long)(0xe7L << 40 | 0xaeL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALE_ZOPC (unsigned long)(0xe7L << 40 | 0xacL << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
#define VMAO_ZOPC (unsigned long)(0xe7L << 40 | 0xafL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALO_ZOPC (unsigned long)(0xe7L << 40 | 0xadL << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned

// Vector SUM
#define VSUM_ZOPC (unsigned long)(0xe7L << 40 | 0x64L << 0) // V1[j] := toFW(sum(V2[i]) + V3[j]), subelements: byte or HW
#define VSUMG_ZOPC (unsigned long)(0xe7L << 40 | 0x65L << 0) // V1[j] := toDW(sum(V2[i]) + V3[j]), subelements: HW or FW
#define VSUMQ_ZOPC (unsigned long)(0xe7L << 40 | 0x67L << 0) // V1[j] := toQW(sum(V2[i]) + V3[j]), subelements: FW or DW

// Average
#define VAVG_ZOPC (unsigned long)(0xe7L << 40 | 0xf2L << 0) // V1 := (V2+V3+1)/2, signed, element size = 2**m
#define VAVGL_ZOPC (unsigned long)(0xe7L << 40 | 0xf0L << 0) // V1 := (V2+V3+1)/2, unsigned, element size = 2**m

// VECTOR Galois Field Multiply Sum
#define VGFM_ZOPC (unsigned long)(0xe7L << 40 | 0xb4L << 0)
#define VGFMA_ZOPC (unsigned long)(0xe7L << 40 | 0xbcL << 0)

//---< Vector Logical Instructions >---

// AND
#define VN_ZOPC (unsigned long)(0xe7L << 40 | 0x68L << 0) // V1 := V2 & V3, element size = 2**m
#define VNC_ZOPC (unsigned long)(0xe7L << 40 | 0x69L << 0) // V1 := V2 & ~V3, element size = 2**m

// XOR
#define VX_ZOPC (unsigned long)(0xe7L << 40 | 0x6dL << 0) // V1 := V2 ^ V3, element size = 2**m

// NOR
#define VNO_ZOPC (unsigned long)(0xe7L << 40 | 0x6bL << 0) // V1 := !(V2 | V3), element size = 2**m

// OR
#define VO_ZOPC (unsigned long)(0xe7L << 40 | 0x6aL << 0) // V1 := V2 | V3, element size = 2**m

// Comparison (element-wise)
#define VCEQ_ZOPC (unsigned long)(0xe7L << 40 | 0xf8L << 0) // V1 := (V2 == V3) ? 0xffff : 0x0000, element size = 2**m
#define VCH_ZOPC (unsigned long)(0xe7L << 40 | 0xfbL << 0) // V1 := (V2 > V3) ? 0xffff : 0x0000, element size = 2**m, signed
#define VCHL_ZOPC (unsigned long)(0xe7L << 40 | 0xf9L << 0) // V1 := (V2 > V3) ? 0xffff : 0x0000, element size = 2**m, unsigned

// Max/Min (element-wise)
#define VMX_ZOPC (unsigned long)(0xe7L << 40 | 0xffL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, signed
#define VMXL_ZOPC (unsigned long)(0xe7L << 40 | 0xfdL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, unsigned
#define VMN_ZOPC (unsigned long)(0xe7L << 40 | 0xfeL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, signed
#define VMNL_ZOPC (unsigned long)(0xe7L << 40 | 0xfcL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, unsigned

// Leading/Trailing Zeros, population count
#define VCLZ_ZOPC (unsigned long)(0xe7L << 40 | 0x53L << 0) // V1 := leadingzeros(V2), element size = 2**m
#define VCTZ_ZOPC (unsigned long)(0xe7L << 40 | 0x52L << 0) // V1 := trailingzeros(V2), element size = 2**m
#define VPOPCT_ZOPC (unsigned long)(0xe7L << 40 | 0x50L << 0) // V1 := popcount(V2), bytewise!!

// Rotate/Shift
#define VERLLV_ZOPC (unsigned long)(0xe7L << 40 | 0x73L << 0) // V1 := rotateleft(V2), rotate count in V3 element
#define VERLL_ZOPC (unsigned long)(0xe7L << 40 | 0x33L << 0) // V1 := rotateleft(V3), rotate count from d2(b2).
#define VERIM_ZOPC (unsigned long)(0xe7L << 40 | 0x72L << 0) // Rotate then insert under mask. Read Principles of Operation!!

#define VESLV_ZOPC (unsigned long)(0xe7L << 40 | 0x70L << 0) // V1 := SLL(V2, V3), unsigned, element-wise
#define VESL_ZOPC (unsigned long)(0xe7L << 40 | 0x30L << 0) // V1 := SLL(V3), unsigned, shift count from d2(b2).

#define VESRAV_ZOPC (unsigned long)(0xe7L << 40 | 0x7AL << 0) // V1 := SRA(V2, V3), signed, element-wise
#define VESRA_ZOPC (unsigned long)(0xe7L << 40 | 0x3AL << 0) // V1 := SRA(V3), signed, shift count from d2(b2).
#define VESRLV_ZOPC (unsigned long)(0xe7L << 40 | 0x78L << 0) // V1 := SRL(V2, V3), unsigned, element-wise
#define VESRL_ZOPC (unsigned long)(0xe7L << 40 | 0x38L << 0) // V1 := SRL(V3), unsigned, shift count from d2(b2).

#define VSL_ZOPC (unsigned long)(0xe7L << 40 | 0x74L << 0) // V1 := SLL(V2), unsigned, bit-count
#define VSLB_ZOPC (unsigned long)(0xe7L << 40 | 0x75L << 0) // V1 := SLL(V2), unsigned, byte-count
#define VSLDB_ZOPC (unsigned long)(0xe7L << 40 | 0x77L << 0) // V1 := SLL((V2,V3)), unsigned, byte-count

#define VSRA_ZOPC (unsigned long)(0xe7L << 40 | 0x7eL << 0) // V1 := SRA(V2), signed, bit-count
#define VSRAB_ZOPC (unsigned long)(0xe7L << 40 | 0x7fL << 0) // V1 := SRA(V2), signed, byte-count
#define VSRL_ZOPC (unsigned long)(0xe7L << 40 | 0x7cL << 0) // V1 := SRL(V2), unsigned, bit-count
#define VSRLB_ZOPC (unsigned long)(0xe7L << 40 | 0x7dL << 0) // V1 := SRL(V2), unsigned, byte-count

// Test under Mask
#define VTM_ZOPC (unsigned long)(0xe7L << 40 | 0xd8L << 0) // Like TM, set CC according to state of selected bits.

//---< Vector String Instructions >---
#define VFAE_ZOPC (unsigned long)(0xe7L << 40 | 0x82L << 0) // Find any element
#define VFEE_ZOPC (unsigned long)(0xe7L << 40 | 0x80L << 0) // Find element equal
#define VFENE_ZOPC (unsigned long)(0xe7L << 40 | 0x81L << 0) // Find element not equal
#define VSTRC_ZOPC (unsigned long)(0xe7L << 40 | 0x8aL << 0) // String range compare
#define VISTR_ZOPC (unsigned long)(0xe7L << 40 | 0x5cL << 0) // Isolate String


//--------------------------------
//-- Miscellaneous Operations --
//--------------------------------

// Execute
#define EX_ZOPC (unsigned int)(68L << 24)
@ -1244,10 +1437,18 @@ class Assembler : public AbstractAssembler {
// unsigned arithmetic calculation instructions
// Mask bit#0 is not used by these instructions.
// There is no indication of overflow for these instr.
bcondLogZero = 2,
bcondLogNotZero = 5,
bcondLogZero_NoCarry = 8,
bcondLogZero_Carry = 2,
// bcondLogZero_Borrow = 8, // This CC is never generated.
bcondLogZero_NoBorrow = 2,
bcondLogZero = bcondLogZero_Carry | bcondLogZero_NoCarry,
bcondLogNotZero_NoCarry = 4,
bcondLogNotZero_Carry = 1,
bcondLogNotZero_Borrow = 4,
bcondLogNotZero_NoBorrow = 1,
bcondLogNotZero = bcondLogNotZero_Carry | bcondLogNotZero_NoCarry,
bcondLogCarry = bcondLogZero_Carry | bcondLogNotZero_Carry,
bcondLogBorrow = /* bcondLogZero_Borrow | */ bcondLogNotZero_Borrow,
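// [Editorial sketch, not part of this commit] The combined masks above are plain ORs of
// 4-bit condition-code masks, e.g. bcondLogCarry = 2 | 1 = 3. Assuming the z_algr/z_brc
// emitters declared in this class, a branch on carry-out of an unsigned add could read:
//   z_algr(Z_R2, Z_R3);              // unsigned 64-bit add, sets one of the logical CCs
//   z_brc(bcondLogCarry, carry_set); // taken iff the addition produced a carry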
// string search instructions
bcondFound = 4,
bcondNotFound = 2,
@ -1280,6 +1481,29 @@ class Assembler : public AbstractAssembler {
to_minus_infinity = 7
};

// Vector Register Element Type.
enum VRegElemType {
VRET_BYTE = 0,
VRET_HW = 1,
VRET_FW = 2,
VRET_DW = 3,
VRET_QW = 4
};

// Vector Operation Result Control.
// This is a set of flags used in some vector instructions to control
// the result (side) effects of instruction execution.
enum VOpRC {
VOPRC_CCSET = 0b0001, // set the CC.
VOPRC_CCIGN = 0b0000, // ignore, don't set CC.
VOPRC_ZS = 0b0010, // Zero Search. Additional, elementwise, comparison against zero.
VOPRC_NOZS = 0b0000, // No Zero Search.
VOPRC_RTBYTEIX = 0b0100, // generate byte index to lowest element with true comparison.
VOPRC_RTBITVEC = 0b0000, // generate bit vector, all 1s for true, all 0s for false element comparisons.
VOPRC_INVERT = 0b1000, // invert comparison results.
VOPRC_NOINVERT = 0b0000 // use comparison results as is, do not invert.
};
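// [Editorial sketch, not part of this commit] The VOPRC_* values are single bits meant
// to be OR-ed into one 4-bit field. A string search that should stop at a zero byte and
// also set the condition code would pass, for instance:
//   const int64_t cc5 = VOPRC_CCSET | VOPRC_ZS;   // 0b0011
//   z_vfaeb(v1, v2, v3, cc5);                     // find any equal byte, zero search, CC set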

// Inverse condition code, i.e. determine "15 - cc" for a given condition code cc.
static branch_condition inverse_condition(branch_condition cc);
static branch_condition inverse_float_condition(branch_condition cc);
@ -1376,6 +1600,65 @@ class Assembler : public AbstractAssembler {
return r;
}

static int64_t rsmask_48( Address a) { assert(a.is_RSform(), "bad address format"); return rsmask_48( a.disp12(), a.base()); }
static int64_t rxmask_48( Address a) { if (a.is_RXform()) { return rxmask_48( a.disp12(), a.index(), a.base()); }
else if (a.is_RSform()) { return rsmask_48( a.disp12(), a.base()); }
else { guarantee(false, "bad address format"); return 0; }
}
static int64_t rsymask_48(Address a) { assert(a.is_RSYform(), "bad address format"); return rsymask_48(a.disp20(), a.base()); }
static int64_t rxymask_48(Address a) { if (a.is_RXYform()) { return rxymask_48( a.disp20(), a.index(), a.base()); }
else if (a.is_RSYform()) { return rsymask_48( a.disp20(), a.base()); }
else { guarantee(false, "bad address format"); return 0; }
}

static int64_t rsmask_48( int64_t d2, Register b2) { return uimm12(d2, 20, 48) | regz(b2, 16, 48); }
static int64_t rxmask_48( int64_t d2, Register x2, Register b2) { return uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48); }
static int64_t rsymask_48(int64_t d2, Register b2) { return simm20(d2) | regz(b2, 16, 48); }
static int64_t rxymask_48(int64_t d2, Register x2, Register b2) { return simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48); }

// Address calculated from d12(vx,b) - vx is vector index register.
static int64_t rvmask_48( int64_t d2, VectorRegister x2, Register b2) { return uimm12(d2, 20, 48) | vreg(x2, 12) | regz(b2, 16, 48); }

static int64_t vreg_mask(VectorRegister v, int pos) {
return vreg(v, pos) | v->RXB_mask(pos);
}

// Vector Element Size Control. 4-bit field which indicates the size of the vector elements.
static int64_t vesc_mask(int64_t size, int min_size, int max_size, int pos) {
// min_size - minimum element size. Not all instructions support element sizes beginning with "byte".
// max_size - maximum element size. Not all instructions support element sizes up to "QW".
assert((min_size <= size) && (size <= max_size), "element size control out of range");
return uimm4(size, pos, 48);
}

// Vector Element IndeX. 4-bit field which indexes the target vector element.
static int64_t veix_mask(int64_t ix, int el_size, int pos) {
// el_size - size of the vector element. This is a VRegElemType enum value.
// ix - vector element index.
int max_ix = -1;
switch (el_size) {
case VRET_BYTE: max_ix = 15; break;
case VRET_HW: max_ix = 7; break;
case VRET_FW: max_ix = 3; break;
case VRET_DW: max_ix = 1; break;
case VRET_QW: max_ix = 0; break;
default: guarantee(false, "bad vector element size %d", el_size); break;
}
assert((0 <= ix) && (ix <= max_ix), "element size out of range (0 <= %ld <= %d)", ix, max_ix);
return uimm4(ix, pos, 48);
}
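// [Editorial sketch, not part of this commit] vesc_mask() encodes an element *size*
// while veix_mask() encodes an element *index*, so the legal range of the m3/m4 operand
// depends on which helper an instruction uses. A halfword element load has 8 slots:
//   z_vleh(Z_V1, 0, Z_R0, Z_R2, 7);   // index 7 is the last valid HW slot (16/2 - 1)
//   z_vleh(Z_V1, 0, Z_R0, Z_R2, 8);   // would trip the range assert in veix_mask()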

// Vector Operation Result Control. 4-bit field.
static int64_t voprc_any(int64_t flags, int pos, int64_t allowed_flags = 0b1111) {
assert((flags & allowed_flags) == flags, "Invalid VOPRC_* flag combination: %d", (int)flags);
return uimm4(flags, pos, 48);
}

// Vector Operation Result Control. Condition code setting.
static int64_t voprc_ccmask(int64_t flags, int pos) {
return voprc_any(flags, pos, VOPRC_CCIGN | VOPRC_CCSET);
}

public:

//--------------------------------------------------
@ -1453,6 +1736,8 @@ class Assembler : public AbstractAssembler {
static long imm24(int64_t i24, int s, int len) { return imm(i24, 24) << (len-s-24); }
static long imm32(int64_t i32, int s, int len) { return imm(i32, 32) << (len-s-32); }

static long vreg(VectorRegister v, int pos) { const int len = 48; return u_field(v->encoding()&0x0f, (len-pos)-1, (len-pos)-4) | v->RXB_mask(pos); }

static long fregt(FloatRegister r, int s, int len) { return freg(r,s,len); }
static long freg( FloatRegister r, int s, int len) { return u_field(r->encoding(), (len-s)-1, (len-s)-4); }

@ -1840,13 +2125,16 @@ class Assembler : public AbstractAssembler {
inline void z_alsi( const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint32 -- z10
inline void z_algsi(const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint64 -- z10

// negate
// sign adjustment
inline void z_lcr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int32
inline void z_lcgr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int64
inline void z_lcgfr(Register r1, Register r2); // neg r1 = -r2 ; int64 <- int32
inline void z_lnr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int32
inline void z_lngr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int64
inline void z_lngfr(Register r1, Register r2); // neg r1 = -|r2| ; int64 <- int32
inline void z_lpr( Register r1, Register r2 = noreg); // r1 = |r2| ; int32
inline void z_lpgr( Register r1, Register r2 = noreg); // r1 = |r2| ; int64
inline void z_lpgfr(Register r1, Register r2); // r1 = |r2| ; int64 <- int32

// subtract instructions
// sub registers
@ -2125,6 +2413,422 @@ class Assembler : public AbstractAssembler {
inline void z_trtt(Register r1, Register r2, int64_t m3);


//---------------------------
//-- Vector Instructions --
//---------------------------

//---< Vector Support Instructions >---

// Load (transfer from memory)
inline void z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);

// Gather/Scatter
inline void z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);

inline void z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);

// load and replicate
inline void z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vlrepb(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlreph(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlrepf(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlrepg(VectorRegister v1, int64_t d2, Register x2, Register b2);

inline void z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vllezb(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezh(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezf(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezg(VectorRegister v1, int64_t d2, Register x2, Register b2);

inline void z_vlbb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vll( VectorRegister v1, Register r3, int64_t d2, Register b2);

// Load (register to register)
inline void z_vlr( VectorRegister v1, VectorRegister v2);

inline void z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2);

inline void z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4);
inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2);

inline void z_vlvgp( VectorRegister v1, Register r2, Register r3);

// vector register pack
inline void z_vpk( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vpkh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkg( VectorRegister v1, VectorRegister v2, VectorRegister v3);

inline void z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);

inline void z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);

// vector register unpack (sign-extended)
inline void z_vuph( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuphb( VectorRegister v1, VectorRegister v2);
inline void z_vuphh( VectorRegister v1, VectorRegister v2);
inline void z_vuphf( VectorRegister v1, VectorRegister v2);
inline void z_vupl( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuplb( VectorRegister v1, VectorRegister v2);
inline void z_vuplh( VectorRegister v1, VectorRegister v2);
inline void z_vuplf( VectorRegister v1, VectorRegister v2);

// vector register unpack (zero-extended)
inline void z_vuplh( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuplhb( VectorRegister v1, VectorRegister v2);
inline void z_vuplhh( VectorRegister v1, VectorRegister v2);
inline void z_vuplhf( VectorRegister v1, VectorRegister v2);
inline void z_vupll( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vupllb( VectorRegister v1, VectorRegister v2);
inline void z_vupllh( VectorRegister v1, VectorRegister v2);
inline void z_vupllf( VectorRegister v1, VectorRegister v2);

// vector register merge high/low
inline void z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmrhb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhg(VectorRegister v1, VectorRegister v2, VectorRegister v3);

inline void z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmrlb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlg(VectorRegister v1, VectorRegister v2, VectorRegister v3);

// vector register permute
inline void z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vpdi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);

// vector register replicate
inline void z_vrep( VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4);
inline void z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepi( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vrepib(VectorRegister v1, int64_t imm2);
inline void z_vrepih(VectorRegister v1, int64_t imm2);
inline void z_vrepif(VectorRegister v1, int64_t imm2);
inline void z_vrepig(VectorRegister v1, int64_t imm2);

inline void z_vsel( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vseg( VectorRegister v1, VectorRegister v2, int64_t imm3);

// Load (immediate)
inline void z_vleib( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleih( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleif( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleig( VectorRegister v1, int64_t imm2, int64_t m3);

// Store
inline void z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2);

// Misc
inline void z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4);
inline void z_vgmb( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmh( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmf( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmg( VectorRegister v1, int64_t imm2, int64_t imm3);

inline void z_vgbm( VectorRegister v1, int64_t imm2);
inline void z_vzero( VectorRegister v1); // preferred method to set vreg to all zeroes
inline void z_vone( VectorRegister v1); // preferred method to set vreg to all ones
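// [Editorial sketch, not part of this commit] z_vzero()/z_vone() presumably reduce to
// VGBM with byte masks 0x0000 and 0xffff, which is why they are the preferred idiom:
//   z_vzero(Z_V0);   // likely equivalent to z_vgbm(Z_V0, 0x0000)
//   z_vone(Z_V0);    // likely equivalent to z_vgbm(Z_V0, 0xffff)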

//---< Vector Arithmetic Instructions >---

// Load
inline void z_vlc( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vlcb( VectorRegister v1, VectorRegister v2);
inline void z_vlch( VectorRegister v1, VectorRegister v2);
inline void z_vlcf( VectorRegister v1, VectorRegister v2);
inline void z_vlcg( VectorRegister v1, VectorRegister v2);
inline void z_vlp( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vlpb( VectorRegister v1, VectorRegister v2);
inline void z_vlph( VectorRegister v1, VectorRegister v2);
inline void z_vlpf( VectorRegister v1, VectorRegister v2);
inline void z_vlpg( VectorRegister v1, VectorRegister v2);

// ADD
inline void z_va( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vab( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vah( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vag( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vacc( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vaccb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vacch( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccq( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// SUB
inline void z_vs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// MULTIPLY
inline void z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);

// MULTIPLY & ADD
inline void z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmalh( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmae( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmale( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmao( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmalo( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);

// VECTOR SUM
inline void z_vsum( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// Average
inline void z_vavg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vavgb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// VECTOR Galois Field Multiply Sum
inline void z_vgfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vgfmb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// VECTOR Galois Field Multiply Sum and Accumulate
inline void z_vgfma( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);

//---< Vector Logical Instructions >---

// AND
inline void z_vn( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vnc( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// XOR
inline void z_vx( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// NOR
inline void z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// OR
inline void z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// Comparison (element-wise)
inline void z_vceq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vceqb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vch( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vchb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vchlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// Max/Min (element-wise)
inline void z_vmx( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmxb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmxlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmn( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmnb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmng( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmnlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// Leading/Trailing Zeros, population count
inline void z_vclz( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vclzb( VectorRegister v1, VectorRegister v2);
inline void z_vclzh( VectorRegister v1, VectorRegister v2);
inline void z_vclzf( VectorRegister v1, VectorRegister v2);
inline void z_vclzg( VectorRegister v1, VectorRegister v2);
inline void z_vctz( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vctzb( VectorRegister v1, VectorRegister v2);
inline void z_vctzh( VectorRegister v1, VectorRegister v2);
inline void z_vctzf( VectorRegister v1, VectorRegister v2);
inline void z_vctzg( VectorRegister v1, VectorRegister v2);
inline void z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3);

// Rotate/Shift
inline void z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verim( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5);
inline void z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);

inline void z_veslv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_veslb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);

inline void z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesra( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);

inline void z_vsl( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vslb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsldb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);

inline void z_vsra( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrab( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrl( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);

// Test under Mask
inline void z_vtm( VectorRegister v1, VectorRegister v2);

//---< Vector String Instructions >---
inline void z_vfae( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find any element
inline void z_vfaeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfaeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfaef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfee( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element equal
inline void z_vfeeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfene( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element not equal
inline void z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vstrc( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6); // String range compare
inline void z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vistr( VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5); // Isolate String
inline void z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrbs(VectorRegister v1, VectorRegister v2);
inline void z_vistrhs(VectorRegister v1, VectorRegister v2);
inline void z_vistrfs(VectorRegister v1, VectorRegister v2);
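// [Editorial sketch, not part of this commit] A typical SIMD string scan pairs one of
// the find instructions with the VOPRC_* flags defined above, 16 bytes per iteration:
//   z_vl(Z_V2, 0, Z_R0, Z_R2);                          // load 16 input bytes from 0(Z_R2)
//   z_vfaeb(Z_V1, Z_V2, Z_V3, VOPRC_CCSET | VOPRC_ZS);  // match against Z_V3 or find a zero byte
//   // CC then distinguishes "found" / "zero terminator" / "neither"; see bcondFound above.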


// Floatingpoint instructions
// ==========================

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -309,6 +309,9 @@ inline void Assembler::z_lcgfr(Register r1, Register r2) { emit_32( LCGFR_ZOPC |
inline void Assembler::z_lnr( Register r1, Register r2) { emit_16( LNR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
inline void Assembler::z_lngr( Register r1, Register r2) { emit_32( LNGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
inline void Assembler::z_lngfr(Register r1, Register r2) { emit_32( LNGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
inline void Assembler::z_lpr( Register r1, Register r2) { emit_16( LPR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
inline void Assembler::z_lpgr( Register r1, Register r2) { emit_32( LPGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
inline void Assembler::z_lpgfr(Register r1, Register r2) { emit_32( LPGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }

inline void Assembler::z_lrvr( Register r1, Register r2) { emit_32( LRVR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_lrvgr(Register r1, Register r2) { emit_32( LRVGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
@ -702,6 +705,421 @@ inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2)
inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }


//---------------------------
//-- Vector Instructions --
//---------------------------

//---< Vector Support Instructions >---

// Load (transfer from memory)
inline void Assembler::z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VLM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
inline void Assembler::z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VL_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
inline void Assembler::z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }

// Gather/Scatter
inline void Assembler::z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32)); }

inline void Assembler::z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32)); }

// load and replicate
inline void Assembler::z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLREP_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlrepb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_BYTE); }// load byte and replicate to all vector elements of type 'B'
inline void Assembler::z_vlreph( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_HW); } // load HW and replicate to all vector elements of type 'H'
inline void Assembler::z_vlrepf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_FW); } // load FW and replicate to all vector elements of type 'F'
inline void Assembler::z_vlrepg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_DW); } // load DW and replicate to all vector elements of type 'G'

inline void Assembler::z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLLEZ_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vllezb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_BYTE); }// load logical byte into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezh( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_HW); } // load logical HW into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_FW); } // load logical FW into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_DW); } // load logical DW into left DW of VR, zero all other bit positions.

inline void Assembler::z_vlbb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLBB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(m3, 32, 48)); }
inline void Assembler::z_vll( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VLL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }

// Load (register to register)
inline void Assembler::z_vlr ( VectorRegister v1, VectorRegister v2) {emit_48(VLR_ZOPC | vreg(v1, 8) | vreg(v2, 12)); }

inline void Assembler::z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VLGV_ZOPC | reg(r1, 8, 48) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_BYTE); } // load byte from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_HW); } // load HW from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_FW); } // load FW from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_DW); } // load DW from VR element (index d2(b2)) into GR.

inline void Assembler::z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_BYTE); }
inline void Assembler::z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_HW); }
inline void Assembler::z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_FW); }
inline void Assembler::z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_DW); }

inline void Assembler::z_vlvgp( VectorRegister v1, Register r2, Register r3) {emit_48(VLVGP_ZOPC | vreg(v1, 8) | reg(r2, 12, 48) | reg(r3, 16, 48)); }
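// [Editorial sketch, not part of this commit] VLVGP assembles a 128-bit value from two
// GRs (r2 into doubleword element 0, r3 into element 1), so storing a GR pair as one
// 16-byte unit could look like:
//   z_vlvgp(Z_V0, Z_R2, Z_R3);   // V0 = R2 : R3
//   z_vst(Z_V0, 0, Z_R0, Z_R4);  // store all 16 bytes at 0(Z_R4)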

// vector register pack
inline void Assembler::z_vpk( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPK_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32)); }
inline void Assembler::z_vpkh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vpkf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vpkg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_DW); } // vector element type 'G'

inline void Assembler::z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vpkshs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vpksfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vpksgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC

inline void Assembler::z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKLS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC

// vector register unpack (sign-extended)
inline void Assembler::z_vuph( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vuphb( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vuphh( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vuphf( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vupl( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vuplb( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vuplh( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vuplf( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_FW); } // vector element type 'F'

// vector register unpack (zero-extended)
inline void Assembler::z_vuplh( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vuplhb( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vuplhh( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vuplhf( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vupll( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPLL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vupllb( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vupllh( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vupllf( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_FW); } // vector element type 'F'

// vector register merge high/low
inline void Assembler::z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
|
||||
inline void Assembler::z_vmrhb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
|
||||
inline void Assembler::z_vmrhh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_HW); } // vector element type 'H'
|
||||
inline void Assembler::z_vmrhf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_FW); } // vector element type 'F'
|
||||
inline void Assembler::z_vmrhg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_DW); } // vector element type 'G'
|
||||
|
||||
inline void Assembler::z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
|
||||
inline void Assembler::z_vmrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
|
||||
inline void Assembler::z_vmrlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_HW); } // vector element type 'H'
|
||||
inline void Assembler::z_vmrlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_FW); } // vector element type 'F'
|
||||
inline void Assembler::z_vmrlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_DW); } // vector element type 'G'
|
||||
|
||||
// vector register permute
|
||||
inline void Assembler::z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VPERM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
|
||||
inline void Assembler::z_vpdi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPDI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm4(m4, 32, 48)); }
|
||||
|
||||
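// Note (informal summary, not taken from this change): VPERM selects each result
// byte from the 32-byte concatenation v2:v3, indexed by the low five bits of the
// corresponding byte of v4; VPDI assembles the result from one doubleword of v2
// and one doubleword of v3, selected by the m4 field.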
// vector register replicate
inline void Assembler::z_vrep(   VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4) {emit_48(VREP_ZOPC | vreg(v1, 8) | vreg(v3, 12) | simm16(imm2, 16, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vrepb(  VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vreph(  VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vrepf(  VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vrepg(  VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vrepi(  VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VREPI_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vrepib( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vrepih( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vrepif( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vrepig( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_DW); }    // vector element type 'G'

inline void Assembler::z_vsel(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VSEL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
inline void Assembler::z_vseg(   VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VSEG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | uimm4(m3, 32, 48)); }
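// Note (informal): VSEL is a pure bitwise select, v1 = (v2 & v4) | (v3 & ~v4);
// VSEG sign-extends the rightmost element (width given by m3) of each doubleword
// of v2. Illustrative use, register choices arbitrary:
//   z_vsel(Z_V0, Z_V1, Z_V2, Z_V3);  // merge V1 and V2 under the bit mask in V3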
// Load (immediate)
inline void Assembler::z_vleib(  VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIB_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vleih(  VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIH_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vleif(  VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIF_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vleig(  VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIG_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_DW, 32)); }

// Store
inline void Assembler::z_vstm(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VSTM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
inline void Assembler::z_vst(    VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VST_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
inline void Assembler::z_vsteb(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vsteh(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vstef(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vsteg(  VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }
inline void Assembler::z_vstl(   VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VSTL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }

// Misc
inline void Assembler::z_vgm(    VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4) {emit_48(VGM_ZOPC | vreg(v1, 8) | uimm8(imm2, 16, 48) | uimm8(imm3, 24, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vgmb(   VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vgmh(   VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vgmf(   VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vgmg(   VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_DW); }    // vector element type 'G'

inline void Assembler::z_vgbm(   VectorRegister v1, int64_t imm2) {emit_48(VGBM_ZOPC | vreg(v1, 8) | uimm16(imm2, 16, 48)); }
inline void Assembler::z_vzero(  VectorRegister v1) {z_vgbm(v1, 0); }       // preferred method to set vreg to all zeroes
inline void Assembler::z_vone(   VectorRegister v1) {z_vgbm(v1, 0xffff); }  // preferred method to set vreg to all ones
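// Illustrative sketch (register choices arbitrary): materializing common constants.
//   z_vzero(Z_V0);          // V0 = 0, via VGBM with mask 0x0000
//   z_vone (Z_V1);          // V1 = all ones, via VGBM with mask 0xffff
//   z_vrepib(Z_V2, 0x20);   // V2 = sixteen copies of the byte 0x20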
//---< Vector Arithmetic Instructions >---

// Load
inline void Assembler::z_vlc(    VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VLC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlcb(   VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vlch(   VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vlcf(   VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vlcg(   VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vlp(    VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VLP_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlpb(   VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vlph(   VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vlpf(   VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vlpg(   VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_DW); }    // vector element type 'G'

// ADD
inline void Assembler::z_va(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
inline void Assembler::z_vab(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vah(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vaf(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vag(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vaq(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_QW); }    // vector element type 'Q'
inline void Assembler::z_vacc(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VACC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
inline void Assembler::z_vaccb(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vacch(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vaccf(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vaccg(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vaccq(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_QW); }    // vector element type 'Q'
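// Illustrative sketch (register choices arbitrary): the quadword forms combine
// into a full 128-bit add with carry-out.
//   z_vaq  (Z_V4, Z_V1, Z_V2);   // V4 = V1 + V2 (mod 2^128)
//   z_vaccq(Z_V5, Z_V1, Z_V2);   // V5 = carry out of V1 + V2 (0 or 1)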
// SUB
inline void Assembler::z_vs(     VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
inline void Assembler::z_vsb(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vsh(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vsf(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vsg(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vsq(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_QW); }    // vector element type 'Q'
inline void Assembler::z_vscbi(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSCBI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
inline void Assembler::z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_QW); }    // vector element type 'Q'

// MULTIPLY
inline void Assembler::z_vml(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VML_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmh(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMH_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmlh(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vme(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VME_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmle(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmo(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMO_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmlo(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }

// MULTIPLY & ADD
inline void Assembler::z_vmal(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAL_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmah(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAH_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmalh(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmae(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAE_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmale(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmao(   VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAO_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmalo(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
// VECTOR SUM
inline void Assembler::z_vsum(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_HW, 32)); }
inline void Assembler::z_vsumb(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsum(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vsumh(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsum(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vsumg(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_FW, 32)); }
inline void Assembler::z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_HW); }   // vector element type 'H'
inline void Assembler::z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_FW); }   // vector element type 'F'
inline void Assembler::z_vsumq(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_DW, 32)); }
inline void Assembler::z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_FW); }   // vector element type 'F'
inline void Assembler::z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_DW); }   // vector element type 'G'

// Average
inline void Assembler::z_vavg(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vavgb(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vavgh(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vavgf(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vavgg(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vavgl(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVGL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_HW); }   // vector element type 'H'
inline void Assembler::z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_FW); }   // vector element type 'F'
inline void Assembler::z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_DW); }   // vector element type 'G'

// VECTOR Galois Field Multiply Sum
inline void Assembler::z_vgfm(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VGFM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vgfmb(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vgfmh(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vgfmf(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vgfmg(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vgfma(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VGFMA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_DW, 20)); }
inline void Assembler::z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_DW); }    // vector element type 'G'
//---< Vector Logical Instructions >---

// AND
inline void Assembler::z_vn(     VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VN_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vnc(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }

// XOR
inline void Assembler::z_vx(     VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VX_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }

// NOR
inline void Assembler::z_vno(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }

// OR
inline void Assembler::z_vo(     VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VO_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// Comparison (element-wise)
inline void Assembler::z_vceq(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCEQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vceqb(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); }  // vector element type 'B', don't set CC
inline void Assembler::z_vceqh(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCIGN); }    // vector element type 'H', don't set CC
inline void Assembler::z_vceqf(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCIGN); }    // vector element type 'F', don't set CC
inline void Assembler::z_vceqg(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCIGN); }    // vector element type 'G', don't set CC
inline void Assembler::z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); }  // vector element type 'B', set CC
inline void Assembler::z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCSET); }    // vector element type 'H', set CC
inline void Assembler::z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCSET); }    // vector element type 'F', set CC
inline void Assembler::z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCSET); }    // vector element type 'G', set CC
inline void Assembler::z_vch(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vchb(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); }   // vector element type 'B', don't set CC
inline void Assembler::z_vchh(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCIGN); }     // vector element type 'H', don't set CC
inline void Assembler::z_vchf(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCIGN); }     // vector element type 'F', don't set CC
inline void Assembler::z_vchg(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCIGN); }     // vector element type 'G', don't set CC
inline void Assembler::z_vchbs(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); }   // vector element type 'B', set CC
inline void Assembler::z_vchhs(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCSET); }     // vector element type 'H', set CC
inline void Assembler::z_vchfs(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCSET); }     // vector element type 'F', set CC
inline void Assembler::z_vchgs(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCSET); }     // vector element type 'G', set CC
inline void Assembler::z_vchl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCHL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vchlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); }  // vector element type 'B', don't set CC
inline void Assembler::z_vchlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCIGN); }    // vector element type 'H', don't set CC
inline void Assembler::z_vchlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCIGN); }    // vector element type 'F', don't set CC
inline void Assembler::z_vchlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCIGN); }    // vector element type 'G', don't set CC
inline void Assembler::z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); }  // vector element type 'B', set CC
inline void Assembler::z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCSET); }    // vector element type 'H', set CC
inline void Assembler::z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCSET); }    // vector element type 'F', set CC
inline void Assembler::z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCSET); }    // vector element type 'G', set CC
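// Illustrative sketch: the *s variants set the condition code, which generated
// code can branch on (per the PoP: CC0 = all elements matched, CC3 = none matched).
//   z_vceqbs(Z_V0, Z_V1, Z_V2);  // byte-wise compare V1 == V2, CC updated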
// Max/Min (element-wise)
inline void Assembler::z_vmx(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmxb(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_BYTE); }   // vector element type 'B'
inline void Assembler::z_vmxh(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_HW); }     // vector element type 'H'
inline void Assembler::z_vmxf(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_FW); }     // vector element type 'F'
inline void Assembler::z_vmxg(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_DW); }     // vector element type 'G'
inline void Assembler::z_vmxl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMXL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmxlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vmxlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vmxlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vmxlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vmn(    VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMN_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmnb(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_BYTE); }   // vector element type 'B'
inline void Assembler::z_vmnh(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_HW); }     // vector element type 'H'
inline void Assembler::z_vmnf(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_FW); }     // vector element type 'F'
inline void Assembler::z_vmng(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_DW); }     // vector element type 'G'
inline void Assembler::z_vmnl(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMNL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmnlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vmnlh(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vmnlf(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vmnlg(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_DW); }    // vector element type 'G'

// Leading/Trailing Zeros, population count
inline void Assembler::z_vclz(   VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VCLZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vclzb(  VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vclzh(  VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vclzf(  VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vclzg(  VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vctz(   VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VCTZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vctzb(  VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vctzh(  VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vctzf(  VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vctzg(  VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VPOPCT_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
// Rotate/Shift
inline void Assembler::z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VERLLV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_verll(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VERLL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_BYTE);}  // vector element type 'B'
inline void Assembler::z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_HW);}    // vector element type 'H'
inline void Assembler::z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_FW);}    // vector element type 'F'
inline void Assembler::z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_DW);}    // vector element type 'G'
inline void Assembler::z_verim(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5) {emit_48(VERIM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48) | vesc_mask(m5, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_DW); }    // vector element type 'G'

inline void Assembler::z_veslv(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESLV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vesl(   VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_veslb(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_BYTE);}  // vector element type 'B'
inline void Assembler::z_veslh(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_HW);}    // vector element type 'H'
inline void Assembler::z_veslf(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_FW);}    // vector element type 'F'
inline void Assembler::z_veslg(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_DW);}    // vector element type 'G'

inline void Assembler::z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESRAV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vesra(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRA_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_BYTE);}  // vector element type 'B'
inline void Assembler::z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_HW);}    // vector element type 'H'
inline void Assembler::z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_FW);}    // vector element type 'F'
inline void Assembler::z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_DW);}    // vector element type 'G'
inline void Assembler::z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESRLV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_BYTE); }  // vector element type 'B'
inline void Assembler::z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_HW); }    // vector element type 'H'
inline void Assembler::z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_FW); }    // vector element type 'F'
inline void Assembler::z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_DW); }    // vector element type 'G'
inline void Assembler::z_vesrl(  VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_BYTE);}  // vector element type 'B'
inline void Assembler::z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_HW);}    // vector element type 'H'
inline void Assembler::z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_FW);}    // vector element type 'F'
inline void Assembler::z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_DW);}    // vector element type 'G'

inline void Assembler::z_vsl(    VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSL_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vslb(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsldb(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {emit_48(VSLDB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48)); }

inline void Assembler::z_vsra(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRA_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrab(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRAB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrl(   VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRL_ZOPC  | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrlb(  VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }

// Test under Mask
inline void Assembler::z_vtm(    VectorRegister v1, VectorRegister v2) {emit_48(VTM_ZOPC | vreg(v1, 8) | vreg(v2, 12)); }
//---< Vector String Instructions >---
inline void Assembler::z_vfae(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFAE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24)); }  // Find any element
inline void Assembler::z_vfaeb(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfaeh(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfaef(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vfee(   VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFEE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24)); }  // Find element equal
inline void Assembler::z_vfeeb(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfeeh(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfeef(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vfene(  VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFENE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24)); }  // Find element not equal
inline void Assembler::z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vstrc(  VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6) {emit_48(VSTRC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(imm5, VRET_BYTE, VRET_FW, 20) | voprc_any(cc6, 24)); }  // String range compare
inline void Assembler::z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_BYTE, cc6); }
inline void Assembler::z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_HW, cc6); }
inline void Assembler::z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_FW, cc6); }
inline void Assembler::z_vistr(  VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5) {emit_48(VISTR_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(imm3, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24)); }  // Isolate string
inline void Assembler::z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_BYTE, cc5); }
inline void Assembler::z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_HW, cc5); }
inline void Assembler::z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_FW, cc5); }
inline void Assembler::z_vistrbs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_BYTE, VOPRC_CCSET); }
inline void Assembler::z_vistrhs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_HW, VOPRC_CCSET); }
inline void Assembler::z_vistrfs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_FW, VOPRC_CCSET); }
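// Illustrative sketch (register choices arbitrary): VFEE places the byte index
// of the first equal element into byte 7 of the result register; with
// VOPRC_CCSET the condition code tells whether any match was found.
//   z_vfeeb(Z_V0, Z_V1, Z_V2, VOPRC_CCSET);  // find first equal byte, set CC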
//-------------------------------
// FLOAT INSTRUCTIONS
//-------------------------------

@ -1,6 +1,6 @@
/*
 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016 SAP SE. All rights reserved.
 * Copyright (c) 2016, 2017 Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@
// Sorted according to sparc.

// z/Architecture remembers branch targets, so don't share vtables.
define_pd_global(bool, ShareVtableStubs, false);
define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.

define_pd_global(bool, ImplicitNullChecks, true);  // Generate code for implicit null checks.

@ -4671,6 +4671,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
  mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));
  mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
  mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));
  resolve_oop_handle(mirror);
}

//---------------------------------------------------------------
@ -1,6 +1,6 @@
/*
 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016 SAP SE. All rights reserved.
 * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -35,3 +35,5 @@
REGISTER_DEFINITION(Register, noreg);

REGISTER_DEFINITION(FloatRegister, fnoreg);

REGISTER_DEFINITION(VectorRegister, vnoreg);
@ -1,6 +1,6 @@
/*
 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016 SAP SE. All rights reserved.
 * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -46,3 +46,13 @@ const char* FloatRegisterImpl::name() const {
  };
  return is_valid() ? names[encoding()] : "fnoreg";
}

const char* VectorRegisterImpl::name() const {
  const char* names[number_of_registers] = {
    "Z_V0",  "Z_V1",  "Z_V2",  "Z_V3",  "Z_V4",  "Z_V5",  "Z_V6",  "Z_V7",
    "Z_V8",  "Z_V9",  "Z_V10", "Z_V11", "Z_V12", "Z_V13", "Z_V14", "Z_V15",
    "Z_V16", "Z_V17", "Z_V18", "Z_V19", "Z_V20", "Z_V21", "Z_V22", "Z_V23",
    "Z_V24", "Z_V25", "Z_V26", "Z_V27", "Z_V28", "Z_V29", "Z_V30", "Z_V31"
  };
  return is_valid() ? names[encoding()] : "vnoreg";
}
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -34,11 +34,6 @@ class VMRegImpl;
|
||||
|
||||
typedef VMRegImpl* VMReg;
|
||||
|
||||
// Use Register as shortcut.
|
||||
class RegisterImpl;
|
||||
typedef RegisterImpl* Register;
|
||||
|
||||
// The implementation of integer registers for z/Architecture.
|
||||
|
||||
// z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001
|
||||
//
|
||||
@ -57,6 +52,17 @@ typedef RegisterImpl* Register;
|
||||
// f1,f3,f5,f7 General purpose (volatile)
// f8-f15 General purpose (nonvolatile)


//===========================
//=== Integer Registers ===
//===========================

// Use Register as shortcut.
class RegisterImpl;
typedef RegisterImpl* Register;

// The implementation of integer registers for z/Architecture.

inline Register as_Register(int encoding) {
  return (Register)(long)encoding;
}

@ -110,6 +116,11 @@ CONSTANT_REGISTER_DECLARATION(Register, Z_R13, (13));
CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14));
CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15));


//=============================
//=== Condition Registers ===
//=============================

// Use ConditionRegister as shortcut
class ConditionRegisterImpl;
typedef ConditionRegisterImpl* ConditionRegister;
@ -159,7 +170,7 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
// dangers of defines.
// If a particular file has a problem with these defines then it's possible
// to turn them off in that file by defining
// DONT_USE_REGISTER_DEFINES. Register_definition_s390.cpp does that
// DONT_USE_REGISTER_DEFINES. Register_definitions_s390.cpp does that
// so that it's able to provide real definitions of these registers
// for use in debuggers and such.

@ -186,6 +197,11 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
#define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES


//=========================
//=== Float Registers ===
//=========================

// Use FloatRegister as shortcut
class FloatRegisterImpl;
typedef FloatRegisterImpl* FloatRegister;
@ -263,22 +279,6 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F15, (15));
#define Z_F15 ((FloatRegister)( Z_F15_FloatRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES

// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.

class ConcreteRegisterImpl : public AbstractRegisterImpl {
 public:
  enum {
    number_of_registers =
      (RegisterImpl::number_of_registers +
       FloatRegisterImpl::number_of_registers)
      * 2 // register halves
      + 1 // condition code register
  };
  static const int max_gpr;
  static const int max_fpr;
};

// Single, Double and Quad fp reg classes. These exist to map the ADLC
// encoding for a floating point register, to the FloatRegister number
// desired by the macroassembler. A FloatRegister is a number between
@ -329,6 +329,161 @@ class QuadFloatRegisterImpl {
};


//==========================
//=== Vector Registers ===
//==========================

// Use VectorRegister as shortcut
class VectorRegisterImpl;
typedef VectorRegisterImpl* VectorRegister;

// The implementation of vector registers for z/Architecture.

inline VectorRegister as_VectorRegister(int encoding) {
  return (VectorRegister)(long)encoding;
}

class VectorRegisterImpl: public AbstractRegisterImpl {
 public:
  enum {
    number_of_registers = 32,
    number_of_arg_registers = 0
  };

  // construction
  inline friend VectorRegister as_VectorRegister(int encoding);

  inline VMReg as_VMReg();

  // accessors
  int encoding() const {
    assert(is_valid(), "invalid register"); return value();
  }

  bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
  bool is_volatile() const { return true; }
  bool is_nonvolatile() const { return false; }

  // Register fields in z/Architecture instructions are 4 bits wide, restricting the
  // addressable register set size to 16.
  // The vector register set size is 32, requiring an extension, by one bit, of the
  // register encoding. This is accomplished by the introduction of a RXB field in the
  // instruction. RXB = Register eXtension Bits.
  // The RXB field contains the MSBs (most significant bit) of the vector register numbers
  // used for this instruction. Assignment of MSB in RXB is by bit position of the
  // register field in the instruction.
  // Example:
  //   The register field starting at bit position 12 in the instruction is assigned RXB bit 0b0100.
  int64_t RXB_mask(int pos) {
    if (encoding() >= number_of_registers/2) {
      switch (pos) {
        case 8:   return ((int64_t)0b1000) << 8; // actual bit pos: 36
        case 12:  return ((int64_t)0b0100) << 8; // actual bit pos: 37
        case 16:  return ((int64_t)0b0010) << 8; // actual bit pos: 38
        case 32:  return ((int64_t)0b0001) << 8; // actual bit pos: 39
        default:
          ShouldNotReachHere();
      }
    }
    return 0;
  }
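
To make the RXB scheme above concrete: a minimal standalone C++ sketch, not part of this change, showing how the four RXB bits could be collected for the up to four vector-register fields of one instruction. The names v1..v4 are hypothetical 5-bit register encodings (0..31).

#include <cstdint>

// Sketch only: gather the MSB of each vector register encoding into the
// 4-bit RXB field, one RXB bit per register-field position, as described above.
static uint8_t rxb_field(int v1, int v2, int v3, int v4) {
  uint8_t rxb = 0;
  if (v1 >= 16) rxb |= 0b1000; // field at bit pos  8 -> RXB bit 0 (instr bit 36)
  if (v2 >= 16) rxb |= 0b0100; // field at bit pos 12 -> RXB bit 1 (instr bit 37)
  if (v3 >= 16) rxb |= 0b0010; // field at bit pos 16 -> RXB bit 2 (instr bit 38)
  if (v4 >= 16) rxb |= 0b0001; // field at bit pos 32 -> RXB bit 3 (instr bit 39)
  return rxb; // the low 4 bits of each encoding stay in the regular register fields
}

For Z_V20 (encoding 20) in the field at bit position 12, this yields RXB = 0b0100, matching the RXB_mask(12) case above.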

  const char* name() const;

  VectorRegister successor() const { return as_VectorRegister(encoding() + 1); }
};

// The Vector registers of z/Architecture.

CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg, (-1));

CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V0, (0));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V1, (1));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V2, (2));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V3, (3));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V4, (4));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V5, (5));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V6, (6));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V7, (7));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V8, (8));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V9, (9));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V10, (10));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V11, (11));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V12, (12));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V13, (13));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V14, (14));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V15, (15));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V16, (16));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V17, (17));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V18, (18));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V19, (19));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V20, (20));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V21, (21));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V22, (22));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V23, (23));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V24, (24));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V25, (25));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V26, (26));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V27, (27));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V28, (28));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V29, (29));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V30, (30));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V31, (31));

#ifndef DONT_USE_REGISTER_DEFINES
#define vnoreg ((VectorRegister)(vnoreg_VectorRegisterEnumValue))
#define Z_V0 ((VectorRegister)( Z_V0_VectorRegisterEnumValue))
#define Z_V1 ((VectorRegister)( Z_V1_VectorRegisterEnumValue))
#define Z_V2 ((VectorRegister)( Z_V2_VectorRegisterEnumValue))
#define Z_V3 ((VectorRegister)( Z_V3_VectorRegisterEnumValue))
#define Z_V4 ((VectorRegister)( Z_V4_VectorRegisterEnumValue))
#define Z_V5 ((VectorRegister)( Z_V5_VectorRegisterEnumValue))
#define Z_V6 ((VectorRegister)( Z_V6_VectorRegisterEnumValue))
#define Z_V7 ((VectorRegister)( Z_V7_VectorRegisterEnumValue))
#define Z_V8 ((VectorRegister)( Z_V8_VectorRegisterEnumValue))
#define Z_V9 ((VectorRegister)( Z_V9_VectorRegisterEnumValue))
#define Z_V10 ((VectorRegister)( Z_V10_VectorRegisterEnumValue))
#define Z_V11 ((VectorRegister)( Z_V11_VectorRegisterEnumValue))
#define Z_V12 ((VectorRegister)( Z_V12_VectorRegisterEnumValue))
#define Z_V13 ((VectorRegister)( Z_V13_VectorRegisterEnumValue))
#define Z_V14 ((VectorRegister)( Z_V14_VectorRegisterEnumValue))
#define Z_V15 ((VectorRegister)( Z_V15_VectorRegisterEnumValue))
#define Z_V16 ((VectorRegister)( Z_V16_VectorRegisterEnumValue))
#define Z_V17 ((VectorRegister)( Z_V17_VectorRegisterEnumValue))
#define Z_V18 ((VectorRegister)( Z_V18_VectorRegisterEnumValue))
#define Z_V19 ((VectorRegister)( Z_V19_VectorRegisterEnumValue))
#define Z_V20 ((VectorRegister)( Z_V20_VectorRegisterEnumValue))
#define Z_V21 ((VectorRegister)( Z_V21_VectorRegisterEnumValue))
#define Z_V22 ((VectorRegister)( Z_V22_VectorRegisterEnumValue))
#define Z_V23 ((VectorRegister)( Z_V23_VectorRegisterEnumValue))
#define Z_V24 ((VectorRegister)( Z_V24_VectorRegisterEnumValue))
#define Z_V25 ((VectorRegister)( Z_V25_VectorRegisterEnumValue))
#define Z_V26 ((VectorRegister)( Z_V26_VectorRegisterEnumValue))
#define Z_V27 ((VectorRegister)( Z_V27_VectorRegisterEnumValue))
#define Z_V28 ((VectorRegister)( Z_V28_VectorRegisterEnumValue))
#define Z_V29 ((VectorRegister)( Z_V29_VectorRegisterEnumValue))
#define Z_V30 ((VectorRegister)( Z_V30_VectorRegisterEnumValue))
#define Z_V31 ((VectorRegister)( Z_V31_VectorRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES


// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.

class ConcreteRegisterImpl : public AbstractRegisterImpl {
 public:
  enum {
    number_of_registers =
      (RegisterImpl::number_of_registers +
       FloatRegisterImpl::number_of_registers)
      * 2 // register halves
      + 1 // condition code register
  };
  static const int max_gpr;
  static const int max_fpr;
};


// Common register declarations used in assembler code.
REGISTER_DECLARATION(Register, Z_EXC_OOP, Z_R2);
REGISTER_DECLARATION(Register, Z_EXC_PC, Z_R3);
|
@ -3149,7 +3149,7 @@ operand noArg_iRegI() %{
  interface(REG_INTER);
%}

// Revenregi and roddRegI constitute and even-odd-pair.
// revenRegI and roddRegI constitute and even-odd-pair.
operand revenRegI() %{
  constraint(ALLOC_IN_RC(z_rarg3_int_reg));
  match(iRegI);
@ -3157,7 +3157,7 @@ operand revenRegI() %{
  interface(REG_INTER);
%}

// Revenregi and roddRegI constitute and even-odd-pair.
// revenRegI and roddRegI constitute and even-odd-pair.
operand roddRegI() %{
  constraint(ALLOC_IN_RC(z_rarg4_int_reg));
  match(iRegI);
@ -3283,7 +3283,7 @@ operand memoryRegP() %{
  interface(REG_INTER);
%}

// Revenregp and roddRegP constitute and even-odd-pair.
// revenRegP and roddRegP constitute and even-odd-pair.
operand revenRegP() %{
  constraint(ALLOC_IN_RC(z_rarg3_ptr_reg));
  match(iRegP);
@ -3291,7 +3291,7 @@ operand revenRegP() %{
  interface(REG_INTER);
%}

// Revenregl and roddRegL constitute and even-odd-pair.
// revenRegP and roddRegP constitute and even-odd-pair.
operand roddRegP() %{
  constraint(ALLOC_IN_RC(z_rarg4_ptr_reg));
  match(iRegP);
@ -3380,7 +3380,7 @@ operand iRegL() %{
  interface(REG_INTER);
%}

// Revenregl and roddRegL constitute and even-odd-pair.
// revenRegL and roddRegL constitute and even-odd-pair.
operand revenRegL() %{
  constraint(ALLOC_IN_RC(z_rarg3_long_reg));
  match(iRegL);
@ -3388,7 +3388,7 @@ operand revenRegL() %{
  interface(REG_INTER);
%}

// Revenregl and roddRegL constitute and even-odd-pair.
// revenRegL and roddRegL constitute and even-odd-pair.
operand roddRegL() %{
  constraint(ALLOC_IN_RC(z_rarg4_long_reg));
  match(iRegL);
@ -6443,6 +6443,32 @@ instruct mulL_Reg_mem(iRegL dst, memory src)%{
  ins_pipe(pipe_class_dummy);
%}

instruct mulHiL_reg_reg(revenRegL Rdst, roddRegL Rsrc1, iRegL Rsrc2, iRegL Rtmp1, flagsReg cr)%{
  match(Set Rdst (MulHiL Rsrc1 Rsrc2));
  effect(TEMP_DEF Rdst, USE_KILL Rsrc1, TEMP Rtmp1, KILL cr);
  ins_cost(7*DEFAULT_COST);
  // TODO: s390 port size(VARIABLE_SIZE);
  format %{ "MulHiL  $Rdst, $Rsrc1, $Rsrc2\t # Multiply High Long" %}
  ins_encode%{
    Register dst  = $Rdst$$Register;
    Register src1 = $Rsrc1$$Register;
    Register src2 = $Rsrc2$$Register;
    Register tmp1 = $Rtmp1$$Register;
    Register tmp2 = $Rdst$$Register;
    // z/Architecture has only unsigned multiply (64 * 64 -> 128).
    // implementing mulhs(a,b) = mulhu(a,b) - (a & (b>>63)) - (b & (a>>63))
    __ z_srag(tmp2, src1, 63); // a>>63
    __ z_srag(tmp1, src2, 63); // b>>63
    __ z_ngr(tmp2, src2);      // b & (a>>63)
    __ z_ngr(tmp1, src1);      // a & (b>>63)
    __ z_agr(tmp1, tmp2);      // ((a & (b>>63)) + (b & (a>>63)))
    __ z_mlgr(dst, src2);      // tricky: 128-bit product is written to even/odd pair (dst,src1),
                               // multiplicand is taken from oddReg (src1), multiplier in src2.
    __ z_sgr(dst, tmp1);
  %}
  ins_pipe(pipe_class_dummy);
%}
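
The signed-from-unsigned high-multiply identity in the comment above can be checked host-side. A minimal sketch, not part of the patch, assuming a compiler with unsigned __int128 and arithmetic right shift on signed integers:

#include <cassert>
#include <cstdint>

// mulhs(a,b) = mulhu(a,b) - (a & (b>>63)) - (b & (a>>63)); b>>63 is an
// arithmetic shift, i.e. an all-ones mask exactly when b is negative.
static int64_t mulhs_via_mulhu(int64_t a, int64_t b) {
  uint64_t hu = (uint64_t)(((unsigned __int128)(uint64_t)a * (uint64_t)b) >> 64);
  uint64_t corr = ((uint64_t)a & (uint64_t)(b >> 63))
                + ((uint64_t)b & (uint64_t)(a >> 63));
  return (int64_t)(hu - corr);
}

int main() {
  assert(mulhs_via_mulhu(-3, 5) == -1);  // high 64 bits of the 128-bit -15
  assert(mulhs_via_mulhu(INT64_MIN, INT64_MAX)
         == (int64_t)(((__int128)INT64_MIN * INT64_MAX) >> 64));
  return 0;
}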

// DIV

// Integer DIVMOD with Register, both quotient and mod results
|
@ -2382,6 +2382,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
  if (is_static) {
    __ mem2reg_opt(obj, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
    __ mem2reg_opt(obj, Address(obj, Klass::java_mirror_offset()));
    __ resolve_oop_handle(obj);
  }
}

|
@ -706,12 +706,13 @@ void VM_Version::determine_features() {
  Label getCPUFEATURES;                   // fcode = -1 (cache)
  Label getCIPHERFEATURES;                // fcode = -2 (cipher)
  Label getMSGDIGESTFEATURES;             // fcode = -3 (SHA)
  Label getVECTORFEATURES;                // fcode = -4 (OS support for vector instructions)
  Label checkLongDispFast;
  Label noLongDisp;
  Label posDisp, negDisp;
  Label errRTN;
  a->z_ltgfr(Z_R0, Z_ARG2);               // Buf len to r0 and test.
  a->z_brl(getFEATURES);                  // negative -> Get machine features.
  a->z_brl(getFEATURES);                  // negative -> Get machine features not covered by facility list.
  a->z_brz(checkLongDispFast);            // zero -> Check for high-speed Long Displacement Facility.
  a->z_aghi(Z_R0, -1);
  a->z_stfle(0, Z_ARG1);
@ -736,6 +737,8 @@ void VM_Version::determine_features() {
  a->z_bre(getCIPHERFEATURES);
  a->z_cghi(Z_R0, -3);                    // -3: Extract detailed crypto capabilities (msg digest instructions).
  a->z_bre(getMSGDIGESTFEATURES);
  a->z_cghi(Z_R0, -4);                    // -4: Verify vector instruction availability (OS support).
  a->z_bre(getVECTORFEATURES);

  a->z_xgr(Z_RET, Z_RET);                 // Not a valid function code.
  a->z_br(Z_R14);                         // Return "operation aborted".
@ -766,6 +769,11 @@ void VM_Version::determine_features() {
  a->z_ecag(Z_RET,Z_R0,0,Z_ARG3);         // Extract information as requested by Z_ARG1 contents.
  a->z_br(Z_R14);

  // Use a vector instruction to verify OS support. Will fail with SIGFPE if OS support is missing.
  a->bind(getVECTORFEATURES);
  a->z_vtm(Z_V0,Z_V0);                    // non-destructive vector instruction. Will cause SIGFPE if not supported.
  a->z_br(Z_R14);

  // Check the performance of the Long Displacement Facility, i.e. find out if we are running on z900 or newer.
  a->bind(checkLongDispFast);
  a->z_llill(Z_R0, 0xffff);               // preset #iterations
@ -962,6 +970,19 @@ void VM_Version::determine_features() {
    _nfeatures = 0;
  }

  if (has_VectorFacility()) {
    // Verify that feature can actually be used. OS support required.
    call_getFeatures(buffer, -4, 0);
    if (printVerbose) {
      ttyLocker ttyl;
      if (has_VectorFacility()) {
        tty->print_cr("  Vector Facility has been verified to be supported by OS");
      } else {
        tty->print_cr("  Vector Facility has been disabled - not supported by OS");
      }
    }
  }

  // Extract Crypto Facility details.
  if (has_Crypto()) {
    // Get cipher features.
|
@ -473,6 +473,8 @@ class VM_Version: public Abstract_VM_Version {
  static void set_has_CryptoExt5()                { _features[0] |= CryptoExtension5Mask; }
  static void set_has_VectorFacility()            { _features[2] |= VectorFacilityMask; }

  static void reset_has_VectorFacility()          { _features[2] &= ~VectorFacilityMask; }

  // Assembler testing.
  static void allow_all();
  static void revert();
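
The paired set/reset accessors above are a plain bit-mask protocol over the _features words, which is what lets determine_features() first set the Vector Facility bit from the facility list and later clear it again if the OS probe (fcode -4) fails. A minimal standalone sketch with hypothetical stand-in names:

#include <cstdint>

// Hypothetical stand-ins for the _features words and the mask constant.
static uint64_t features[3] = { 0, 0, 0 };
static const uint64_t VectorFacilityMask = 1ull << 0;

static void set_has_VectorFacility()   { features[2] |=  VectorFacilityMask; }
static void reset_has_VectorFacility() { features[2] &= ~VectorFacilityMask; }
static bool has_VectorFacility()       { return (features[2] & VectorFacilityMask) != 0; }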
|
@ -122,6 +122,7 @@ class Assembler : public AbstractAssembler {
    fpop1_op3    = 0x34,
    fpop2_op3    = 0x35,
    impdep1_op3  = 0x36,
    addx_op3     = 0x36,
    aes3_op3     = 0x36,
    sha_op3      = 0x36,
    bmask_op3    = 0x36,
@ -133,6 +134,8 @@ class Assembler : public AbstractAssembler {
    fzero_op3    = 0x36,
    fsrc_op3     = 0x36,
    fnot_op3     = 0x36,
    mpmul_op3    = 0x36,
    umulx_op3    = 0x36,
    xmulx_op3    = 0x36,
    crc32c_op3   = 0x36,
    impdep2_op3  = 0x37,
@ -195,6 +198,9 @@ class Assembler : public AbstractAssembler {
    fnegs_opf          = 0x05,
    fnegd_opf          = 0x06,

    addxc_opf          = 0x11,
    addxccc_opf        = 0x13,
    umulxhi_opf        = 0x16,
    alignaddr_opf      = 0x18,
    bmask_opf          = 0x19,

@ -240,7 +246,8 @@ class Assembler : public AbstractAssembler {
    sha256_opf         = 0x142,
    sha512_opf         = 0x143,

    crc32c_opf         = 0x147
    crc32c_opf         = 0x147,
    mpmul_opf          = 0x148
  };

  enum op5s {
@ -380,7 +387,7 @@ class Assembler : public AbstractAssembler {
    assert_signed_range(x, nbits + 2);
  }

  static void assert_unsigned_const(int x, int nbits) {
  static void assert_unsigned_range(int x, int nbits) {
    assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
  }

@ -534,6 +541,12 @@ class Assembler : public AbstractAssembler {
    return x & ((1 << nbits) - 1);
  }

  // unsigned immediate, in low bits, at most nbits long.
  static int uimm(int x, int nbits) {
    assert_unsigned_range(x, nbits);
    return x & ((1 << nbits) - 1);
  }
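
uimm() is the unsigned counterpart of the signed immediate helpers and feeds fields such as mpmul's 5-bit immediate. A minimal sketch, not the JDK code, of how such checked helpers compose into a 32-bit instruction word; the field positions here are illustrative only:

#include <cassert>
#include <cstdint>

// Checked unsigned field, shifted into place; out-of-range values trip the
// assert, mirroring assert_unsigned_range() above.
static uint32_t field(uint32_t x, int nbits, int shift) {
  assert(x < (1u << nbits) && "unsigned constant out of range");
  return (x & ((1u << nbits) - 1)) << shift;
}

// E.g. a SPARC-style arithmetic format: op[31:30] | rd[29:25] | op3[24:19] | ... | imm[4:0]
static uint32_t example_word(uint32_t op3, uint32_t imm5) {
  return field(2, 2, 30) | field(0, 5, 25) | field(op3, 6, 19) | field(imm5, 5, 0);
}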

  // compute inverse of wdisp16
  static intptr_t inv_wdisp16(int x, intptr_t pos) {
    int lo = x & ((1 << 14) - 1);
@ -631,6 +644,9 @@ class Assembler : public AbstractAssembler {
  // FMAf instructions supported only on certain processors
  static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }

  // MPMUL instruction supported only on certain processors
  static void mpmul_only() { assert(VM_Version::has_mpmul(), "This instruction only works on SPARC with MPMUL"); }

  // instruction only in VIS1
  static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }

@ -772,11 +788,12 @@ class Assembler : public AbstractAssembler {
    AbstractAssembler::flush();
  }

  inline void emit_int32(int);  // shadows AbstractAssembler::emit_int32
  inline void emit_data(int);
  inline void emit_data(int, RelocationHolder const &rspec);
  inline void emit_data(int, relocInfo::relocType rtype);
  // helper for above functions
  inline void emit_int32(int32_t);  // shadows AbstractAssembler::emit_int32
  inline void emit_data(int32_t);
  inline void emit_data(int32_t, RelocationHolder const&);
  inline void emit_data(int32_t, relocInfo::relocType rtype);

  // Helper for the above functions.
  inline void check_delay();


@ -929,6 +946,10 @@ class Assembler : public AbstractAssembler {
  // fmaf instructions.

  inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
  inline void fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);

  inline void fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
  inline void fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);

  // pp 165

@ -960,6 +981,8 @@ class Assembler : public AbstractAssembler {
  inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d,
                  RelocationHolder const &rspec = RelocationHolder());

  inline void ldd(Register s1, Register s2, FloatRegister d);
  inline void ldd(Register s1, int simm13a, FloatRegister d);

  inline void ldfsr(Register s1, Register s2);
  inline void ldfsr(Register s1, int simm13a);
@ -987,8 +1010,6 @@ class Assembler : public AbstractAssembler {
  inline void lduw(Register s1, int simm13a, Register d);
  inline void ldx(Register s1, Register s2, Register d);
  inline void ldx(Register s1, int simm13a, Register d);
  inline void ldd(Register s1, Register s2, Register d);
  inline void ldd(Register s1, int simm13a, Register d);

  // pp 177

@ -1157,6 +1178,9 @@ class Assembler : public AbstractAssembler {
  inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2);
  inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a);

  inline void std(FloatRegister d, Register s1, Register s2);
  inline void std(FloatRegister d, Register s1, int simm13a);

  inline void stfsr(Register s1, Register s2);
  inline void stfsr(Register s1, int simm13a);
  inline void stxfsr(Register s1, Register s2);
@ -1177,8 +1201,6 @@ class Assembler : public AbstractAssembler {
  inline void stw(Register d, Register s1, int simm13a);
  inline void stx(Register d, Register s1, Register s2);
  inline void stx(Register d, Register s1, int simm13a);
  inline void std(Register d, Register s1, Register s2);
  inline void std(Register d, Register s1, int simm13a);

  // pp 177

@ -1267,6 +1289,9 @@ class Assembler : public AbstractAssembler {

  // VIS3 instructions

  inline void addxc(Register s1, Register s2, Register d);
  inline void addxccc(Register s1, Register s2, Register d);

  inline void movstosw(FloatRegister s, Register d);
  inline void movstouw(FloatRegister s, Register d);
  inline void movdtox(FloatRegister s, Register d);
@ -1276,6 +1301,7 @@ class Assembler : public AbstractAssembler {

  inline void xmulx(Register s1, Register s2, Register d);
  inline void xmulxhi(Register s1, Register s2, Register d);
  inline void umulxhi(Register s1, Register s2, Register d);

  // Crypto SHA instructions

@ -1287,6 +1313,10 @@ class Assembler : public AbstractAssembler {

  inline void crc32c(FloatRegister s1, FloatRegister s2, FloatRegister d);

  // MPMUL instruction

  inline void mpmul(int uimm5);

  // Creation
  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
#ifdef VALIDATE_PIPELINE
|
@ -59,7 +59,7 @@ inline void Assembler::check_delay() {
#endif
}

inline void Assembler::emit_int32(int x) {
inline void Assembler::emit_int32(int32_t x) {
  check_delay();
#ifdef VALIDATE_PIPELINE
  _hazard_state = NoHazard;
@ -67,16 +67,16 @@ inline void Assembler::emit_int32(int x) {
  AbstractAssembler::emit_int32(x);
}

inline void Assembler::emit_data(int x) {
inline void Assembler::emit_data(int32_t x) {
  emit_int32(x);
}

inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
inline void Assembler::emit_data(int32_t x, relocInfo::relocType rtype) {
  relocate(rtype);
  emit_int32(x);
}

inline void Assembler::emit_data(int x, RelocationHolder const &rspec) {
inline void Assembler::emit_data(int32_t x, RelocationHolder const &rspec) {
  relocate(rspec);
  emit_int32(x);
}
@ -359,6 +359,19 @@ inline void Assembler::fmadd(FloatRegisterImpl::Width w, FloatRegister s1, Float
  fmaf_only();
  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w));
}
inline void Assembler::fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
  fmaf_only();
  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x4 + w) | fs2(s2, w));
}

inline void Assembler::fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
  fmaf_only();
  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0xc + w) | fs2(s2, w));
}
inline void Assembler::fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
  fmaf_only();
  emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x8 + w) | fs2(s2, w));
}

inline void Assembler::flush(Register s1, Register s2) {
  emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2));
@ -402,6 +415,15 @@ inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a,
  emit_data(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);
}

inline void Assembler::ldd(Register s1, Register s2, FloatRegister d) {
  assert(d->is_even(), "not even");
  ldf(FloatRegisterImpl::D, s1, s2, d);
}
inline void Assembler::ldd(Register s1, int simm13a, FloatRegister d) {
  assert(d->is_even(), "not even");
  ldf(FloatRegisterImpl::D, s1, simm13a, d);
}

inline void Assembler::ldxfsr(Register s1, Register s2) {
  emit_int32(op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2));
}
@ -460,16 +482,6 @@ inline void Assembler::ldx(Register s1, Register s2, Register d) {
inline void Assembler::ldx(Register s1, int simm13a, Register d) {
  emit_data(op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::ldd(Register s1, Register s2, Register d) {
  v9_dep();
  assert(d->is_even(), "not even");
  emit_int32(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2));
}
inline void Assembler::ldd(Register s1, int simm13a, Register d) {
  v9_dep();
  assert(d->is_even(), "not even");
  emit_data(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}

inline void Assembler::ldsba(Register s1, Register s2, int ia, Register d) {
  emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -806,6 +818,15 @@ inline void Assembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register
  emit_data(op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13));
}

inline void Assembler::std(FloatRegister d, Register s1, Register s2) {
  assert(d->is_even(), "not even");
  stf(FloatRegisterImpl::D, d, s1, s2);
}
inline void Assembler::std(FloatRegister d, Register s1, int simm13a) {
  assert(d->is_even(), "not even");
  stf(FloatRegisterImpl::D, d, s1, simm13a);
}

inline void Assembler::stxfsr(Register s1, Register s2) {
  emit_int32(op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2));
}
@ -848,16 +869,6 @@ inline void Assembler::stx(Register d, Register s1, Register s2) {
inline void Assembler::stx(Register d, Register s1, int simm13a) {
  emit_data(op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::std(Register d, Register s1, Register s2) {
  v9_dep();
  assert(d->is_even(), "not even");
  emit_int32(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2));
}
inline void Assembler::std(Register d, Register s1, int simm13a) {
  v9_dep();
  assert(d->is_even(), "not even");
  emit_data(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}

inline void Assembler::stba(Register d, Register s1, Register s2, int ia) {
  emit_int32(op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -1043,6 +1054,15 @@ inline void Assembler::bshuffle(FloatRegister s1, FloatRegister s2, FloatRegiste

// VIS3 instructions

inline void Assembler::addxc(Register s1, Register s2, Register d) {
  vis3_only();
  emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxc_opf) | rs2(s2));
}
inline void Assembler::addxccc(Register s1, Register s2, Register d) {
  vis3_only();
  emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxccc_opf) | rs2(s2));
}

inline void Assembler::movstosw(FloatRegister s, Register d) {
  vis3_only();
  emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S));
@ -1073,6 +1093,10 @@ inline void Assembler::xmulxhi(Register s1, Register s2, Register d) {
  vis3_only();
  emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2));
}
inline void Assembler::umulxhi(Register s1, Register s2, Register d) {
  vis3_only();
  emit_int32(op(arith_op) | rd(d) | op3(umulx_op3) | rs1(s1) | opf(umulxhi_opf) | rs2(s2));
}

// Crypto SHA instructions

@ -1096,4 +1120,11 @@ inline void Assembler::crc32c(FloatRegister s1, FloatRegister s2, FloatRegister
  emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D));
}

// MPMUL instruction

inline void Assembler::mpmul(int uimm5) {
  mpmul_only();
  emit_int32(op(arith_op) | rd(0) | op3(mpmul_op3) | rs1(0) | opf(mpmul_opf) | uimm(uimm5, 5));
}

#endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP
|
@ -119,8 +119,8 @@ address RegisterMap::pd_location(VMReg regname) const {
    reg = regname->as_Register();
  }
  if (reg->is_out()) {
    assert(_younger_window != NULL, "Younger window should be available");
    return second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
    return _younger_window == NULL ? NULL :
           second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
  }
  if (reg->is_local() || reg->is_in()) {
    assert(_window != NULL, "Window should be available");
|
@ -97,12 +97,15 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
          writeable)                                                        \
                                                                            \
  product(intx, UseVIS, 99,                                                 \
          "Highest supported VIS instructions set on Sparc")                \
          "Highest supported VIS instructions set on SPARC")                \
          range(0, 99)                                                      \
                                                                            \
  product(bool, UseCBCond, false,                                           \
          "Use compare and branch instruction on SPARC")                    \
                                                                            \
  product(bool, UseMPMUL, false,                                            \
          "Use multi-precision multiply instruction (mpmul) on SPARC")      \
                                                                            \
  product(bool, UseBlockZeroing, false,                                     \
          "Use special cpu instructions for block zeroing")                 \
                                                                            \
|
@ -1574,29 +1574,39 @@ void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
  assert_not_delayed();
  if (use_cbcond(L)) {
    Assembler::cbcond(zero, ptr_cc, s1, 0, L);
    return;
  } else {
    br_null(s1, false, p, L);
    delayed()->nop();
  }
  br_null(s1, false, p, L);
  delayed()->nop();
}

void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
  assert_not_delayed();
  if (use_cbcond(L)) {
    Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
    return;
  } else {
    br_notnull(s1, false, p, L);
    delayed()->nop();
  }
  br_notnull(s1, false, p, L);
  delayed()->nop();
}

// Unconditional short branch
void MacroAssembler::ba_short(Label& L) {
  assert_not_delayed();
  if (use_cbcond(L)) {
    Assembler::cbcond(equal, icc, G0, G0, L);
    return;
  } else {
    br(always, false, pt, L);
    delayed()->nop();
  }
  br(always, false, pt, L);
}

// Branch if 'icc' says zero or not (i.e. icc.z == 1|0).

void MacroAssembler::br_icc_zero(bool iszero, Predict p, Label &L) {
  assert_not_delayed();
  Condition cf = (iszero ? Assembler::zero : Assembler::notZero);
  br(cf, false, p, L);
  delayed()->nop();
}

@ -3834,6 +3844,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
  ld_ptr(mirror, in_bytes(ConstMethod::constants_offset()), mirror);
  ld_ptr(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
  ld_ptr(mirror, mirror_offset, mirror);
  resolve_oop_handle(mirror);
}

void MacroAssembler::load_klass(Register src_oop, Register klass) {
|
@ -606,7 +606,7 @@ class MacroAssembler : public Assembler {
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).
  //
  // %%%%%% Currently not done for SPARC
  // FIXME: Currently not done for SPARC

  void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
@ -648,6 +648,9 @@ class MacroAssembler : public Assembler {
  // unconditional short branch
  void ba_short(Label& L);

  // Branch on icc.z (true or not).
  void br_icc_zero(bool iszero, Predict p, Label &L);

  inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
  inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );

@ -663,19 +666,19 @@ class MacroAssembler : public Assembler {
  inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );

  // Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual)
  inline void cmp( Register s1, Register s2 );
  inline void cmp( Register s1, int simm13a );
  inline void cmp( Register s1, Register s2 );
  inline void cmp( Register s1, int simm13a );

  inline void jmp( Register s1, Register s2 );
  inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );

  // Check if the call target is out of wdisp30 range (relative to the code cache)
  static inline bool is_far_target(address d);
  inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
  inline void call( address d, RelocationHolder const& rspec);
  inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
  inline void call( address d, RelocationHolder const& rspec);

  inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
  inline void call( Label& L, RelocationHolder const& rspec);
  inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
  inline void call( Label& L, RelocationHolder const& rspec);

  inline void callr( Register s1, Register s2 );
  inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
|
@ -185,7 +185,7 @@ inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, reloc
}

inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
  // See note[+] on 'avoid_pipeline_stalls()', in "assembler_sparc.inline.hpp".
  // See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
  avoid_pipeline_stall();
  br(c, a, p, target(L));
}
|
@ -236,7 +236,7 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
  inline VMReg as_VMReg( );

  // accessors
  int encoding() const { assert(is_valid(), "invalid register"); return value(); }
  int encoding() const { assert(is_valid(), "invalid register"); return value(); }

 public:
  int encoding(Width w) const {
@ -258,10 +258,12 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
    return -1;
  }

  bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
  bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
  bool is_even()  const { return (encoding() & 1) == 0; }

  const char* name() const;

  FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
  FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
};


|
@ -2628,7 +2628,6 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
%}



enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
  MacroAssembler _masm(&cbuf);

@ -2651,7 +2650,71 @@ enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
  __ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}

enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
  MacroAssembler _masm(&cbuf);

  FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
  FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
  FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
  FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);

  __ fmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}

enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
  MacroAssembler _masm(&cbuf);

  FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
  FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
  FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
  FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);

  __ fmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}

enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
  MacroAssembler _masm(&cbuf);

  FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
  FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
  FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
  FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);

  __ fnmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}

enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
  MacroAssembler _masm(&cbuf);

  FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
  FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
  FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
  FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);

  __ fnmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}

enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
  MacroAssembler _masm(&cbuf);

  FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
  FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
  FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
  FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);

  __ fnmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}

enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
  MacroAssembler _masm(&cbuf);

  FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
  FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
  FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
  FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);

  __ fnmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}


enc_class fmovs (dflt_reg dst, dflt_reg src) %{
@ -7597,7 +7660,7 @@ instruct sqrtD_reg_reg(regD dst, regD src) %{
  ins_pipe(fdivD_reg_reg);
%}

// Single precision fused floating-point multiply-add (d = a * b + c).
// Single/Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set dst (FmaF c (Binary a b)));
@ -7606,7 +7669,6 @@ instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
  ins_pipe(fmaF_regx4);
%}

// Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set dst (FmaD c (Binary a b)));
@ -7615,6 +7677,66 @@ instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
  ins_pipe(fmaD_regx4);
%}

// Additional patterns matching complement versions that we can map directly to
// variants of the fused multiply-add instructions.

// Single/Double precision fused floating-point multiply-sub (d = a * b - c)
instruct fmsubF_regx4(regF dst, regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF c) (Binary a b)));
  format %{ "fmsubs  $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
  ins_encode(fmsubs(dst, a, b, c));
  ins_pipe(fmaF_regx4);
%}

instruct fmsubD_regx4(regD dst, regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD c) (Binary a b)));
  format %{ "fmsubd  $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
  ins_encode(fmsubd(dst, a, b, c));
  ins_pipe(fmaD_regx4);
%}

// Single/Double precision fused floating-point neg. multiply-add,
//   d = -1 * a * b - c = -(a * b + c)
instruct fnmaddF_regx4(regF dst, regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF c) (Binary (NegF a) b)));
  match(Set dst (FmaF (NegF c) (Binary a (NegF b))));
  format %{ "fnmadds $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
  ins_encode(fnmadds(dst, a, b, c));
  ins_pipe(fmaF_regx4);
%}

instruct fnmaddD_regx4(regD dst, regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD c) (Binary (NegD a) b)));
  match(Set dst (FmaD (NegD c) (Binary a (NegD b))));
  format %{ "fnmaddd $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
  ins_encode(fnmaddd(dst, a, b, c));
  ins_pipe(fmaD_regx4);
%}

// Single/Double precision fused floating-point neg. multiply-sub,
//   d = -1 * a * b + c = -(a * b - c)
instruct fnmsubF_regx4(regF dst, regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set dst (FmaF c (Binary (NegF a) b)));
  match(Set dst (FmaF c (Binary a (NegF b))));
  format %{ "fnmsubs $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
  ins_encode(fnmsubs(dst, a, b, c));
  ins_pipe(fmaF_regx4);
%}

instruct fnmsubD_regx4(regD dst, regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set dst (FmaD c (Binary (NegD a) b)));
  match(Set dst (FmaD c (Binary a (NegD b))));
  format %{ "fnmsubd $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
  ins_encode(fnmsubd(dst, a, b, c));
  ins_pipe(fmaD_regx4);
%}
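
The matched complements rest on sign-propagation identities of fused multiply-add. A small standalone self-check, not part of the patch, using std::fma; the sample values are chosen so every expression is exact:

#include <cassert>
#include <cmath>

int main() {
  double a = 1.5, b = -2.25, c = 0.75;
  // fmsub:  a * b - c    == fma(a, b, -c)
  assert(std::fma(a, b, -c) == a * b - c);
  // fnmadd: -(a * b + c) == fma(-a, b, -c) == fma(a, -b, -c)
  assert(std::fma(-a, b, -c) == std::fma(a, -b, -c));
  // fnmsub: -(a * b - c) == fma(-a, b, c)  == fma(a, -b, c)
  assert(std::fma(-a, b, c) == std::fma(a, -b, c));
  return 0;
}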

//----------Logical Instructions-----------------------------------------------
// And Instructions
// Register And
|
@ -58,7 +58,6 @@
// Note:  The register L7 is used as L7_thread_cache, and may not be used
//        any other way within this module.


static const Register& Lstub_temp = L2;

// -------------------------------------------------------------------------------------------------------------------------
@ -4943,7 +4942,7 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

/**
  /**
   * Arguments:
   *
   * Inputs:
@ -4975,6 +4974,773 @@ class StubGenerator: public StubCodeGenerator {
    return start;
  }

  /**
   * Arguments:
   *
   * Inputs:
   *   I0 - int* x-addr
   *   I1 - int  x-len
   *   I2 - int* y-addr
   *   I3 - int  y-len
   *   I4 - int* z-addr   (output vector)
   *   I5 - int  z-len
   */
  address generate_multiplyToLen() {
    assert(UseMultiplyToLenIntrinsic, "need VIS3 instructions");

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
    address start = __ pc();

    __ save_frame(0);

    const Register xptr = I0; // input address
    const Register xlen = I1; // ...and length in 32b-words
    const Register yptr = I2; //
    const Register ylen = I3; //
    const Register zptr = I4; // output address
    const Register zlen = I5; // ...and length in 32b-words

    /* The minimal "limb" representation suggests that odd length vectors are as
     * likely as even length dittos. This in turn suggests that we need to cope
     * with odd/even length arrays and data not aligned properly for 64-bit read
     * and write operations. We thus use a number of different kernels:
     *
     *   if (is_even(x.len) && is_even(y.len))
     *     if (is_align64(x) && is_align64(y) && is_align64(z))
     *       if (x.len == y.len && 16 <= x.len && x.len <= 64)
     *         memv_mult_mpmul(...)
     *       else
     *         memv_mult_64x64(...)
     *     else
     *       memv_mult_64x64u(...)
     *   else
     *     memv_mult_32x32(...)
     *
     * Here we assume VIS3 support (for 'umulxhi', 'addxc' and 'addxccc').
     * In case CBCOND instructions are supported, we will use 'cxbX'. If the
     * MPMUL instruction is supported, we will generate a kernel using 'mpmul'
     * (for vectors with proper characteristics).
     */
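
Restated in plain C++, the selection logic in the comment above amounts to the following dispatch. This is a sketch only; mult_mpmul and the other kernel prototypes are hypothetical stand-ins for the four generated code paths:

#include <cstdint>

// Hypothetical kernel prototypes standing in for the generated kernels.
void mult_mpmul(const int* x, const int* y, int* z, int len);
void mult_64x64(const int* x, int xlen, const int* y, int ylen, int* z, int zlen);
void mult_64x64_unaligned(const int* x, int xlen, const int* y, int ylen, int* z, int zlen);
void mult_32x32(const int* x, int xlen, const int* y, int ylen, int* z, int zlen);

static bool is_align64(const void* p) { return ((uintptr_t)p & 63) == 0; }
static bool is_even(int n)            { return (n & 1) == 0; }

void multiply_to_len(const int* x, int xlen, const int* y, int ylen,
                     int* z, int zlen, bool use_mpmul) {
  if (is_even(xlen) && is_even(ylen)) {
    if (is_align64(x) && is_align64(y) && is_align64(z)) {
      if (use_mpmul && xlen == ylen && 16 <= xlen && xlen <= 64) {
        mult_mpmul(x, y, z, xlen);                        // kernel 1
      } else {
        mult_64x64(x, xlen, y, ylen, z, zlen);            // kernel 2
      }
    } else {
      mult_64x64_unaligned(x, xlen, y, ylen, z, zlen);    // kernel 3
    }
  } else {
    mult_32x32(x, xlen, y, ylen, z, zlen);                // kernel 4
  }
}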
    const Register tmp0 = L0;
    const Register tmp1 = L1;

    Label L_mult_32x32;
    Label L_mult_64x64u;
    Label L_mult_64x64;
    Label L_exit;

    if_both_even(xlen, ylen, tmp0, false, L_mult_32x32);
    if_all3_aligned(xptr, yptr, zptr, tmp1, 64, false, L_mult_64x64u);

    if (UseMPMUL) {
      if_eq(xlen, ylen, false, L_mult_64x64);
      if_in_rng(xlen, 16, 64, tmp0, tmp1, false, L_mult_64x64);

      // 1. Multiply naturally aligned 64b-datums using a generic 'mpmul' kernel,
      //    operating on equal length vectors of size [16..64].
      gen_mult_mpmul(xlen, xptr, yptr, zptr, L_exit);
    }

    // 2. Multiply naturally aligned 64-bit datums (64x64).
    __ bind(L_mult_64x64);
    gen_mult_64x64(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);

    // 3. Multiply unaligned 64-bit datums (64x64).
    __ bind(L_mult_64x64u);
    gen_mult_64x64_unaligned(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);

    // 4. Multiply naturally aligned 32-bit datums (32x32).
    __ bind(L_mult_32x32);
    gen_mult_32x32(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);

    __ bind(L_exit);
    __ ret();
    __ delayed()->restore();

    return start;
  }

  // Additional help functions used by multiplyToLen generation.

  void if_both_even(Register r1, Register r2, Register tmp, bool iseven, Label &L)
  {
    __ or3(r1, r2, tmp);
    __ andcc(tmp, 0x1, tmp);
    __ br_icc_zero(iseven, Assembler::pn, L);
  }

  void if_all3_aligned(Register r1, Register r2, Register r3,
                       Register tmp, uint align, bool isalign, Label &L)
  {
    __ or3(r1, r2, tmp);
    __ or3(r3, tmp, tmp);
    __ andcc(tmp, (align - 1), tmp);
    __ br_icc_zero(isalign, Assembler::pn, L);
  }

  void if_eq(Register x, Register y, bool iseq, Label &L)
  {
    Assembler::Condition cf = (iseq ? Assembler::equal : Assembler::notEqual);
    __ cmp_and_br_short(x, y, cf, Assembler::pt, L);
  }

  void if_in_rng(Register x, int lb, int ub, Register t1, Register t2, bool inrng, Label &L)
  {
    assert(Assembler::is_simm13(lb), "Small ints only!");
    assert(Assembler::is_simm13(ub), "Small ints only!");
    // Compute (x - lb) * (ub - x) >= 0
    // NOTE: With the local use of this routine, we rely on small integers to
    //       guarantee that we do not overflow in the multiplication.
    __ add(G0, ub, t2);
    __ sub(x, lb, t1);
    __ sub(t2, x, t2);
    __ mulx(t1, t2, t1);
    Assembler::Condition cf = (inrng ? Assembler::greaterEqual : Assembler::less);
    __ cmp_and_br_short(t1, G0, cf, Assembler::pt, L);
  }
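
The branch-free range test in if_in_rng() rests on the identity that x lies in [lb, ub] iff (x - lb) * (ub - x) >= 0, given the product cannot overflow. A quick standalone check, not part of the patch:

#include <cassert>

static bool in_rng(long x, long lb, long ub) {
  return (x - lb) * (ub - x) >= 0;  // one sign test instead of two compares
}

int main() {
  assert( in_rng(16, 16, 64));  // lower bound inclusive
  assert( in_rng(64, 16, 64));  // upper bound inclusive
  assert(!in_rng(15, 16, 64));  // below: (-1) * 49 < 0
  assert(!in_rng(65, 16, 64));  // above: 49 * (-1) < 0
  return 0;
}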

  void ldd_entry(Register base, Register offs, FloatRegister dest)
  {
    __ ldd(base, offs, dest);
    __ inc(offs, 8);
  }

  void ldx_entry(Register base, Register offs, Register dest)
  {
    __ ldx(base, offs, dest);
    __ inc(offs, 8);
  }

  void mpmul_entry(int m, Label &next)
  {
    __ mpmul(m);
    __ cbcond(Assembler::equal, Assembler::icc, G0, G0, next);
  }

  void stx_entry(Label &L, Register r1, Register r2, Register base, Register offs)
  {
    __ bind(L);
    __ stx(r1, base, offs);
    __ inc(offs, 8);
    __ stx(r2, base, offs);
    __ inc(offs, 8);
  }

  void offs_entry(Label &Lbl0, Label &Lbl1)
  {
    assert(Lbl0.is_bound(), "must be");
    assert(Lbl1.is_bound(), "must be");

    int offset = Lbl0.loc_pos() - Lbl1.loc_pos();

    __ emit_data(offset);
  }
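
For reference, what kernels 2-4 of the comment below compute is plain schoolbook multiplication over 64-bit limbs. A portable sketch, not the generated code, assuming unsigned __int128 for the 64x64->128 product that 'mulx'/'umulxhi' deliver on SPARC:

#include <cstdint>

// z[0..xn+yn-1] = x[0..xn-1] * y[0..yn-1], most significant limb first,
// mirroring the BigInteger layout the stub operates on.
static void mult_schoolbook(const uint64_t* x, int xn,
                            const uint64_t* y, int yn, uint64_t* z) {
  for (int k = 0; k < xn + yn; k++) z[k] = 0;
  for (int j = yn - 1; j >= 0; j--) {
    uint64_t carry = 0;
    for (int i = xn - 1; i >= 0; i--) {
      unsigned __int128 p = (unsigned __int128)x[i] * y[j]
                          + z[i + j + 1] + carry;
      z[i + j + 1] = (uint64_t)p;          // low 64 bits ('mulx' part)
      carry        = (uint64_t)(p >> 64);  // high 64 bits ('umulxhi' part)
    }
    z[j] += carry;
  }
}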

  /* Generate the actual multiplication kernels for BigInteger vectors:
   *
   *   1. gen_mult_mpmul(...)
   *
   *   2. gen_mult_64x64(...)
   *
   *   3. gen_mult_64x64_unaligned(...)
   *
   *   4. gen_mult_32x32(...)
   */
  void gen_mult_mpmul(Register len, Register xptr, Register yptr, Register zptr,
                      Label &L_exit)
  {
    const Register zero = G0;
    const Register gxp  = G1; // Need to use global registers across RWs.
    const Register gyp  = G2;
    const Register gzp  = G3;
    const Register offs = G4;
    const Register disp = G5;

    __ mov(xptr, gxp);
    __ mov(yptr, gyp);
    __ mov(zptr, gzp);

    /* Compute jump vector entry:
     *
     *   1. mpmul input size (0..31) x 64b
     *   2. vector input size in 32b limbs (even number)
     *   3. branch entries in reverse order (31..0), using two
     *      instructions per entry (2 * 4 bytes).
     *
     *   displacement = byte_offset(bra_offset(len))
     *                = byte_offset((64 - len)/2)
     *                = 8 * (64 - len)/2
     *                = 4 * (64 - len)
     */
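
A worked instance of that displacement arithmetic (illustration only, assuming the layout described in the comment: one two-instruction, 8-byte table entry per even input length, from len = 64 down to len = 2):

#include <cassert>

// disp = 4 * (64 - len), i.e. (64 - len) << 2 as computed below.
static int bra_displacement(int len) { return (64 - len) << 2; }

int main() {
  assert(bra_displacement(64) == 0);   // largest input: first table entry
  assert(bra_displacement(62) == 8);   // next entry, one 8-byte pair further
  assert(bra_displacement(16) == 192); // smallest mpmul-eligible length
  return 0;
}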
Register temp = I5; // Alright to use input regs. in first batch.
|
||||
|
||||
__ sub(zero, len, temp);
|
||||
__ add(temp, 64, temp);
|
||||
__ sllx(temp, 2, disp); // disp := (64 - len) << 2
|
||||
|
||||
// Dispatch relative current PC, into instruction table below.
|
||||
__ rdpc(temp);
|
||||
__ add(temp, 16, temp);
|
||||
__ jmp(temp, disp);
|
||||
__ delayed()->clr(offs);
|
||||
|
||||
ldd_entry(gxp, offs, F22);
|
||||
ldd_entry(gxp, offs, F20);
|
||||
ldd_entry(gxp, offs, F18);
|
||||
ldd_entry(gxp, offs, F16);
|
||||
ldd_entry(gxp, offs, F14);
|
||||
ldd_entry(gxp, offs, F12);
|
||||
ldd_entry(gxp, offs, F10);
|
||||
ldd_entry(gxp, offs, F8);
|
||||
ldd_entry(gxp, offs, F6);
|
||||
ldd_entry(gxp, offs, F4);
|
||||
ldx_entry(gxp, offs, I5);
|
||||
ldx_entry(gxp, offs, I4);
|
||||
ldx_entry(gxp, offs, I3);
|
||||
ldx_entry(gxp, offs, I2);
|
||||
ldx_entry(gxp, offs, I1);
|
||||
ldx_entry(gxp, offs, I0);
|
||||
ldx_entry(gxp, offs, L7);
|
||||
ldx_entry(gxp, offs, L6);
|
||||
ldx_entry(gxp, offs, L5);
|
||||
ldx_entry(gxp, offs, L4);
|
||||
ldx_entry(gxp, offs, L3);
|
||||
ldx_entry(gxp, offs, L2);
|
||||
ldx_entry(gxp, offs, L1);
|
||||
ldx_entry(gxp, offs, L0);
|
||||
ldd_entry(gxp, offs, F2);
|
||||
ldd_entry(gxp, offs, F0);
|
||||
ldx_entry(gxp, offs, O5);
|
||||
ldx_entry(gxp, offs, O4);
|
||||
ldx_entry(gxp, offs, O3);
|
||||
ldx_entry(gxp, offs, O2);
|
||||
ldx_entry(gxp, offs, O1);
|
||||
ldx_entry(gxp, offs, O0);
|
||||
|
||||
__ save(SP, -176, SP);
|
||||
|
||||
const Register addr = gxp; // Alright to reuse 'gxp'.
|
||||
|
||||
// Dispatch relative current PC, into instruction table below.
|
||||
__ rdpc(addr);
|
||||
__ add(addr, 16, addr);
|
||||
__ jmp(addr, disp);
|
||||
__ delayed()->clr(offs);
|
||||
|
||||
ldd_entry(gyp, offs, F58);
|
||||
ldd_entry(gyp, offs, F56);
|
||||
ldd_entry(gyp, offs, F54);
|
||||
ldd_entry(gyp, offs, F52);
|
||||
ldd_entry(gyp, offs, F50);
|
||||
ldd_entry(gyp, offs, F48);
|
||||
ldd_entry(gyp, offs, F46);
|
||||
ldd_entry(gyp, offs, F44);
|
||||
ldd_entry(gyp, offs, F42);
|
||||
ldd_entry(gyp, offs, F40);
|
||||
ldd_entry(gyp, offs, F38);
|
||||
ldd_entry(gyp, offs, F36);
|
||||
ldd_entry(gyp, offs, F34);
|
||||
ldd_entry(gyp, offs, F32);
|
||||
ldd_entry(gyp, offs, F30);
|
||||
ldd_entry(gyp, offs, F28);
|
||||
ldd_entry(gyp, offs, F26);
|
||||
ldd_entry(gyp, offs, F24);
|
||||
ldx_entry(gyp, offs, O5);
|
||||
ldx_entry(gyp, offs, O4);
|
||||
ldx_entry(gyp, offs, O3);
|
||||
ldx_entry(gyp, offs, O2);
|
||||
ldx_entry(gyp, offs, O1);
|
||||
ldx_entry(gyp, offs, O0);
|
||||
ldx_entry(gyp, offs, L7);
|
||||
ldx_entry(gyp, offs, L6);
|
||||
ldx_entry(gyp, offs, L5);
|
||||
ldx_entry(gyp, offs, L4);
|
||||
ldx_entry(gyp, offs, L3);
|
||||
ldx_entry(gyp, offs, L2);
|
||||
ldx_entry(gyp, offs, L1);
|
||||
ldx_entry(gyp, offs, L0);
|
||||
|
||||
__ save(SP, -176, SP);
|
||||
__ save(SP, -176, SP);
|
||||
__ save(SP, -176, SP);
|
||||
__ save(SP, -176, SP);
|
||||
__ save(SP, -176, SP);
|
||||
|
||||
Label L_mpmul_restore_4, L_mpmul_restore_3, L_mpmul_restore_2;
|
||||
Label L_mpmul_restore_1, L_mpmul_restore_0;
|
||||
|
||||
// Dispatch relative current PC, into instruction table below.
|
||||
__ rdpc(addr);
|
||||
__ add(addr, 16, addr);
|
||||
__ jmp(addr, disp);
|
||||
__ delayed()->clr(offs);
|
||||
|
||||
mpmul_entry(31, L_mpmul_restore_0);
|
||||
mpmul_entry(30, L_mpmul_restore_0);
|
||||
mpmul_entry(29, L_mpmul_restore_0);
|
||||
mpmul_entry(28, L_mpmul_restore_0);
|
||||
mpmul_entry(27, L_mpmul_restore_1);
|
||||
mpmul_entry(26, L_mpmul_restore_1);
|
||||
mpmul_entry(25, L_mpmul_restore_1);
|
||||
mpmul_entry(24, L_mpmul_restore_1);
|
||||
mpmul_entry(23, L_mpmul_restore_1);
|
||||
mpmul_entry(22, L_mpmul_restore_1);
|
||||
mpmul_entry(21, L_mpmul_restore_1);
|
||||
mpmul_entry(20, L_mpmul_restore_2);
|
||||
mpmul_entry(19, L_mpmul_restore_2);
|
||||
mpmul_entry(18, L_mpmul_restore_2);
mpmul_entry(17, L_mpmul_restore_2);
mpmul_entry(16, L_mpmul_restore_2);
mpmul_entry(15, L_mpmul_restore_2);
mpmul_entry(14, L_mpmul_restore_2);
mpmul_entry(13, L_mpmul_restore_3);
mpmul_entry(12, L_mpmul_restore_3);
mpmul_entry(11, L_mpmul_restore_3);
mpmul_entry(10, L_mpmul_restore_3);
mpmul_entry( 9, L_mpmul_restore_3);
mpmul_entry( 8, L_mpmul_restore_3);
mpmul_entry( 7, L_mpmul_restore_3);
mpmul_entry( 6, L_mpmul_restore_4);
mpmul_entry( 5, L_mpmul_restore_4);
mpmul_entry( 4, L_mpmul_restore_4);
mpmul_entry( 3, L_mpmul_restore_4);
mpmul_entry( 2, L_mpmul_restore_4);
mpmul_entry( 1, L_mpmul_restore_4);
mpmul_entry( 0, L_mpmul_restore_4);

Label L_z31, L_z30, L_z29, L_z28, L_z27, L_z26, L_z25, L_z24;
Label L_z23, L_z22, L_z21, L_z20, L_z19, L_z18, L_z17, L_z16;
Label L_z15, L_z14, L_z13, L_z12, L_z11, L_z10, L_z09, L_z08;
Label L_z07, L_z06, L_z05, L_z04, L_z03, L_z02, L_z01, L_z00;

Label L_zst_base; // Store sequence base address.
__ bind(L_zst_base);

stx_entry(L_z31, L7, L6, gzp, offs);
stx_entry(L_z30, L5, L4, gzp, offs);
stx_entry(L_z29, L3, L2, gzp, offs);
stx_entry(L_z28, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z27, O5, O4, gzp, offs);
stx_entry(L_z26, O3, O2, gzp, offs);
stx_entry(L_z25, O1, O0, gzp, offs);
stx_entry(L_z24, L7, L6, gzp, offs);
stx_entry(L_z23, L5, L4, gzp, offs);
stx_entry(L_z22, L3, L2, gzp, offs);
stx_entry(L_z21, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z20, O5, O4, gzp, offs);
stx_entry(L_z19, O3, O2, gzp, offs);
stx_entry(L_z18, O1, O0, gzp, offs);
stx_entry(L_z17, L7, L6, gzp, offs);
stx_entry(L_z16, L5, L4, gzp, offs);
stx_entry(L_z15, L3, L2, gzp, offs);
stx_entry(L_z14, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z13, O5, O4, gzp, offs);
stx_entry(L_z12, O3, O2, gzp, offs);
stx_entry(L_z11, O1, O0, gzp, offs);
stx_entry(L_z10, L7, L6, gzp, offs);
stx_entry(L_z09, L5, L4, gzp, offs);
stx_entry(L_z08, L3, L2, gzp, offs);
stx_entry(L_z07, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z06, O5, O4, gzp, offs);
stx_entry(L_z05, O3, O2, gzp, offs);
stx_entry(L_z04, O1, O0, gzp, offs);
stx_entry(L_z03, L7, L6, gzp, offs);
stx_entry(L_z02, L5, L4, gzp, offs);
stx_entry(L_z01, L3, L2, gzp, offs);
stx_entry(L_z00, L1, L0, gzp, offs);

__ restore();
__ restore();
// Exit out of 'mpmul' routine, back to multiplyToLen.
__ ba_short(L_exit);

Label L_zst_offs;
__ bind(L_zst_offs);

offs_entry(L_z31, L_zst_base); // index 31: 2048x2048
offs_entry(L_z30, L_zst_base);
offs_entry(L_z29, L_zst_base);
offs_entry(L_z28, L_zst_base);
offs_entry(L_z27, L_zst_base);
offs_entry(L_z26, L_zst_base);
offs_entry(L_z25, L_zst_base);
offs_entry(L_z24, L_zst_base);
offs_entry(L_z23, L_zst_base);
offs_entry(L_z22, L_zst_base);
offs_entry(L_z21, L_zst_base);
offs_entry(L_z20, L_zst_base);
offs_entry(L_z19, L_zst_base);
offs_entry(L_z18, L_zst_base);
offs_entry(L_z17, L_zst_base);
offs_entry(L_z16, L_zst_base);
offs_entry(L_z15, L_zst_base);
offs_entry(L_z14, L_zst_base);
offs_entry(L_z13, L_zst_base);
offs_entry(L_z12, L_zst_base);
offs_entry(L_z11, L_zst_base);
offs_entry(L_z10, L_zst_base);
offs_entry(L_z09, L_zst_base);
offs_entry(L_z08, L_zst_base);
offs_entry(L_z07, L_zst_base);
offs_entry(L_z06, L_zst_base);
offs_entry(L_z05, L_zst_base);
offs_entry(L_z04, L_zst_base);
offs_entry(L_z03, L_zst_base);
offs_entry(L_z02, L_zst_base);
offs_entry(L_z01, L_zst_base);
offs_entry(L_z00, L_zst_base); // index 0: 64x64

__ bind(L_mpmul_restore_4);
__ restore();
__ bind(L_mpmul_restore_3);
__ restore();
__ bind(L_mpmul_restore_2);
__ restore();
__ bind(L_mpmul_restore_1);
__ restore();
__ bind(L_mpmul_restore_0);

// Dispatch via offset vector entry, into z-store sequence.
Label L_zst_rdpc;
__ bind(L_zst_rdpc);

assert(L_zst_base.is_bound(), "must be");
assert(L_zst_offs.is_bound(), "must be");
assert(L_zst_rdpc.is_bound(), "must be");

int dbase = L_zst_rdpc.loc_pos() - L_zst_base.loc_pos();
int doffs = L_zst_rdpc.loc_pos() - L_zst_offs.loc_pos();

temp = gyp; // Alright to reuse 'gyp'.

__ rdpc(addr);
__ sub(addr, doffs, temp);
__ srlx(disp, 1, disp);
__ lduw(temp, disp, offs);
__ sub(addr, dbase, temp);
__ jmp(temp, offs);
__ delayed()->clr(offs);
}
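The rdpc-based tail above is a computed dispatch: read the current PC, subtract the assembly-time distances to locate the offset table and the store-sequence base, fetch a 32-bit offset indexed by the pre-scaled size selector, and jump to base plus offset. A minimal C++ sketch of the same computation; the names (zst_base, offs_table, sel) are hypothetical stand-ins for the generated labels and registers:

#include <cstddef>
#include <cstdint>

// Sketch only: scalar model of the rdpc/lduw/jmp dispatch generated above.
typedef void (*zstore_entry)();

void dispatch_zstore(const uint32_t* offs_table,  // corresponds to L_zst_offs
                     const char* zst_base,        // corresponds to L_zst_base
                     size_t sel) {                // size selector, pre-scaled
  uint32_t offset = offs_table[sel];              // lduw(temp, disp, offs)
  zstore_entry entry = (zstore_entry)(zst_base + offset);
  entry();                                        // jmp(temp, offs)
}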

void gen_mult_64x64(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for immediate use.

const Register ri = L0; // Outer loop index, xv[i]
const Register rj = L1; // Inner loop index, yv[j]
const Register rk = L2; // Output loop index, zv[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])

const Register lop = O0; // lo-64b product
const Register hip = O1; // hi-64b product

const Register zero = G0;

Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;

__ srlx(xn, 1, xn); // index for u32 to u64 ditto
__ srlx(yn, 1, yn); // index for u32 to u64 ditto
__ srlx(zn, 1, zn); // index for u32 to u64 ditto
__ dec(xn); // Adjust [0..(N/2)-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u64 c = 0
__ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
__ sllx(yn, 3, rj); // int j = yn (byte offset j = 8*yn)
__ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)
__ ldx(yp, rj, ry); // u64 y = yp[yn]

// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);

__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ ldx(xp, ri, rx); // x = xp[i]
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry)
__ addxc(hip, zero, rc); // carry over to next datum [k-1]
__ stx(lop, zp, rk); // z[k] = lop
__ dec(rk, 8); // k--
__ dec(ri, 8); // i--
__ ba_short(L_loop_i);

__ bind(L_exit_loop_i);
__ stx(rc, zp, rk); // z[k] = c

// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 3, rj); // int j = yn - 1 (byte offset j = 8*yn)
__ dec(rj, 8);

__ bind(L_loop_j);

__ cmp_and_br_short(rj, 0, // j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u64 c = 0
__ ldx(yp, rj, ry); // u64 y = yp[j]

// for (int i = xn, k = --zn; i >= 0; i--)
__ dec(zn); // --zn
__ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
__ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)

__ bind(L_loop_i2);

__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ ldx(xp, ri, rx); // x = xp[i]
__ ldx(zp, rk, rz); // z = zp[k], accumulator
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rz, rc, rz); // Accumulate lower order bits,
__ addxc(hip, zero, rc); // Accumulate higher order bits to carry
__ addcc(rz, lop, rz); // z += lo(p) + c
__ addxc(rc, zero, rc);
__ stx(rz, zp, rk); // zp[k] = z
__ dec(rk, 8); // k--
__ dec(ri, 8); // i--
__ ba_short(L_loop_i2);

__ bind(L_exit_loop_i2);
__ stx(rc, zp, rk); // z[k] = c
__ dec(rj, 8); // j--
__ ba_short(L_loop_j);
}
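The two loop nests above are the classic O(xn*yn) schoolbook multiply that the inline comments sketch, with mulx/umulxhi supplying the low and high halves of each 64x64 product. A C++ model of the same algorithm (a sketch only: it assumes GCC/Clang's unsigned __int128 to express the two-register product, and digits stored most-significant first as in the generated code):

#include <stdint.h>

// Hypothetical scalar model of the generated gen_mult_64x64 code.
void mult_64x64(const uint64_t* x, int xn,   // x[0..xn], x[0] most significant
                const uint64_t* y, int yn,
                uint64_t* z, int zn) {
  uint64_t c = 0;
  int k = zn;
  for (int i = xn; i >= 0; i--, k--) {       // first pass: z = x * y[yn]
    unsigned __int128 p = (unsigned __int128)x[i] * y[yn] + c;
    z[k] = (uint64_t)p;                      // stx(lop, zp, rk)
    c = (uint64_t)(p >> 64);                 // addxc(hip, zero, rc)
  }
  z[k] = c;                                  // leading carry digit
  for (int j = yn - 1; j >= 0; j--) {        // remaining rows: multiply-accumulate
    c = 0;
    k = --zn;                                // output shifts up one digit per row
    for (int i = xn; i >= 0; i--, k--) {
      unsigned __int128 p = (unsigned __int128)x[i] * y[j] + z[k] + c;
      z[k] = (uint64_t)p;
      c = (uint64_t)(p >> 64);
    }
    z[k] = c;
  }
}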

void gen_mult_64x64_unaligned(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for use.

const Register xpc = L0; // Outer loop cursor, xp[i]
const Register ypc = L1; // Inner loop cursor, yp[j]
const Register zpc = L2; // Output loop cursor, zp[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])
const Register rt = O2;

const Register lop = O0; // lo-64b product
const Register hip = O1; // hi-64b product

const Register zero = G0;

Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;

__ srlx(xn, 1, xn); // index for u32 to u64 ditto
__ srlx(yn, 1, yn); // index for u32 to u64 ditto
__ srlx(zn, 1, zn); // index for u32 to u64 ditto
__ dec(xn); // Adjust [0..(N/2)-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u64 c = 0
__ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn)
__ add(xp, xpc, xpc);
__ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn)
__ add(yp, ypc, ypc);
__ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn)
__ add(zp, zpc, zpc);
__ lduw(ypc, 0, rt); // u64 y = yp[yn]
__ lduw(ypc, 4, ry); // ...
__ sllx(rt, 32, rt);
__ or3(rt, ry, ry);

// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);

__ cmp_and_br_short(xpc, xp, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ lduw(xpc, 0, rt); // u64 x = xp[i]
__ lduw(xpc, 4, rx); // ...
__ sllx(rt, 32, rt);
__ or3(rt, rx, rx);
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry)
__ addxc(hip, zero, rc); // carry over to next datum [k-1]
__ srlx(lop, 32, rt);
__ stw(rt, zpc, 0); // z[k] = lop
__ stw(lop, zpc, 4); // ...
__ dec(zpc, 8); // k-- (zpc--)
__ dec(xpc, 8); // i-- (xpc--)
__ ba_short(L_loop_i);

__ bind(L_exit_loop_i);
__ srlx(rc, 32, rt);
__ stw(rt, zpc, 0); // z[k] = c
__ stw(rc, zpc, 4);

// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn)
__ add(yp, ypc, ypc);
__ dec(ypc, 8); // yn - 1 (ypc--)

__ bind(L_loop_j);

__ cmp_and_br_short(ypc, yp, // j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u64 c = 0
__ lduw(ypc, 0, rt); // u64 y = yp[j] (= *ypc)
__ lduw(ypc, 4, ry); // ...
__ sllx(rt, 32, rt);
__ or3(rt, ry, ry);

// for (int i = xn, k = --zn; i >= 0; i--)
__ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn)
__ add(xp, xpc, xpc);
__ dec(zn); // --zn
__ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn)
__ add(zp, zpc, zpc);

__ bind(L_loop_i2);

__ cmp_and_br_short(xpc, xp, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ lduw(xpc, 0, rt); // u64 x = xp[i] (= *xpc)
__ lduw(xpc, 4, rx); // ...
__ sllx(rt, 32, rt);
__ or3(rt, rx, rx);

__ lduw(zpc, 0, rt); // u64 z = zp[k] (= *zpc)
__ lduw(zpc, 4, rz); // ...
__ sllx(rt, 32, rt);
__ or3(rt, rz, rz);

__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rz, rc, rz); // Accumulate lower order bits...
__ addxc(hip, zero, rc); // Accumulate higher order bits to carry
__ addcc(rz, lop, rz); // ... z += lo(p) + c
__ addxccc(rc, zero, rc);
__ srlx(rz, 32, rt);
__ stw(rt, zpc, 0); // zp[k] = z (*zpc = z)
__ stw(rz, zpc, 4);
__ dec(zpc, 8); // k-- (zpc--)
__ dec(xpc, 8); // i-- (xpc--)
__ ba_short(L_loop_i2);

__ bind(L_exit_loop_i2);
__ srlx(rc, 32, rt);
__ stw(rt, zpc, 0); // z[k] = c
__ stw(rc, zpc, 4);
__ dec(ypc, 8); // j-- (ypc--)
__ ba_short(L_loop_j);
}
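The unaligned variant differs only in its memory access: each 64-bit digit is assembled from two 32-bit loads and split back into two 32-bit stores, so the vectors need only u32 alignment. The lduw/sllx/or3 and srlx/stw pairs above reduce to this helper pattern (a sketch, assuming the big-endian half order used by the generated code):

#include <stdint.h>

// Read one 64-bit digit from a 4-byte-aligned pointer: lduw/lduw/sllx/or3.
static inline uint64_t load_digit(const uint32_t* p) {
  return ((uint64_t)p[0] << 32) | p[1];
}

// Write one 64-bit digit back as two 32-bit halves: srlx/stw/stw.
static inline void store_digit(uint32_t* p, uint64_t v) {
  p[0] = (uint32_t)(v >> 32);
  p[1] = (uint32_t)v;
}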

void gen_mult_32x32(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for use.

const Register ri = L0; // Outer loop index, xv[i]
const Register rj = L1; // Inner loop index, yv[j]
const Register rk = L2; // Output loop index, zv[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])

const Register p64 = O0; // 64b product
const Register z65 = O1; // carry+64b accumulator
const Register c65 = O2; // carry at bit 65
const Register c33 = O2; // carry at bit 33 (after shift)

const Register zero = G0;

Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;

__ dec(xn); // Adjust [0..N-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u32 c = 0
__ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
__ sllx(yn, 2, rj); // int j = yn (byte offset j = 4*yn)
__ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)
__ lduw(yp, rj, ry); // u32 y = yp[yn]

// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);

__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ lduw(xp, ri, rx); // x = xp[i]
__ mulx(rx, ry, p64); // 64b result of 32x32
__ addcc(rc, p64, z65); // Accumulate to 65 bits (producing carry)
__ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
__ sllx(c65, 32, c33); // and shift into bit 33
__ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
__ add(c33, rc, rc); // carry over to next datum [k-1]
__ stw(z65, zp, rk); // z[k] = lo(z65)
__ dec(rk, 4); // k--
__ dec(ri, 4); // i--
__ ba_short(L_loop_i);

__ bind(L_exit_loop_i);
__ stw(rc, zp, rk); // z[k] = c

// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 2, rj); // int j = yn - 1 (byte offset j = 4*yn)
__ dec(rj, 4);

__ bind(L_loop_j);

__ cmp_and_br_short(rj, 0, // j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u32 c = 0
__ lduw(yp, rj, ry); // u32 y = yp[j]

// for (int i = xn, k = --zn; i >= 0; i--)
__ dec(zn); // --zn
__ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
__ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)

__ bind(L_loop_i2);

__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ lduw(xp, ri, rx); // x = xp[i]
__ lduw(zp, rk, rz); // z = zp[k], accumulator
__ mulx(rx, ry, p64); // 64b result of 32x32
__ add(rz, rc, rz); // Accumulate lower order bits,
__ addcc(rz, p64, z65); // z += lo(p64) + c
__ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
__ sllx(c65, 32, c33); // and shift into bit 33
__ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
__ add(c33, rc, rc); // carry over to next datum [k-1]
__ stw(z65, zp, rk); // zp[k] = lo(z65)
__ dec(rk, 4); // k--
__ dec(ri, 4); // i--
__ ba_short(L_loop_i2);

__ bind(L_exit_loop_i2);
__ stw(rc, zp, rk); // z[k] = c
__ dec(rj, 4); // j--
__ ba_short(L_loop_j);
}
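With 32-bit digits the accumulator can exceed 64 bits by one bit (product + digit + carry), which is why the generated code materialises the carry-out with addxc and folds it back in at bit 33 (the c65/c33 registers). One inner step, modelled in C++ (a sketch; unsigned __int128 stands in for the 65-bit intermediate):

#include <stdint.h>

// One multiply-accumulate step of gen_mult_32x32: returns the new z[k]
// and updates the carry destined for digit [k-1].
static inline uint32_t mac_step(uint32_t x, uint32_t y,
                                uint32_t z, uint64_t* carry) {
  unsigned __int128 z65 = (unsigned __int128)((uint64_t)x * y) + z + *carry;
  *carry = (uint64_t)(z65 >> 32);  // includes the bit-65 overflow (the c33 term)
  return (uint32_t)z65;            // low 32 bits, stored via stw
}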

void generate_initial() {
// Generates all stubs and initializes the entry points

@@ -5073,8 +5839,14 @@ class StubGenerator: public StubCodeGenerator {
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
}
}

#ifdef COMPILER2
// Intrinsics supported by C2 only:
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
#endif // COMPILER2
}

public:
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {

@@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) {
enum /* platform_dependent_constants */ {
// %%%%%%%% May be able to shrink this a lot
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
code_size2 = 27000 // simply increase if too small (assembler will crash if too small)
code_size2 = 29000 // simply increase if too small (assembler will crash if too small)
};

class Sparc {

@@ -2049,6 +2049,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
__ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f1_offset(), Robj);
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ld_ptr( Robj, mirror_offset, Robj);
__ resolve_oop_handle(Robj);
}
}

@@ -101,6 +101,14 @@
declare_constant(VM_Version::ISA_XMONT) \
declare_constant(VM_Version::ISA_PAUSE_NSEC) \
declare_constant(VM_Version::ISA_VAMASK) \
declare_constant(VM_Version::ISA_SPARC6) \
declare_constant(VM_Version::ISA_DICTUNP) \
declare_constant(VM_Version::ISA_FPCMPSHL) \
declare_constant(VM_Version::ISA_RLE) \
declare_constant(VM_Version::ISA_SHA3) \
declare_constant(VM_Version::ISA_VIS3C) \
declare_constant(VM_Version::ISA_SPARC5B) \
declare_constant(VM_Version::ISA_MME) \
declare_constant(VM_Version::CPU_FAST_IDIV) \
declare_constant(VM_Version::CPU_FAST_RDPC) \
declare_constant(VM_Version::CPU_FAST_BIS) \

@@ -103,7 +103,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
}
else if (has_sparc5()) {
// Use prefetch instruction to avoid partial RAW issue on Core S4 processors,
// Use prefetch instruction to avoid partial RAW issue on Core C4 processors,
// also use prefetch style 3.
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
@@ -128,7 +128,7 @@ void VM_Version::initialize() {

// We increase the number of prefetched cache lines, to use just a bit more
// aggressive approach, when the L2-cache line size is small (32 bytes), or
// when running on newer processor implementations, such as the Core S4.
// when running on newer processor implementations, such as the Core C4.
bool inc_prefetch = cache_line_size > 0 && (cache_line_size < 64 || has_sparc5());

if (inc_prefetch) {
@@ -168,6 +168,16 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCBCond, false);
}

// Use 'mpmul' instruction if available.
if (has_mpmul()) {
if (FLAG_IS_DEFAULT(UseMPMUL)) {
FLAG_SET_DEFAULT(UseMPMUL, true);
}
} else if (UseMPMUL) {
warning("MPMUL instruction is not available on this CPU");
FLAG_SET_DEFAULT(UseMPMUL, false);
}

assert(BlockZeroingLowLimit > 0, "invalid value");

if (has_blk_zeroing() && cache_line_size > 0) {
@@ -208,7 +218,9 @@ void VM_Version::initialize() {

char buf[512];
jio_snprintf(buf, sizeof(buf),
"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
"%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s"
"%s%s%s%s%s%s%s",
(has_v9() ? "v9" : ""),
(has_popc() ? ", popc" : ""),
(has_vis1() ? ", vis1" : ""),
@@ -241,6 +253,16 @@ void VM_Version::initialize() {
(has_pause_nsec() ? ", pause_nsec" : ""),
(has_vamask() ? ", vamask" : ""),

(has_sparc6() ? ", sparc6" : ""),
(has_dictunp() ? ", dictunp" : ""),
(has_fpcmpshl() ? ", fpcmpshl" : ""),
(has_rle() ? ", rle" : ""),
(has_sha3() ? ", sha3" : ""),
(has_athena_plus2()? ", athena_plus2" : ""),
(has_vis3c() ? ", vis3c" : ""),
(has_sparc5b() ? ", sparc5b" : ""),
(has_mme() ? ", mme" : ""),

(has_fast_idiv() ? ", *idiv" : ""),
(has_fast_rdpc() ? ", *rdpc" : ""),
(has_fast_bis() ? ", *bis" : ""),
@@ -409,6 +431,15 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}

if (UseVIS > 2) {
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
}
} else if (UseMultiplyToLenIntrinsic) {
warning("SPARC multiplyToLen intrinsics require VIS3 instructions support. Intrinsics will be disabled");
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
}

if (UseVectorizedMismatchIntrinsic) {
warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);

@@ -67,6 +67,16 @@ protected:
ISA_PAUSE_NSEC,
ISA_VAMASK,

ISA_SPARC6,
ISA_DICTUNP,
ISA_FPCMPSHL,
ISA_RLE,
ISA_SHA3,
ISA_FJATHPLUS2,
ISA_VIS3C,
ISA_SPARC5B,
ISA_MME,

// Synthesised properties:

CPU_FAST_IDIV,
@@ -79,7 +89,7 @@ protected:
};

private:
enum { ISA_last_feature = ISA_VAMASK,
enum { ISA_last_feature = ISA_MME,
CPU_last_feature = CPU_BLK_ZEROING };

enum {
@@ -119,6 +129,16 @@ private:
ISA_pause_nsec_msk = UINT64_C(1) << ISA_PAUSE_NSEC,
ISA_vamask_msk = UINT64_C(1) << ISA_VAMASK,

ISA_sparc6_msk = UINT64_C(1) << ISA_SPARC6,
ISA_dictunp_msk = UINT64_C(1) << ISA_DICTUNP,
ISA_fpcmpshl_msk = UINT64_C(1) << ISA_FPCMPSHL,
ISA_rle_msk = UINT64_C(1) << ISA_RLE,
ISA_sha3_msk = UINT64_C(1) << ISA_SHA3,
ISA_fjathplus2_msk = UINT64_C(1) << ISA_FJATHPLUS2,
ISA_vis3c_msk = UINT64_C(1) << ISA_VIS3C,
ISA_sparc5b_msk = UINT64_C(1) << ISA_SPARC5B,
ISA_mme_msk = UINT64_C(1) << ISA_MME,

CPU_fast_idiv_msk = UINT64_C(1) << CPU_FAST_IDIV,
CPU_fast_rdpc_msk = UINT64_C(1) << CPU_FAST_RDPC,
CPU_fast_bis_msk = UINT64_C(1) << CPU_FAST_BIS,
@@ -153,40 +173,51 @@ private:
* UltraSPARC T2+: (Victoria Falls, etc.)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
* UltraSPARC T3: (Rainbow Falls/S2)
* UltraSPARC T3: (Rainbow Falls/C2)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
* Oracle SPARC T4/T5/M5: (Core S3)
* Oracle SPARC T4/T5/M5: (Core C3)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Kasumi, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL
*
* Oracle SPARC M7: (Core S4)
* Oracle SPARC M7: (Core C4)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
* ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK
*
* Oracle SPARC M8: (Core C5)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
* ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK, SPARC6, FPCMPSHL,
* DICTUNP, RLE, SHA3, MME
*
* NOTE: Oracle Number support ignored.
*/
enum {
niagara1_msk = ISA_v9_msk | ISA_vis1_msk | ISA_blk_init_msk,
niagara2_msk = niagara1_msk | ISA_popc_msk,

core_S2_msk = niagara2_msk | ISA_vis2_msk,
core_C2_msk = niagara2_msk | ISA_vis2_msk,

core_S3_msk = core_S2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
core_C3_msk = core_C2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
ISA_ima_msk | ISA_aes_msk | ISA_des_msk | ISA_kasumi_msk |
ISA_camellia_msk | ISA_md5_msk | ISA_sha1_msk | ISA_sha256_msk |
ISA_sha512_msk | ISA_mpmul_msk | ISA_mont_msk | ISA_pause_msk |
ISA_cbcond_msk | ISA_crc32c_msk,

core_S4_msk = core_S3_msk - ISA_kasumi_msk |
core_C4_msk = core_C3_msk - ISA_kasumi_msk |
ISA_vis3b_msk | ISA_adi_msk | ISA_sparc5_msk | ISA_mwait_msk |
ISA_xmpmul_msk | ISA_xmont_msk | ISA_pause_nsec_msk | ISA_vamask_msk,

core_C5_msk = core_C4_msk | ISA_sparc6_msk | ISA_dictunp_msk |
ISA_fpcmpshl_msk | ISA_rle_msk | ISA_sha3_msk | ISA_mme_msk,

ultra_sparc_t1_msk = niagara1_msk,
ultra_sparc_t2_msk = niagara2_msk,
ultra_sparc_t3_msk = core_S2_msk,
ultra_sparc_m5_msk = core_S3_msk, // NOTE: First out-of-order pipeline.
ultra_sparc_m7_msk = core_S4_msk
ultra_sparc_t3_msk = core_C2_msk,
ultra_sparc_m5_msk = core_C3_msk, // NOTE: First out-of-order pipeline.
ultra_sparc_m7_msk = core_C4_msk,
ultra_sparc_m8_msk = core_C5_msk
};

static uint _L2_data_cache_line_size;
@@ -247,6 +278,16 @@ public:
static bool has_pause_nsec() { return (_features & ISA_pause_nsec_msk) != 0; }
static bool has_vamask() { return (_features & ISA_vamask_msk) != 0; }

static bool has_sparc6() { return (_features & ISA_sparc6_msk) != 0; }
static bool has_dictunp() { return (_features & ISA_dictunp_msk) != 0; }
static bool has_fpcmpshl() { return (_features & ISA_fpcmpshl_msk) != 0; }
static bool has_rle() { return (_features & ISA_rle_msk) != 0; }
static bool has_sha3() { return (_features & ISA_sha3_msk) != 0; }
static bool has_athena_plus2() { return (_features & ISA_fjathplus2_msk) != 0; }
static bool has_vis3c() { return (_features & ISA_vis3c_msk) != 0; }
static bool has_sparc5b() { return (_features & ISA_sparc5b_msk) != 0; }
static bool has_mme() { return (_features & ISA_mme_msk) != 0; }

static bool has_fast_idiv() { return (_features & CPU_fast_idiv_msk) != 0; }
static bool has_fast_rdpc() { return (_features & CPU_fast_rdpc_msk) != 0; }
static bool has_fast_bis() { return (_features & CPU_fast_bis_msk) != 0; }

@@ -383,6 +383,7 @@ void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp

//------------------------------------------------------------------------------
// frame::adjust_unextended_sp
#ifdef ASSERT
void frame::adjust_unextended_sp() {
// On x86, sites calling method handle intrinsics and lambda forms are treated
// as any other call site. Therefore, no special action is needed when we are
@@ -394,11 +395,12 @@ void frame::adjust_unextended_sp() {
// If the sender PC is a deoptimization point, get the original PC.
if (sender_cm->is_deopt_entry(_pc) ||
sender_cm->is_deopt_mh_entry(_pc)) {
DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp));
verify_deopt_original_pc(sender_cm, _unextended_sp);
}
}
}
}
#endif

//------------------------------------------------------------------------------
// frame::update_map_with_saved_link

@@ -117,7 +117,7 @@
// original sp we use that convention.

intptr_t* _unextended_sp;
void adjust_unextended_sp();
void adjust_unextended_sp() NOT_DEBUG_RETURN;

intptr_t* ptr_at_addr(int offset) const {
return (intptr_t*) addr_at(offset);

@@ -6617,6 +6617,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
movptr(mirror, Address(mirror, ConstMethod::constants_offset()));
movptr(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
movptr(mirror, Address(mirror, mirror_offset));
resolve_oop_handle(mirror);
}

void MacroAssembler::load_klass(Register dst, Register src) {

@@ -2665,6 +2665,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
ConstantPoolCacheEntry::f1_offset())));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movptr(obj, Address(obj, mirror_offset));
__ resolve_oop_handle(obj);
}
}

@@ -46,7 +46,7 @@ address VM_Version::_cpuinfo_segv_addr = 0;
address VM_Version::_cpuinfo_cont_addr = 0;

static BufferBlob* stub_blob;
static const int stub_size = 1000;
static const int stub_size = 1100;

extern "C" {
typedef void (*get_cpu_info_stub_t)(void*);
@@ -70,7 +70,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -267,14 +267,30 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
__ jcc(Assembler::belowEqual, done);
__ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
__ jccb(Assembler::belowEqual, ext_cpuid1);
__ jcc(Assembler::belowEqual, ext_cpuid1);
__ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
__ jccb(Assembler::belowEqual, ext_cpuid5);
__ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
__ jccb(Assembler::belowEqual, ext_cpuid7);
__ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
__ jccb(Assembler::belowEqual, ext_cpuid8);
__ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
__ jccb(Assembler::below, ext_cpuid8);
//
// Extended cpuid(0x8000001E)
//
__ movl(rax, 0x8000001E);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);

//
// Extended cpuid(0x80000008)
//
__ bind(ext_cpuid8);
__ movl(rax, 0x80000008);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@@ -1109,11 +1125,27 @@ void VM_Version::get_processor_features() {
}

#ifdef COMPILER2
if (MaxVectorSize > 16) {
// Limit vectors size to 16 bytes on current AMD cpus.
if (cpu_family() < 0x17 && MaxVectorSize > 16) {
// Limit vectors size to 16 bytes on AMD cpus < 17h.
FLAG_SET_DEFAULT(MaxVectorSize, 16);
}
#endif // COMPILER2

// Some defaults for AMD family 17h
if ( cpu_family() == 0x17 ) {
// On family 17h processors use XMM and UnalignedLoadStores for Array Copy
if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
}
if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
}
#ifdef COMPILER2
if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
}
#endif
}
}

if( is_intel() ) { // Intel cpus specific settings

@@ -228,6 +228,15 @@ class VM_Version : public Abstract_VM_Version {
} bits;
};

union ExtCpuid1EEbx {
uint32_t value;
struct {
uint32_t : 8,
threads_per_core : 8,
: 16;
} bits;
};

union XemXcr0Eax {
uint32_t value;
struct {
@@ -398,6 +407,12 @@ protected:
ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved

// cpuid function 0x8000001E // AMD 17h
uint32_t ext_cpuid1E_eax;
ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
uint32_t ext_cpuid1E_ecx;
uint32_t ext_cpuid1E_edx; // unused currently

// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
XemXcr0Eax xem_xcr0_eax;
uint32_t xem_xcr0_edx; // reserved
@@ -505,6 +520,14 @@ protected:
result |= CPU_CLMUL;
if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
result |= CPU_RTM;
if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
result |= CPU_ADX;
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
result |= CPU_BMI2;
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
result |= CPU_SHA;
if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
result |= CPU_FMA;

// AMD features.
if (is_amd()) {
@@ -518,16 +541,8 @@
}
// Intel features.
if(is_intel()) {
if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
result |= CPU_ADX;
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
result |= CPU_BMI2;
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
result |= CPU_SHA;
if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
result |= CPU_LZCNT;
if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
result |= CPU_FMA;
// for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
result |= CPU_3DNOW_PREFETCH;
@@ -590,6 +605,7 @@ public:
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@@ -673,8 +689,12 @@ public:
if (is_intel() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu();
if (cpu_family() >= 0x17) {
result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
} else {
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu();
}
}
return (result == 0 ? 1 : result);
}
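For context on the new AMD path: CPUID leaf 0x8000001E reports, in EBX bits 15:8, the threads per core minus one on family 17h parts, which is exactly what ExtCpuid1EEbx and the "+ 1" above decode. A hedged user-space equivalent using GCC/Clang's <cpuid.h>, outside HotSpot's stub machinery:

#include <cpuid.h>
#include <stdio.h>

int main() {
  unsigned eax, ebx, ecx, edx;
  // __get_cpuid validates the requested (extended) leaf before issuing it.
  if (__get_cpuid(0x8000001E, &eax, &ebx, &ecx, &edx)) {
    unsigned threads_per_core = ((ebx >> 8) & 0xff) + 1;
    printf("threads per core: %u\n", threads_per_core);
  }
  return 0;
}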

@@ -770,8 +770,15 @@ static void *thread_native_entry(Thread *thread) {
const pthread_t pthread_id = ::pthread_self();
const tid_t kernel_thread_id = ::thread_self();

log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT ").",
os::current_thread_id(), (uintx) kernel_thread_id);
LogTarget(Info, os, thread) lt;
if (lt.is_enabled()) {
address low_address = thread->stack_end();
address high_address = thread->stack_base();
lt.print("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT
", stack [" PTR_FORMAT " - " PTR_FORMAT " (" SIZE_FORMAT "k using %uk pages)).",
os::current_thread_id(), (uintx) kernel_thread_id, low_address, high_address,
(high_address - low_address) / K, os::Aix::query_pagesize(low_address) / K);
}

// Normally, pthread stacks on AIX live in the data segment (are allocated with malloc()
// by the pthread library). In rare cases, this may not be the case, e.g. when third-party
@@ -864,6 +871,14 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
// Calculate stack size if it's not specified by caller.
size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);

// JDK-8187028: It was observed that on some configurations (4K backed thread stacks)
// the real thread stack size may be smaller than the requested stack size, by as much as 64K.
// This very much looks like a pthread lib error. As a workaround, increase the stack size
// by 64K for small thread stacks (arbitrarily chosen to be < 4MB)
if (stack_size < 4096 * K) {
stack_size += 64 * K;
}

// On Aix, pthread_attr_setstacksize fails with huge values and leaves the
// thread size in attr unchanged. If this is the minimal stack size as set
// by pthread_attr_init this leads to crashes after thread creation. E.g. the
@@ -3443,8 +3458,6 @@ void os::init(void) {

init_random(1234567);

ThreadCritical::initialize();

// Main_thread points to the aboriginal thread.
Aix::_main_thread = pthread_self();

@@ -38,12 +38,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;

void ThreadCritical::initialize() {
}

void ThreadCritical::release() {
}

ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {

@@ -3353,8 +3353,6 @@ void os::init(void) {

init_random(1234567);

ThreadCritical::initialize();

Bsd::set_page_size(getpagesize());
if (Bsd::page_size() == -1) {
fatal("os_bsd.cpp: os::init: sysconf failed (%s)", os::strerror(errno));

@@ -37,12 +37,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;

void ThreadCritical::initialize() {
}

void ThreadCritical::release() {
}

ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {

@@ -4768,8 +4768,6 @@ void os::init(void) {

init_random(1234567);

ThreadCritical::initialize();

Linux::set_page_size(sysconf(_SC_PAGESIZE));
if (Linux::page_size() == -1) {
fatal("os_linux.cpp: os::init: sysconf failed (%s)",

@@ -98,6 +98,11 @@ inline int os::ftruncate(int fd, jlong length) {

inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
{
// readdir_r has been deprecated since glibc 2.24.
// See https://sourceware.org/bugzilla/show_bug.cgi?id=19056 for more details.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"

dirent* p;
int status;
assert(dirp != NULL, "just checking");
@@ -111,6 +116,8 @@ inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
return NULL;
} else
return p;

#pragma GCC diagnostic pop
}

inline int os::closedir(DIR *dirp) {

@@ -37,12 +37,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;

void ThreadCritical::initialize() {
}

void ThreadCritical::release() {
}

ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {

@@ -1770,6 +1770,12 @@ int os::PlatformEvent::park(jlong millis) {

if (v == 0) { // Do this the hard way by blocking ...
struct timespec abst;
// We have to watch for overflow when converting millis to nanos,
// but if millis is that large then we will end up limiting to
// MAX_SECS anyway, so just do that here.
if (millis / MILLIUNITS > MAX_SECS) {
millis = jlong(MAX_SECS) * MILLIUNITS;
}
to_abstime(&abst, millis * (NANOUNITS / MILLIUNITS), false);

int ret = OS_TIMEOUT;

@@ -4076,6 +4076,7 @@ int_fnP_cond_tP os::Solaris::_cond_broadcast;
int_fnP_cond_tP_i_vP os::Solaris::_cond_init;
int_fnP_cond_tP os::Solaris::_cond_destroy;
int os::Solaris::_cond_scope = USYNC_THREAD;
bool os::Solaris::_synchronization_initialized;

void os::Solaris::synchronization_init() {
if (UseLWPSynchronization) {
@@ -4125,6 +4126,7 @@ void os::Solaris::synchronization_init() {
os::Solaris::set_cond_destroy(::cond_destroy);
}
}
_synchronization_initialized = true;
}

bool os::Solaris::liblgrp_init() {
@@ -4198,9 +4200,6 @@ void os::init(void) {
dladdr1_func = CAST_TO_FN_PTR(dladdr1_func_type, dlsym(hdl, "dladdr1"));
}

// (Solaris only) this switches to calls that actually do locking.
ThreadCritical::initialize();

main_thread = thr_self();

// dynamic lookup of functions that may not be available in our lowest

@@ -65,6 +65,8 @@ class Solaris {
static int_fnP_cond_tP _cond_destroy;
static int _cond_scope;

static bool _synchronization_initialized;

typedef uintptr_t lgrp_cookie_t;
typedef id_t lgrp_id_t;
typedef int lgrp_rsrc_t;
@@ -227,6 +229,8 @@ class Solaris {
static void set_cond_destroy(int_fnP_cond_tP func) { _cond_destroy = func; }
static void set_cond_scope(int scope) { _cond_scope = scope; }

static bool synchronization_initialized() { return _synchronization_initialized; }

static void set_lgrp_home(lgrp_home_func_t func) { _lgrp_home = func; }
static void set_lgrp_init(lgrp_init_func_t func) { _lgrp_init = func; }
static void set_lgrp_fini(lgrp_fini_func_t func) { _lgrp_fini = func; }

@@ -42,10 +42,9 @@
static mutex_t global_mut;
static thread_t global_mut_owner = -1;
static int global_mut_count = 0;
static bool initialized = false;

ThreadCritical::ThreadCritical() {
if (initialized) {
if (os::Solaris::synchronization_initialized()) {
thread_t owner = thr_self();
if (global_mut_owner != owner) {
if (os::Solaris::mutex_lock(&global_mut))
@@ -62,7 +61,7 @@ ThreadCritical::ThreadCritical() {
}

ThreadCritical::~ThreadCritical() {
if (initialized) {
if (os::Solaris::synchronization_initialized()) {
assert(global_mut_owner == thr_self(), "must have correct owner");
assert(global_mut_count > 0, "must have correct count");
--global_mut_count;
@@ -75,12 +74,3 @@ ThreadCritical::~ThreadCritical() {
assert (Threads::number_of_threads() == 0, "valid only during initialization");
}
}

void ThreadCritical::initialize() {
// This method is called at the end of os::init(). Until
// then, we don't do real locking.
initialized = true;
}

void ThreadCritical::release() {
}

@@ -428,7 +428,7 @@ static unsigned __stdcall thread_native_entry(Thread* thread) {
// When the VMThread gets here, the main thread may have already exited
// which frees the CodeHeap containing the Atomic::add code
if (thread != VMThread::vm_thread() && VMThread::vm_thread() != NULL) {
Atomic::dec_ptr((intptr_t*)&os::win32::_os_thread_count);
Atomic::dec(&os::win32::_os_thread_count);
}

// If a thread has not deleted itself ("delete this") as part of its
@@ -634,7 +634,7 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
return NULL;
}

Atomic::inc_ptr((intptr_t*)&os::win32::_os_thread_count);
Atomic::inc(&os::win32::_os_thread_count);

// Store info on the Win32 thread into the OSThread
osthread->set_thread_handle(thread_handle);

@@ -51,16 +51,6 @@ static DWORD lock_owner = -1;
// and found them ~30 times slower than the critical region code.
//

void ThreadCritical::initialize() {
}

void ThreadCritical::release() {
assert(lock_owner == -1, "Mutex being deleted while owned.");
assert(lock_count == -1, "Mutex being deleted while recursively locked");
assert(lock_event != NULL, "Sanity check");
CloseHandle(lock_event);
}

ThreadCritical::ThreadCritical() {
DWORD current_thread = GetCurrentThreadId();

@@ -34,22 +34,6 @@

// Implementation of class atomic

inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }

inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

//
// machine barrier instructions:
//
@@ -148,90 +132,15 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return result;
}

inline void Atomic::inc (volatile jint* dest) {

unsigned int temp;

__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {

long temp;

__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}

inline void Atomic::dec (volatile jint* dest) {

unsigned int temp;

__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {

long temp;

__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}

inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {

template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).

unsigned int old_value;
T old_value;
const uint64_t zero = 0;

__asm__ __volatile__ (
@@ -259,15 +168,18 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
"memory"
);

return (jint) old_value;
return old_value;
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).

long old_value;
T old_value;
const uint64_t zero = 0;

__asm__ __volatile__ (
@@ -295,11 +207,7 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
"memory"
);

return (intptr_t) old_value;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
return old_value;
}
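The shape of these atomics changes is worth noting: instead of one named overload per Java type (jint, intptr_t, ...), each platform now specialises a functor keyed on operand size, and a generic front end selects it via sizeof(T). A stripped-down sketch of the idiom (not the real HotSpot headers; the GCC/Clang builtin __atomic_exchange_n stands in for the per-platform inline assembly):

#include <cstddef>

template<size_t byte_size>
struct PlatformXchg;                 // specialised per platform and size

template<>
struct PlatformXchg<4> {
  template<typename T>
  T operator()(T exchange_value, T volatile* dest) const {
    // Returns the previous value, like the lwarx/stwcx. loop above.
    return __atomic_exchange_n(dest, exchange_value, __ATOMIC_SEQ_CST);
  }
};

template<typename T>
T atomic_xchg(T exchange_value, T volatile* dest) {
  // Dispatch on operand size; only the 4-byte case is specialised here.
  return PlatformXchg<sizeof(T)>()(exchange_value, dest);
}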

inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {

@@ -78,16 +78,17 @@ inline void OrderAccess::acquire() { inlasm_lwsync(); }
inline void OrderAccess::release() { inlasm_lwsync(); }
inline void OrderAccess::fence() { inlasm_sync(); }

template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte> (const volatile jbyte* p) { register jbyte t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jint OrderAccess::specialized_load_acquire<jint> (const volatile jint* p) { register jint t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const volatile jlong* p) { register jlong t = load(p); inlasm_acquire_reg(t); return t; }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const { register T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
};

#undef inlasm_sync
#undef inlasm_lwsync
#undef inlasm_eieio
#undef inlasm_isync

#define VM_HAS_GENERALIZED_ORDER_ACCESS 1

#endif // OS_CPU_AIX_OJDKPPC_VM_ORDERACCESS_AIX_PPC_INLINE_HPP

@@ -27,19 +27,6 @@

// Implementation of class atomic

inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }

inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }

template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
@@ -61,25 +48,11 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}

inline void Atomic::inc (volatile jint* dest) {
__asm__ volatile ( "lock addl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}

inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}

inline void Atomic::dec (volatile jint* dest) {
__asm__ volatile ( "lock subl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}

inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}

inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@@ -87,10 +60,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return exchange_value;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@@ -120,9 +89,6 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
}

#ifdef AMD64
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) const {
@@ -136,21 +102,11 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ( "lock addq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ( "lock subq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
__asm__ __volatile__ ("xchgq (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@@ -172,22 +128,8 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return exchange_value;
}

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

#else // !AMD64

inline void Atomic::inc_ptr(volatile intptr_t* dest) {
inc((volatile jint*)dest);
}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {
dec((volatile jint*)dest);
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
}

extern "C" {
// defined in bsd_x86.s
jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong, bool);
@@ -204,18 +146,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return cmpxchg_using_helper<jlong>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
}

inline jlong Atomic::load(const volatile jlong* src) {
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
_Atomic_move_long(src, &dest);
return dest;
_Atomic_move_long(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
return PrimitiveConversions::cast<T>(dest);
}

inline void Atomic::store(jlong store_value, jlong* dest) {
_Atomic_move_long((volatile jlong*)&store_value, (volatile jlong*)dest);
}

inline void Atomic::store(jlong store_value, volatile jlong* dest) {
_Atomic_move_long((volatile jlong*)&store_value, dest);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
_Atomic_move_long(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}

#endif // AMD64
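The !AMD64 branch exists because a plain 8-byte load or store is not guaranteed atomic on 32-bit x86; HotSpot routes through the assembly helper _Atomic_move_long, and the template rewrite above only changes how callers reach it. The guarantee it provides corresponds to this sketch in standard C++ (which HotSpot itself does not use here):

#include <atomic>
#include <cstdint>

std::atomic<int64_t> v;  // 64-bit cell with atomic access even on IA-32

int64_t load64()         { return v.load(std::memory_order_relaxed); }
void store64(int64_t x)  { v.store(x, std::memory_order_relaxed); }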

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -64,46 +64,57 @@ inline void OrderAccess::fence() {
}

template<>
inline void OrderAccess::specialized_release_store_fence<jbyte> (volatile jbyte* p, jbyte v) {
__asm__ volatile ( "xchgb (%2),%0"
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgb (%2),%0"
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
}
};

template<>
inline void OrderAccess::specialized_release_store_fence<jshort>(volatile jshort* p, jshort v) {
__asm__ volatile ( "xchgw (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgw (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};

template<>
inline void OrderAccess::specialized_release_store_fence<jint> (volatile jint* p, jint v) {
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};

#ifdef AMD64
template<>
inline void OrderAccess::specialized_release_store_fence<jlong> (volatile jlong* p, jlong v) {
__asm__ volatile ( "xchgq (%2), %0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgq (%2), %0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};
#endif // AMD64

template<>
inline void OrderAccess::specialized_release_store_fence<jfloat> (volatile jfloat* p, jfloat v) {
release_store_fence((volatile jint*)p, jint_cast(v));
}
template<>
inline void OrderAccess::specialized_release_store_fence<jdouble>(volatile jdouble* p, jdouble v) {
release_store_fence((volatile jlong*)p, jlong_cast(v));
}

#define VM_HAS_GENERALIZED_ORDER_ACCESS 1

#endif // OS_CPU_BSD_X86_VM_ORDERACCESS_BSD_X86_INLINE_HPP
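
One detail worth keeping in mind when reading these release_store_fence bodies: an x86 xchg with a memory operand is implicitly locked, so it stores the value and acts as a full fence in a single instruction, where a plain store would need a trailing mfence. A hedged standalone sketch of the same idiom for a 4-byte store (illustrative, not taken from the file):

#include <cstdint>

inline void release_store_fence_u32(volatile uint32_t* p, uint32_t v) {
  __asm__ volatile("xchgl %0, (%1)"  // implicitly locked: store plus full barrier
                   : "+r"(v)         // value register; receives the old contents
                   : "r"(p)
                   : "memory");
}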

@ -87,7 +87,7 @@ static inline int m68k_add_and_fetch(int add_value, volatile int *ptr) {

/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until success.
int prev = *ptr;
@ -148,7 +148,7 @@ static inline int arm_add_and_fetch(int add_value, volatile int *ptr) {

/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until a __kernel_cmpxchg succeeds.
int prev = *ptr;
@ -159,20 +159,6 @@ static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
}
#endif // ARM

inline void Atomic::store(jint store_value, volatile jint* dest) {
#if !defined(ARM) && !defined(M68K)
__sync_synchronize();
#endif
*dest = store_value;
}

inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) {
#if !defined(ARM) && !defined(M68K)
__sync_synchronize();
#endif
*dest = store_value;
}

template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@ -207,42 +193,22 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return __sync_add_and_fetch(dest, add_value);
}

inline void Atomic::inc(volatile jint* dest) {
add(1, dest);
}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {
add_ptr(1, dest);
}

inline void Atomic::inc_ptr(volatile void* dest) {
add_ptr(1, dest);
}

inline void Atomic::dec(volatile jint* dest) {
add(-1, dest);
}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {
add_ptr(-1, dest);
}

inline void Atomic::dec_ptr(volatile void* dest) {
add_ptr(-1, dest);
}

inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest);
#else
// __sync_lock_test_and_set is a bizarrely named atomic exchange
// operation. Note that some platforms only support this with the
// limitation that the only valid value to store is the immediate
// constant 1. There is a test for this in JNI_CreateJavaVM().
jint result = __sync_lock_test_and_set (dest, exchange_value);
T result = __sync_lock_test_and_set (dest, exchange_value);
// All atomic operations are expected to be full memory barriers
// (see atomic.hpp). However, __sync_lock_test_and_set is not
// a full memory barrier, but an acquire barrier. Hence, this added
@ -253,24 +219,14 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
#endif // ARM
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
volatile intptr_t* dest) {
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
#else
intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T result = __sync_lock_test_and_set (dest, exchange_value);
__sync_synchronize();
return result;
#endif // M68K
#endif // ARM
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void *) xchg_ptr((intptr_t) exchange_value,
(volatile intptr_t*) dest);
}

// No direct support for cmpxchg of bytes; emulate using int.
@ -305,18 +261,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
}

inline jlong Atomic::load(const volatile jlong* src) {
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
os::atomic_copy64(src, &dest);
return dest;
os::atomic_copy64(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
return PrimitiveConversions::cast<T>(dest);
}

inline void Atomic::store(jlong store_value, jlong* dest) {
os::atomic_copy64((volatile jlong*)&store_value, (volatile jlong*)dest);
}

inline void Atomic::store(jlong store_value, volatile jlong* dest) {
os::atomic_copy64((volatile jlong*)&store_value, dest);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
os::atomic_copy64(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}

#endif // OS_CPU_BSD_ZERO_VM_ATOMIC_BSD_ZERO_HPP
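
The comment retained in the hunk above carries the key invariant: __sync_lock_test_and_set is only an acquire-barrier exchange, so every variant in this file pairs it with __sync_synchronize() to deliver the two-way barrier atomic.hpp requires. A minimal sketch of that pairing, assuming GCC builtins:

#include <cstdint>

template<typename T>
T full_barrier_xchg(T exchange_value, volatile T* dest) {
  T result = __sync_lock_test_and_set(dest, exchange_value);  // acquire semantics only
  __sync_synchronize();                                       // supply the missing release half
  return result;
}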

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2009 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -74,6 +74,4 @@ inline void OrderAccess::acquire() { LIGHT_MEM_BARRIER; }
inline void OrderAccess::release() { LIGHT_MEM_BARRIER; }
inline void OrderAccess::fence() { FULL_MEM_BARRIER; }

#define VM_HAS_GENERALIZED_ORDER_ACCESS 1

#endif // OS_CPU_BSD_ZERO_VM_ORDERACCESS_BSD_ZERO_INLINE_HPP

@ -34,19 +34,6 @@
#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);

inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }

inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }


template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@ -57,39 +44,16 @@ struct Atomic::PlatformAdd
}
};

inline void Atomic::inc(volatile jint* dest)
{
add(1, dest);
}

inline void Atomic::inc_ptr(volatile void* dest)
{
add_ptr(1, dest);
}

inline void Atomic::dec (volatile jint* dest)
{
add(-1, dest);
}

inline void Atomic::dec_ptr(volatile void* dest)
{
add_ptr(-1, dest);
}

inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
{
jint res = __sync_lock_test_and_set (dest, exchange_value);
template<size_t byte_size>
template<typename T>
inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(byte_size == sizeof(T));
T res = __sync_lock_test_and_set(dest, exchange_value);
FULL_MEM_BARRIER;
return res;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest)
{
return (void *) xchg_ptr((intptr_t) exchange_value,
(volatile intptr_t*) dest);
}

template<size_t byte_size>
template<typename T>
inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
@ -107,26 +71,4 @@ inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
}
}

inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }

inline void Atomic::inc_ptr(volatile intptr_t* dest)
{
add_ptr(1, dest);
}

inline void Atomic::dec_ptr(volatile intptr_t* dest)
{
add_ptr(-1, dest);
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
{
intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
FULL_MEM_BARRIER;
return res;
}

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

#endif // OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_HPP

@ -50,93 +50,28 @@ inline void OrderAccess::fence() {
FULL_MEM_BARRIER;
}

inline jbyte OrderAccess::load_acquire(const volatile jbyte* p)
{ jbyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jshort OrderAccess::load_acquire(const volatile jshort* p)
{ jshort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jint OrderAccess::load_acquire(const volatile jint* p)
{ jint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jlong OrderAccess::load_acquire(const volatile jlong* p)
{ jlong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jubyte OrderAccess::load_acquire(const volatile jubyte* p)
{ jubyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jushort OrderAccess::load_acquire(const volatile jushort* p)
{ jushort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline juint OrderAccess::load_acquire(const volatile juint* p)
{ juint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline julong OrderAccess::load_acquire(const volatile julong* p)
{ julong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jfloat OrderAccess::load_acquire(const volatile jfloat* p)
{ jfloat data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline jdouble OrderAccess::load_acquire(const volatile jdouble* p)
{ jdouble data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline intptr_t OrderAccess::load_ptr_acquire(const volatile intptr_t* p)
{ intptr_t data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
inline void* OrderAccess::load_ptr_acquire(const volatile void* p)
{ void* data; __atomic_load((void* const volatile *)p, &data, __ATOMIC_ACQUIRE); return data; }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
};

inline void OrderAccess::release_store(volatile jbyte* p, jbyte v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jshort* p, jshort v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jint* p, jint v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jlong* p, jlong v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jubyte* p, jubyte v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jushort* p, jushort v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile juint* p, juint v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile julong* p, julong v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jfloat* p, jfloat v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store(volatile jdouble* p, jdouble v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v)
{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
inline void OrderAccess::release_store_ptr(volatile void* p, void* v)
{ __atomic_store((void* volatile *)p, &v, __ATOMIC_RELEASE); }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); }
};

inline void OrderAccess::store_fence(jbyte* p, jbyte v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jshort* p, jshort v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jint* p, jint v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jlong* p, jlong v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jubyte* p, jubyte v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jushort* p, jushort v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(juint* p, juint v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(julong* p, julong v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jfloat* p, jfloat v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_fence(jdouble* p, jdouble v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
inline void OrderAccess::store_ptr_fence(void** p, void* v)
{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }

inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); }
inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); }

inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); }
inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); }
template<size_t byte_size>
struct OrderAccess::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const { release_store(p, v); fence(); }
};

#endif // OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP
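
The diff above collapses a dozen per-j-type overloads into size-templated functors. A standalone sketch of the same pattern, assuming the GCC/Clang __atomic builtins the file already relies on (names illustrative):

#include <cstdint>
#include <cstdio>

template<typename T>
T load_acquire(const volatile T* p) {      // analogue of PlatformOrderedLoad<N, X_ACQUIRE>
  T data;
  __atomic_load(p, &data, __ATOMIC_ACQUIRE);
  return data;
}

template<typename T>
void release_store(volatile T* p, T v) {   // analogue of PlatformOrderedStore<N, RELEASE_X>
  __atomic_store(p, &v, __ATOMIC_RELEASE);
}

int main() {
  volatile int64_t flag = 0;
  release_store(&flag, int64_t(1));
  std::printf("%lld\n", (long long)load_acquire(&flag));
  return 0;
}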

@ -44,39 +44,24 @@
* kernel source or kernel_user_helpers.txt in Linux Doc.
*/

inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
#ifndef AARCH64
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
return PrimitiveConversions::cast<T>(
(*os::atomic_load_long_func)(reinterpret_cast<const volatile jlong*>(src)));
}

inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }

inline jlong Atomic::load (const volatile jlong* src) {
assert(((intx)src & (sizeof(jlong)-1)) == 0, "Atomic load jlong mis-aligned");
#ifdef AARCH64
return *src;
#else
return (*os::atomic_load_long_func)(src);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
(*os::atomic_store_long_func)(
PrimitiveConversions::cast<jlong>(store_value), reinterpret_cast<volatile jlong*>(dest));
}
#endif
}

inline void Atomic::store (jlong value, volatile jlong* dest) {
assert(((intx)dest & (sizeof(jlong)-1)) == 0, "Atomic store jlong mis-aligned");
#ifdef AARCH64
*dest = value;
#else
(*os::atomic_store_long_func)(value, dest);
#endif
}

inline void Atomic::store (jlong value, jlong* dest) {
store(value, (volatile jlong*)dest);
}

// As per atomic.hpp all read-modify-write operations have to provide two-way
// barriers semantics. For AARCH64 we are using load-acquire-with-reservation and
@ -122,14 +107,6 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) co
#endif
}

inline void Atomic::inc(volatile jint* dest) {
Atomic::add(1, (volatile jint *)dest);
}

inline void Atomic::dec(volatile jint* dest) {
Atomic::add(-1, (volatile jint *)dest);
}

#ifdef AARCH64
template<>
template<typename I, typename D>
@ -149,28 +126,15 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
: "memory");
return val;
}
#endif // AARCH64
#endif

inline void Atomic::inc_ptr(volatile intptr_t* dest) {
Atomic::add_ptr(1, dest);
}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {
Atomic::add_ptr(-1, dest);
}

inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}

inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}


inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
#ifdef AARCH64
jint old_val;
T old_val;
int tmp;
__asm__ volatile(
"1:\n\t"
@ -182,13 +146,17 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
: "memory");
return old_val;
#else
return (*os::atomic_xchg_func)(exchange_value, dest);
return xchg_using_helper<jint>(os::atomic_xchg_func, exchange_value, dest);
#endif
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
#ifdef AARCH64
intptr_t old_val;
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T old_val;
int tmp;
__asm__ volatile(
"1:\n\t"
@ -199,14 +167,8 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
: [new_val] "r" (exchange_value), [dest] "r" (dest)
: "memory");
return old_val;
#else
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
#endif
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
#endif // AARCH64

// The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering

@ -33,7 +33,6 @@
// - we define the high level barriers below and use the general
// implementation in orderAccess.inline.hpp, with customizations
// on AARCH64 via the specialized_* template functions
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1

// Memory Ordering on ARM is weak.
//
@ -131,91 +130,126 @@ inline void OrderAccess::fence() { dmb_sy(); }

#ifdef AARCH64

template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte>(const volatile jbyte* p) {
volatile jbyte result;
__asm__ volatile(
"ldarb %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
template<>
struct OrderAccess::PlatformOrderedLoad<1, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const {
volatile T result;
__asm__ volatile(
"ldarb %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
};

template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) {
volatile jshort result;
__asm__ volatile(
"ldarh %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
template<>
struct OrderAccess::PlatformOrderedLoad<2, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const {
volatile T result;
__asm__ volatile(
"ldarh %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
};

template<> inline jint OrderAccess::specialized_load_acquire<jint>(const volatile jint* p) {
volatile jint result;
__asm__ volatile(
"ldar %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
template<>
struct OrderAccess::PlatformOrderedLoad<4, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const {
volatile T result;
__asm__ volatile(
"ldar %w[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
};

template<> inline jfloat OrderAccess::specialized_load_acquire<jfloat>(const volatile jfloat* p) {
return jfloat_cast(specialized_load_acquire((const volatile jint*)p));
}
template<>
struct OrderAccess::PlatformOrderedLoad<8, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const {
volatile T result;
__asm__ volatile(
"ldar %[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
};

// This is implicit as jlong and intptr_t are both "long int"
//template<> inline jlong OrderAccess::specialized_load_acquire(const volatile jlong* p) {
//  return (volatile jlong)specialized_load_acquire((const volatile intptr_t*)p);
//}
template<>
struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile(
"stlrb %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
};

template<> inline intptr_t OrderAccess::specialized_load_acquire<intptr_t>(const volatile intptr_t* p) {
volatile intptr_t result;
__asm__ volatile(
"ldar %[res], [%[ptr]]"
: [res] "=&r" (result)
: [ptr] "r" (p)
: "memory");
return result;
}
template<>
struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile(
"stlrh %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
};

template<> inline jdouble OrderAccess::specialized_load_acquire<jdouble>(const volatile jdouble* p) {
return jdouble_cast(specialized_load_acquire((const volatile intptr_t*)p));
}
template<>
struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile(
"stlr %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
};

template<>
struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile(
"stlr %[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
};

template<> inline void OrderAccess::specialized_release_store<jbyte>(volatile jbyte* p, jbyte v) {
__asm__ volatile(
"stlrb %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}

template<> inline void OrderAccess::specialized_release_store<jshort>(volatile jshort* p, jshort v) {
__asm__ volatile(
"stlrh %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}

template<> inline void OrderAccess::specialized_release_store<jint>(volatile jint* p, jint v) {
__asm__ volatile(
"stlr %w[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}

template<> inline void OrderAccess::specialized_release_store<jlong>(volatile jlong* p, jlong v) {
__asm__ volatile(
"stlr %[val], [%[ptr]]"
:
: [ptr] "r" (p), [val] "r" (v)
: "memory");
}
#endif // AARCH64

#endif // OS_CPU_LINUX_ARM_VM_ORDERACCESS_LINUX_ARM_INLINE_HPP
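
A note on the AArch64 hunks: ldar and stlr are acquire-load and release-store in a single instruction, which is why each functor body is one asm statement. An illustrative AArch64-only sketch of the pair (compiles only for __aarch64__ targets):

#include <cstdint>

#if defined(__aarch64__)
inline uint64_t load_acquire_u64(const volatile uint64_t* p) {
  uint64_t result;
  __asm__ volatile("ldar %[res], [%[ptr]]"   // load-acquire
                   : [res] "=&r"(result)
                   : [ptr] "r"(p)
                   : "memory");
  return result;
}

inline void store_release_u64(volatile uint64_t* p, uint64_t v) {
  __asm__ volatile("stlr %[val], [%[ptr]]"   // store-release
                   :
                   : [ptr] "r"(p), [val] "r"(v)
                   : "memory");
}
#endif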

@ -32,22 +32,6 @@

// Implementation of class atomic

inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }

inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

//
// machine barrier instructions:
//
@ -146,90 +130,14 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return result;
}


inline void Atomic::inc (volatile jint* dest) {

unsigned int temp;

__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {

long temp;

__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}


inline void Atomic::dec (volatile jint* dest) {

unsigned int temp;

__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {

long temp;

__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}

inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {

template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).

unsigned int old_value;
T old_value;
const uint64_t zero = 0;

__asm__ __volatile__ (
@ -257,15 +165,18 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
"memory"
);

return (jint) old_value;
return old_value;
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).

long old_value;
T old_value;
const uint64_t zero = 0;

__asm__ __volatile__ (
@ -293,11 +204,7 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
"memory"
);

return (intptr_t) old_value;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
return old_value;
}

inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {

@ -80,10 +80,14 @@ inline void OrderAccess::acquire() { inlasm_lwsync(); }
inline void OrderAccess::release() { inlasm_lwsync(); }
inline void OrderAccess::fence() { inlasm_sync(); }

template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte> (const volatile jbyte* p) { register jbyte t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jint OrderAccess::specialized_load_acquire<jint> (const volatile jint* p) { register jint t = load(p); inlasm_acquire_reg(t); return t; }
template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const volatile jlong* p) { register jlong t = load(p); inlasm_acquire_reg(t); return t; }

template<size_t byte_size>
struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
T operator()(const volatile T* p) const { register T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
};

#undef inlasm_sync
#undef inlasm_lwsync
@ -91,6 +95,4 @@ template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const vol
#undef inlasm_isync
#undef inlasm_acquire_reg

#define VM_HAS_GENERALIZED_ORDER_ACCESS 1

#endif // OS_CPU_LINUX_PPC_VM_ORDERACCESS_LINUX_PPC_INLINE_HPP
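
The PPC functor keeps the "plain load, then acquire barrier" split, with inlasm_acquire_reg placing the barrier behind the freshly loaded register. A portable sketch of that idiom using standard fences, for comparison (an approximation: the real sequence is cheaper because it is keyed to the one load):

#include <atomic>

template<typename T>
T load_then_acquire(const volatile T* p) {
  T t = *p;                                             // plain load
  std::atomic_thread_fence(std::memory_order_acquire);  // order later accesses after it
  return t;
}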

@ -53,20 +53,6 @@
// is an integer multiple of the data length. Furthermore, all stores are ordered:
// a store which occurs conceptually before another store becomes visible to other CPUs
// before the other store becomes visible.
inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }

inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }


//------------
// Atomic::add
@ -192,219 +178,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest) const {
}


//------------
// Atomic::inc
//------------
// These methods force the value in memory to be incremented (augmented by 1).
// Both, memory value and increment, are treated as 32bit signed binary integers.
// No overflow exceptions are recognized, and the condition code does not hold
// information about the value in memory.
//
// The value in memory is updated by using a compare-and-swap instruction. The
// instruction is retried as often as required.

inline void Atomic::inc(volatile jint* dest) {
unsigned int old, upd;

if (VM_Version::has_LoadAndALUAtomicV1()) {
// tty->print_cr("Atomic::inc called... dest @%p", dest);
__asm__ __volatile__ (
" LGHI 2,1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAA 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xf8 \n\t" // LAA minor opcode
" AGHI 2,1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
"0: LA %[upd],1(,%[old]) \n\t" // calc result
" CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {
unsigned long old, upd;

if (VM_Version::has_LoadAndALUAtomicV1()) {
__asm__ __volatile__ (
" LGHI 2,1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAAG 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xe8 \n\t" // LAA minor opcode
" AGHI 2,1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
"0: LA %[upd],1(,%[old]) \n\t" // calc result
" CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}

inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}

//------------
// Atomic::dec
//------------
// These methods force the value in memory to be decremented (augmented by -1).
// Both, memory value and decrement, are treated as 32bit signed binary integers.
// No overflow exceptions are recognized, and the condition code does not hold
// information about the value in memory.
//
// The value in memory is updated by using a compare-and-swap instruction. The
// instruction is retried as often as required.

inline void Atomic::dec(volatile jint* dest) {
unsigned int old, upd;

if (VM_Version::has_LoadAndALUAtomicV1()) {
__asm__ __volatile__ (
" LGHI 2,-1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAA 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xf8 \n\t" // LAA minor opcode
" AGHI 2,-1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
// LAY not supported by inline assembler
// "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
"0: LR %[upd],%[old] \n\t" // calc result
" AHI %[upd],-1 \n\t"
" CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {
unsigned long old, upd;

if (VM_Version::has_LoadAndALUAtomicV1()) {
__asm__ __volatile__ (
" LGHI 2,-1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAAG 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xe8 \n\t" // LAA minor opcode
" AGHI 2,-1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
// LAY not supported by inline assembler
// "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
"0: LGR %[upd],%[old] \n\t" // calc result
" AGHI %[upd],-1 \n\t"
" CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}

inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}

//-------------
// Atomic::xchg
//-------------
@ -421,8 +194,12 @@ inline void Atomic::dec_ptr(volatile void* dest) {
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.
inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
unsigned int old;
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
T old;

__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
@ -432,16 +209,20 @@ inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
: [old] "=&d" (old) // write-only, prev value irrelevant
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
: [upd] "d" (xchg_val) // read-only, value to be written to memory
: [upd] "d" (exchange_value) // read-only, value to be written to memory
//---< clobbered >---
: "cc", "memory"
);

return (jint)old;
return old;
}

inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
unsigned long old;
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T old;

__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
@ -451,16 +232,12 @@ inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
: [old] "=&d" (old) // write-only, init from memory
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
: [upd] "d" (xchg_val) // read-only, value to be written to memory
: [upd] "d" (exchange_value) // read-only, value to be written to memory
//---< clobbered >---
: "cc", "memory"
);

return (intptr_t)old;
}

inline void *Atomic::xchg_ptr(void *exchange_value, volatile void *dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
return old;
}

//----------------
@ -544,6 +321,4 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val,
return old;
}

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

#endif // OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
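
The deleted inc/dec bodies and the surviving xchg specializations share one shape: load the old value, compute the update, attempt CS/CSG, branch back on failure. The portable equivalent of that retry loop, written with std::atomic:

#include <atomic>
#include <cstdint>

int32_t fetch_then_add_one(std::atomic<int32_t>& mem) {
  int32_t old = mem.load(std::memory_order_relaxed);  // "get old value"
  while (!mem.compare_exchange_weak(old, old + 1)) {  // "try to xchg res with mem"
    // on failure, old is refreshed with the current value; retry ("JNE 0b")
  }
  return old;
}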
|
||||
|
@ -74,10 +74,13 @@ inline void OrderAccess::acquire() { inlasm_zarch_acquire(); }
|
||||
inline void OrderAccess::release() { inlasm_zarch_release(); }
|
||||
inline void OrderAccess::fence() { inlasm_zarch_sync(); }
|
||||
|
||||
template<> inline jbyte OrderAccess::specialized_load_acquire<jbyte> (const volatile jbyte* p) { register jbyte t = *p; inlasm_zarch_acquire(); return t; }
|
||||
template<> inline jshort OrderAccess::specialized_load_acquire<jshort>(const volatile jshort* p) { register jshort t = *p; inlasm_zarch_acquire(); return t; }
|
||||
template<> inline jint OrderAccess::specialized_load_acquire<jint> (const volatile jint* p) { register jint t = *p; inlasm_zarch_acquire(); return t; }
|
||||
template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const volatile jlong* p) { register jlong t = *p; inlasm_zarch_acquire(); return t; }
|
||||
template<size_t byte_size>
|
||||
struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
|
||||
VALUE_OBJ_CLASS_SPEC
|
||||
{
|
||||
template <typename T>
|
||||
T operator()(const volatile T* p) const { register T t = *p; inlasm_zarch_acquire(); return t; }
|
||||
};
|
||||
|
||||
#undef inlasm_compiler_barrier
|
||||
#undef inlasm_zarch_sync
|
||||
@ -85,8 +88,4 @@ template<> inline jlong OrderAccess::specialized_load_acquire<jlong> (const vol
|
||||
#undef inlasm_zarch_acquire
|
||||
#undef inlasm_zarch_fence
|
||||
|
||||
#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
|
||||
|
||||
#endif // OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP
|
||||
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016 SAP SE. All rights reserved.
|
||||
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -448,11 +448,17 @@ JVM_handle_linux_signal(int sig,
|
||||
}
|
||||
|
||||
else { // thread->thread_state() != _thread_in_Java
|
||||
if (sig == SIGILL && VM_Version::is_determine_features_test_running()) {
|
||||
// SIGILL must be caused by VM_Version::determine_features().
|
||||
if ((sig == SIGILL) && VM_Version::is_determine_features_test_running()) {
|
||||
// SIGILL must be caused by VM_Version::determine_features()
|
||||
// when attempting to execute a non-existing instruction.
|
||||
//*(int *) (pc-6)=0; // Patch instruction to 0 to indicate that it causes a SIGILL.
|
||||
// Flushing of icache is not necessary.
|
||||
stub = pc; // Continue with next instruction.
|
||||
} else if ((sig == SIGFPE) && VM_Version::is_determine_features_test_running()) {
|
||||
// SIGFPE is known to be caused by trying to execute a vector instruction
|
||||
// when the vector facility is installed, but operating system support is missing.
|
||||
VM_Version::reset_has_VectorFacility();
|
||||
stub = pc; // Continue with next instruction.
|
||||
} else if (thread->thread_state() == _thread_in_vm &&
|
||||
sig == SIGBUS && thread->doing_unsafe_access()) {
|
||||
// We don't really need a stub here! Just set the pending exeption and
|
||||
@ -471,7 +477,7 @@ JVM_handle_linux_signal(int sig,
|
||||
// Info->si_addr need not be the exact address, it is only
|
||||
// guaranteed to be on the same page as the address that caused
|
||||
// the SIGSEGV.
|
||||
if ((sig == SIGSEGV) &&
|
||||
if ((sig == SIGSEGV) && !UseMembar &&
|
||||
(os::get_memory_serialize_page() ==
|
||||
(address)((uintptr_t)info->si_addr & ~(os::vm_page_size()-1)))) {
|
||||
return true;
|
||||
@ -510,7 +516,7 @@ JVM_handle_linux_signal(int sig,
|
||||
// Note: this should be combined with the trap_pc handling above,
|
||||
// because it handles the same issue.
|
||||
if (sig == SIGILL || sig == SIGFPE) {
|
||||
pc = (address) info->si_addr;
|
||||
pc = (address)info->si_addr;
|
||||
}
|
||||
|
||||
VMError::report_and_die(t, sig, pc, info, ucVoid);
|
||||

@ -27,30 +27,6 @@

// Implementation of class atomic

inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }

inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }

inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }

inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@ -103,9 +79,12 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return rv;
}

inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
intptr_t rv = exchange_value;
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
T rv = exchange_value;
__asm__ volatile(
" swap [%2],%1\n\t"
: "=r" (rv)
@ -114,8 +93,12 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return rv;
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
intptr_t rv = exchange_value;
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T rv = exchange_value;
__asm__ volatile(
"1:\n\t"
" mov %1, %%o3\n\t"
@ -131,10 +114,6 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
return rv;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}

// No direct support for cmpxchg of bytes; emulate using int.
template<>
struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
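
CmpxchgByteUsingInt emulates a one-byte compare-and-swap on hardware that only provides word-sized CAS: it operates on the enclosing aligned 32-bit word, swaps in just the target byte, and retries if a neighbouring byte changes underneath. A rough standalone sketch of the idea using a GCC builtin (little-endian byte numbering assumed; this is not the HotSpot implementation):

#include <stdint.h>

static uint8_t cmpxchg_byte(uint8_t exchange_value, volatile uint8_t* dest,
                            uint8_t compare_value) {
  volatile uint32_t* aligned =
      (volatile uint32_t*)((uintptr_t)dest & ~(uintptr_t)3);
  int shift = ((uintptr_t)dest & 3) * 8;  // Byte position (little-endian).
  for (;;) {
    uint32_t cur = *aligned;
    if ((uint8_t)(cur >> shift) != compare_value)
      return (uint8_t)(cur >> shift);     // Compare failed; report current byte.
    uint32_t repl = (cur & ~(0xFFu << shift))
                  | ((uint32_t)exchange_value << shift);
    if (__sync_bool_compare_and_swap(aligned, cur, repl))
      return compare_value;               // Success.
    // A neighbouring byte changed underneath; retry with the fresh word.
  }
}
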

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -48,6 +48,4 @@ inline void OrderAccess::fence() {
__asm__ volatile ("membar #StoreLoad" : : : "memory");
}

#define VM_HAS_GENERALIZED_ORDER_ACCESS 1

#endif // OS_CPU_LINUX_SPARC_VM_ORDERACCESS_LINUX_SPARC_INLINE_HPP

@ -27,19 +27,6 @@

// Implementation of class atomic

inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }

inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }

template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
@ -61,25 +48,11 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}

inline void Atomic::inc (volatile jint* dest) {
__asm__ volatile ( "lock addl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}

inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}

inline void Atomic::dec (volatile jint* dest) {
__asm__ volatile ( "lock subl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}

inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}

inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -87,10 +60,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return exchange_value;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@ -120,8 +89,6 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
}

#ifdef AMD64
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }

template<>
template<typename I, typename D>
@ -136,21 +103,11 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ("lock addq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ("lock subq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
__asm__ __volatile__ ("xchgq (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -172,22 +129,8 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return exchange_value;
}

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

#else // !AMD64

inline void Atomic::inc_ptr(volatile intptr_t* dest) {
inc((volatile jint*)dest);
}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {
dec((volatile jint*)dest);
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
}

extern "C" {
// defined in linux_x86.s
jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong);
@ -204,18 +147,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return cmpxchg_using_helper<jlong>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
}

inline jlong Atomic::load(const volatile jlong* src) {
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
_Atomic_move_long(src, &dest);
return dest;
_Atomic_move_long(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
return PrimitiveConversions::cast<T>(dest);
}

inline void Atomic::store(jlong store_value, jlong* dest) {
_Atomic_move_long((volatile jlong*)&store_value, (volatile jlong*)dest);
}

inline void Atomic::store(jlong store_value, volatile jlong* dest) {
_Atomic_move_long((volatile jlong*)&store_value, dest);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
_Atomic_move_long(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}

#endif // AMD64
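
On 32-bit x86 an ordinary 64-bit load or store is not guaranteed to be atomic, which is why the !AMD64 path above funnels jlong accesses through the _Atomic_move_long helper defined in linux_x86.s (an 8-byte FPU move). One portable way to get the same guarantee, sketched here with a GCC builtin rather than the HotSpot helper, is a compare-and-swap whose compare and exchange values are both zero, so the operation returns the current value atomically:

#include <stdint.h>

// Atomic 64-bit load on 32-bit x86 via CAS(*src, 0, 0).
// Note this can dirty the cache line even though the value never changes,
// so it is a sketch of the guarantee, not what HotSpot ships.
static inline int64_t atomic_load64(const volatile int64_t* src) {
  return __sync_val_compare_and_swap(
      const_cast<volatile int64_t*>(src), (int64_t)0, (int64_t)0);
}
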

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -60,46 +60,57 @@ inline void OrderAccess::fence() {
}

template<>
inline void OrderAccess::specialized_release_store_fence<jbyte> (volatile jbyte* p, jbyte v) {
__asm__ volatile ( "xchgb (%2),%0"
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgb (%2),%0"
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
}
};

template<>
inline void OrderAccess::specialized_release_store_fence<jshort>(volatile jshort* p, jshort v) {
__asm__ volatile ( "xchgw (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgw (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};

template<>
inline void OrderAccess::specialized_release_store_fence<jint> (volatile jint* p, jint v) {
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};

#ifdef AMD64
template<>
inline void OrderAccess::specialized_release_store_fence<jlong> (volatile jlong* p, jlong v) {
__asm__ volatile ( "xchgq (%2), %0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
VALUE_OBJ_CLASS_SPEC
{
template <typename T>
void operator()(T v, volatile T* p) const {
__asm__ volatile ( "xchgq (%2), %0"
: "=r" (v)
: "0" (v), "r" (p)
: "memory");
}
};
#endif // AMD64

template<>
inline void OrderAccess::specialized_release_store_fence<jfloat> (volatile jfloat* p, jfloat v) {
release_store_fence((volatile jint*)p, jint_cast(v));
}
template<>
inline void OrderAccess::specialized_release_store_fence<jdouble>(volatile jdouble* p, jdouble v) {
release_store_fence((volatile jlong*)p, jlong_cast(v));
}

#define VM_HAS_GENERALIZED_ORDER_ACCESS 1

#endif // OS_CPU_LINUX_X86_VM_ORDERACCESS_LINUX_X86_INLINE_HPP
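
These PlatformOrderedStore specializations keep using xchg rather than a plain store followed by mfence: an xchg with a memory operand is implicitly locked on x86, and a locked instruction already acts as a full StoreLoad barrier, so the store and the fence fold into one (usually cheaper) instruction. The two variants below are equivalent in ordering strength; the code above picks the first:

#include <stdint.h>

// Variant 1: store and fence fused into one implicitly-locked instruction.
static inline void release_store_fence_xchg(volatile int32_t* p, int32_t v) {
  __asm__ volatile("xchgl (%2),%0"
                   : "=r"(v)
                   : "0"(v), "r"(p)
                   : "memory");
}

// Variant 2: plain store followed by an explicit full fence.
static inline void release_store_fence_mfence(volatile int32_t* p, int32_t v) {
  *p = v;
  __asm__ volatile("mfence" ::: "memory");
}
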

@ -87,7 +87,7 @@ static inline int m68k_add_and_fetch(int add_value, volatile int *ptr) {

/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until success.
int prev = *ptr;
@ -148,7 +148,7 @@ static inline int arm_add_and_fetch(int add_value, volatile int *ptr) {

/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until a __kernel_cmpxchg succeeds.
int prev = *ptr;
@ -159,14 +159,6 @@ static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
}
#endif // ARM

inline void Atomic::store(jint store_value, volatile jint* dest) {
*dest = store_value;
}

inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) {
*dest = store_value;
}

template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@ -201,42 +193,22 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return __sync_add_and_fetch(dest, add_value);
}

inline void Atomic::inc(volatile jint* dest) {
add(1, dest);
}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {
add_ptr(1, dest);
}

inline void Atomic::inc_ptr(volatile void* dest) {
add_ptr(1, dest);
}

inline void Atomic::dec(volatile jint* dest) {
add(-1, dest);
}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {
add_ptr(-1, dest);
}

inline void Atomic::dec_ptr(volatile void* dest) {
add_ptr(-1, dest);
}

inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest);
#else
// __sync_lock_test_and_set is a bizarrely named atomic exchange
// operation. Note that some platforms only support this with the
// limitation that the only valid value to store is the immediate
// constant 1. There is a test for this in JNI_CreateJavaVM().
jint result = __sync_lock_test_and_set (dest, exchange_value);
T result = __sync_lock_test_and_set (dest, exchange_value);
// All atomic operations are expected to be full memory barriers
// (see atomic.hpp). However, __sync_lock_test_and_set is not
// a full memory barrier, but an acquire barrier. Hence, this added
@ -247,24 +219,14 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
#endif // ARM
}

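As the comment in the hunk above notes, __sync_lock_test_and_set is, despite its name, an atomic exchange, and it is specified only as an acquire barrier; the extra __sync_synchronize() is what restores the full-fence contract that atomic.hpp promises for every atomic operation. The pairing in isolation:

// Full-fence atomic exchange built from GCC __sync builtins.
static inline int full_fence_xchg(volatile int* dest, int exchange_value) {
  int result = __sync_lock_test_and_set(dest, exchange_value);  // acquire only
  __sync_synchronize();  // upgrade to a full memory barrier
  return result;
}
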
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
volatile intptr_t* dest) {
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
#else
intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T result = __sync_lock_test_and_set (dest, exchange_value);
__sync_synchronize();
return result;
#endif // M68K
#endif // ARM
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void *) xchg_ptr((intptr_t) exchange_value,
(volatile intptr_t*) dest);
}

// No direct support for cmpxchg of bytes; emulate using int.
@ -299,18 +261,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
}

inline jlong Atomic::load(const volatile jlong* src) {
template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
os::atomic_copy64(src, &dest);
return dest;
os::atomic_copy64(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
return PrimitiveConversions::cast<T>(dest);
}

inline void Atomic::store(jlong store_value, jlong* dest) {
os::atomic_copy64((volatile jlong*)&store_value, (volatile jlong*)dest);
}

inline void Atomic::store(jlong store_value, volatile jlong* dest) {
os::atomic_copy64((volatile jlong*)&store_value, dest);
template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
os::atomic_copy64(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}

#endif // OS_CPU_LINUX_ZERO_VM_ATOMIC_LINUX_ZERO_HPP

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2009 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -56,8 +56,16 @@ typedef void (__kernel_dmb_t) (void);

#else // PPC

#ifdef ALPHA

#define LIGHT_MEM_BARRIER __sync_synchronize()

#else // ALPHA

#define LIGHT_MEM_BARRIER __asm __volatile ("":::"memory")

#endif // ALPHA

#endif // PPC

#endif // ARM
@ -75,6 +83,4 @@ inline void OrderAccess::release() { LIGHT_MEM_BARRIER; }

inline void OrderAccess::fence() { FULL_MEM_BARRIER; }

#define VM_HAS_GENERALIZED_ORDER_ACCESS 1

#endif // OS_CPU_LINUX_ZERO_VM_ORDERACCESS_LINUX_ZERO_INLINE_HPP
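
Note the asymmetry the new #ifdef ALPHA block introduces: on Alpha even the "light" barrier must be a real hardware barrier (__sync_synchronize()), while on the remaining non-ARM, non-PPC targets LIGHT_MEM_BARRIER is a compiler-only barrier. The empty asm with a "memory" clobber forbids the compiler from reordering or caching memory accesses across it, but emits no instruction and does not constrain the CPU. The distinction in miniature:

// Compiler-only barrier: blocks compiler reordering, emits no instruction.
#define COMPILER_BARRIER() __asm__ __volatile__("" ::: "memory")

// Hardware barrier: additionally orders the CPU's memory operations.
#define HARDWARE_BARRIER() __sync_synchronize()
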

@ -27,41 +27,6 @@

// Implementation of class atomic

inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }

inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }

inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }

inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }

inline void Atomic::store(jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store(jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline jlong Atomic::load(const volatile jlong* src) { return *src; }

// This is the interface to the atomic instructions in solaris_sparc.il.
// It's very messy because we need to support v8 and these instructions
// are illegal there. When sparc v8 is dropped, we can drop out lots of
// this code. Also compiler2 does not support v8 so the conditional code
// omits the instruction set check.

extern "C" jint _Atomic_swap32(jint exchange_value, volatile jint* dest);
extern "C" intptr_t _Atomic_swap64(intptr_t exchange_value, volatile intptr_t* dest);

// Implement ADD using a CAS loop.
template<size_t byte_size>
struct Atomic::PlatformAdd VALUE_OBJ_CLASS_SPEC {
@ -78,16 +43,30 @@ struct Atomic::PlatformAdd VALUE_OBJ_CLASS_SPEC {
}
};

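The add_and_fetch body elided by this hunk follows the standard CAS-loop shape: read the old value, compute old + add_value, and retry the compare-and-swap until no other thread has updated the location in between. A generic sketch of that loop (GCC builtin shown in place of the solaris_sparc.il stubs):

// Generic add-and-fetch synthesized from compare-and-swap.
template <typename T>
static T cas_add_and_fetch(T add_value, volatile T* dest) {
  T old_value = *dest;
  for (;;) {
    T new_value = old_value + add_value;
    T prev = __sync_val_compare_and_swap(dest, old_value, new_value);
    if (prev == old_value) return new_value;  // CAS took effect.
    old_value = prev;                         // Lost the race; retry.
  }
}
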
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
return _Atomic_swap32(exchange_value, dest);
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "swap [%2],%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
: "memory");
return exchange_value;
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return _Atomic_swap64(exchange_value, dest);
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T old_value = *dest;
while (true) {
T result = cmpxchg(exchange_value, dest, old_value);
if (result == old_value) break;
old_value = result;
}
return old_value;
}

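The 8-byte exchange above has to be synthesized from a cmpxchg loop because SPARC's swap instruction, used in the 4-byte specialization, only operates on 32-bit words; casx is the only 64-bit atomic primitive the ISA offers, so exchange is built on top of it exactly as shown.
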
// No direct support for cmpxchg of bytes; emulate using int.