This commit is contained in:
Jesper Wilhelmsson 2017-10-30 21:23:10 +01:00
commit b97f1bcb37
720 changed files with 17256 additions and 20763 deletions

View File

@ -463,7 +463,7 @@ tar -xzf freetype-2.5.3.tar.gz</code></pre>
<li><code>--with-native-debug-symbols=&lt;method&gt;</code> - Specify if and how native debug symbols should be built. Available methods are <code>none</code>, <code>internal</code>, <code>external</code>, <code>zipped</code>. Default behavior depends on platform. See <a href="#native-debug-symbols">Native Debug Symbols</a> for more details.</li>
<li><code>--with-version-string=&lt;string&gt;</code> - Specify the version string this build will be identified with.</li>
<li><code>--with-version-&lt;part&gt;=&lt;value&gt;</code> - A group of options, where <code>&lt;part&gt;</code> can be any of <code>pre</code>, <code>opt</code>, <code>build</code>, <code>major</code>, <code>minor</code>, <code>security</code> or <code>patch</code>. Use these options to modify just the corresponding part of the version string from the default, or the value provided by <code>--with-version-string</code>.</li>
<li><code>--with-jvm-variants=&lt;variant&gt;[,&lt;variant&gt;...]</code> - Build the specified variant (or variants) of Hotspot. Valid variants are: <code>server</code>, <code>client</code>, <code>minimal</code>, <code>core</code>, <code>zero</code>, <code>zeroshark</code>, <code>custom</code>. Note that not all variants are possible to combine in a single build.</li>
<li><code>--with-jvm-variants=&lt;variant&gt;[,&lt;variant&gt;...]</code> - Build the specified variant (or variants) of Hotspot. Valid variants are: <code>server</code>, <code>client</code>, <code>minimal</code>, <code>core</code>, <code>zero</code>, <code>custom</code>. Note that not all variants are possible to combine in a single build.</li>
<li><code>--with-jvm-features=&lt;feature&gt;[,&lt;feature&gt;...]</code> - Use the specified JVM features when building Hotspot. The list of features will be enabled on top of the default list. For the <code>custom</code> JVM variant, this default list is empty. A complete list of available JVM features can be found using <code>bash configure --help</code>.</li>
<li><code>--with-target-bits=&lt;bits&gt;</code> - Create a target binary suitable for running on a <code>&lt;bits&gt;</code> platform. Use this to create 32-bit output on a 64-bit build platform, instead of doing a full cross-compile. (This is known as a <em>reduced</em> build.)</li>
</ul>

View File

@ -668,7 +668,7 @@ features, use `bash configure --help=short` instead.)
from the default, or the value provided by `--with-version-string`.
* `--with-jvm-variants=<variant>[,<variant>...]` - Build the specified variant
(or variants) of Hotspot. Valid variants are: `server`, `client`,
`minimal`, `core`, `zero`, `zeroshark`, `custom`. Note that not all
`minimal`, `core`, `zero`, `custom`. Note that not all
variants are possible to combine in a single build.
* `--with-jvm-features=<feature>[,<feature>...]` - Use the specified JVM
features when building Hotspot. The list of features will be enabled on top

View File

@ -1097,7 +1097,7 @@ AC_DEFUN([FLAGS_SETUP_COMPILER_FLAGS_FOR_JDK_HELPER],
]
)
fi
if ! HOTSPOT_CHECK_JVM_VARIANT(zero) && ! HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
if ! HOTSPOT_CHECK_JVM_VARIANT(zero); then
# Non-zero builds have stricter warnings
$2JVM_CFLAGS="[$]$2JVM_CFLAGS -Wreturn-type -Wundef -Wformat=2"
else

View File

@ -24,12 +24,12 @@
#
# All valid JVM features, regardless of platform
VALID_JVM_FEATURES="compiler1 compiler2 zero shark minimal dtrace jvmti jvmci \
VALID_JVM_FEATURES="compiler1 compiler2 zero minimal dtrace jvmti jvmci \
graal vm-structs jni-check services management all-gcs nmt cds \
static-build link-time-opt aot"
# All valid JVM variants
VALID_JVM_VARIANTS="server client minimal core zero zeroshark custom"
VALID_JVM_VARIANTS="server client minimal core zero custom"
###############################################################################
# Check if the specified JVM variant should be built. To be used in shell if
@ -62,13 +62,12 @@ AC_DEFUN([HOTSPOT_CHECK_JVM_FEATURE],
# minimal: reduced form of client with optional features stripped out
# core: normal interpreter only, no compiler
# zero: C++ based interpreter only, no compiler
# zeroshark: C++ based interpreter, and a llvm-based compiler
# custom: baseline JVM with no default features
#
AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_VARIANTS],
[
AC_ARG_WITH([jvm-variants], [AS_HELP_STRING([--with-jvm-variants],
[JVM variants (separated by commas) to build (server,client,minimal,core,zero,zeroshark,custom) @<:@server@:>@])])
[JVM variants (separated by commas) to build (server,client,minimal,core,zero,custom) @<:@server@:>@])])
SETUP_HOTSPOT_TARGET_CPU_PORT
@ -132,7 +131,7 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_VARIANTS],
AC_SUBST(VALID_JVM_VARIANTS)
AC_SUBST(JVM_VARIANT_MAIN)
if HOTSPOT_CHECK_JVM_VARIANT(zero) || HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
if HOTSPOT_CHECK_JVM_VARIANT(zero); then
# zero behaves as a platform and rewrites these values. This is really weird. :(
# We are guaranteed that we do not build any other variants when building zero.
HOTSPOT_TARGET_CPU=zero
@ -325,15 +324,9 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
fi
fi
if ! HOTSPOT_CHECK_JVM_VARIANT(zero) && ! HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
if ! HOTSPOT_CHECK_JVM_VARIANT(zero); then
if HOTSPOT_CHECK_JVM_FEATURE(zero); then
AC_MSG_ERROR([To enable zero/zeroshark, you must use --with-jvm-variants=zero/zeroshark])
fi
fi
if ! HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
if HOTSPOT_CHECK_JVM_FEATURE(shark); then
AC_MSG_ERROR([To enable shark, you must use --with-jvm-variants=zeroshark])
AC_MSG_ERROR([To enable zero, you must use --with-jvm-variants=zero])
fi
fi
@ -408,7 +401,6 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
JVM_FEATURES_core="$NON_MINIMAL_FEATURES $JVM_FEATURES"
JVM_FEATURES_minimal="compiler1 minimal $JVM_FEATURES $JVM_FEATURES_link_time_opt"
JVM_FEATURES_zero="zero $NON_MINIMAL_FEATURES $JVM_FEATURES"
JVM_FEATURES_zeroshark="zero shark $NON_MINIMAL_FEATURES $JVM_FEATURES"
JVM_FEATURES_custom="$JVM_FEATURES"
AC_SUBST(JVM_FEATURES_server)
@ -416,7 +408,6 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
AC_SUBST(JVM_FEATURES_core)
AC_SUBST(JVM_FEATURES_minimal)
AC_SUBST(JVM_FEATURES_zero)
AC_SUBST(JVM_FEATURES_zeroshark)
AC_SUBST(JVM_FEATURES_custom)
# Used for verification of Makefiles by check-jvm-feature
@ -437,7 +428,6 @@ AC_DEFUN_ONCE([HOTSPOT_VALIDATE_JVM_FEATURES],
JVM_FEATURES_core="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_core | $SORT -u))"
JVM_FEATURES_minimal="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_minimal | $SORT -u))"
JVM_FEATURES_zero="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_zero | $SORT -u))"
JVM_FEATURES_zeroshark="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_zeroshark | $SORT -u))"
JVM_FEATURES_custom="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_custom | $SORT -u))"
# Validate features

View File

@ -232,7 +232,7 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_JDK_OPTIONS],
# Should we build the serviceability agent (SA)?
INCLUDE_SA=true
if HOTSPOT_CHECK_JVM_VARIANT(zero) || HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
if HOTSPOT_CHECK_JVM_VARIANT(zero); then
INCLUDE_SA=false
fi
if test "x$OPENJDK_TARGET_OS" = xaix ; then

View File

@ -65,8 +65,7 @@ AC_DEFUN_ONCE([LIB_SETUP_STD_LIBS],
# If dynamic was requested, it's available since it would fail above otherwise.
# If dynamic wasn't requested, go with static unless it isn't available.
AC_MSG_CHECKING([how to link with libstdc++])
if test "x$with_stdc__lib" = xdynamic || test "x$has_static_libstdcxx" = xno \
|| HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
if test "x$with_stdc__lib" = xdynamic || test "x$has_static_libstdcxx" = xno ; then
AC_MSG_RESULT([dynamic])
else
LIBCXX="$LIBCXX $STATIC_STDCXX_FLAGS"

View File

@ -79,7 +79,7 @@ AC_DEFUN_ONCE([LIB_DETERMINE_DEPENDENCIES],
fi
# Check if ffi is needed
if HOTSPOT_CHECK_JVM_VARIANT(zero) || HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
if HOTSPOT_CHECK_JVM_VARIANT(zero); then
NEEDS_LIB_FFI=true
else
NEEDS_LIB_FFI=false
@ -98,69 +98,11 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES],
LIB_SETUP_FREETYPE
LIB_SETUP_ALSA
LIB_SETUP_LIBFFI
LIB_SETUP_LLVM
LIB_SETUP_BUNDLED_LIBS
LIB_SETUP_MISC_LIBS
LIB_SETUP_SOLARIS_STLPORT
])
################################################################################
# Setup llvm (Low-Level VM)
################################################################################
AC_DEFUN_ONCE([LIB_SETUP_LLVM],
[
if HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
AC_CHECK_PROG([LLVM_CONFIG], [llvm-config], [llvm-config])
if test "x$LLVM_CONFIG" != xllvm-config; then
AC_MSG_ERROR([llvm-config not found in $PATH.])
fi
llvm_components="jit mcjit engine nativecodegen native"
unset LLVM_CFLAGS
for flag in $("$LLVM_CONFIG" --cxxflags); do
if echo "${flag}" | grep -q '^-@<:@ID@:>@'; then
if test "${flag}" != "-D_DEBUG" ; then
if test "${LLVM_CFLAGS}" != "" ; then
LLVM_CFLAGS="${LLVM_CFLAGS} "
fi
LLVM_CFLAGS="${LLVM_CFLAGS}${flag}"
fi
fi
done
llvm_version=$("${LLVM_CONFIG}" --version | $SED 's/\.//; s/svn.*//')
LLVM_CFLAGS="${LLVM_CFLAGS} -DSHARK_LLVM_VERSION=${llvm_version}"
unset LLVM_LDFLAGS
for flag in $("${LLVM_CONFIG}" --ldflags); do
if echo "${flag}" | grep -q '^-L'; then
if test "${LLVM_LDFLAGS}" != ""; then
LLVM_LDFLAGS="${LLVM_LDFLAGS} "
fi
LLVM_LDFLAGS="${LLVM_LDFLAGS}${flag}"
fi
done
unset LLVM_LIBS
for flag in $("${LLVM_CONFIG}" --libs ${llvm_components}); do
if echo "${flag}" | grep -q '^-l'; then
if test "${LLVM_LIBS}" != ""; then
LLVM_LIBS="${LLVM_LIBS} "
fi
LLVM_LIBS="${LLVM_LIBS}${flag}"
fi
done
# Due to https://llvm.org/bugs/show_bug.cgi?id=16902, llvm does not
# always properly detect -ltinfo
LLVM_LIBS="${LLVM_LIBS} -ltinfo"
AC_SUBST(LLVM_CFLAGS)
AC_SUBST(LLVM_LDFLAGS)
AC_SUBST(LLVM_LIBS)
fi
])
################################################################################
# Setup various libraries, typically small system libraries
################################################################################

View File

@ -219,7 +219,6 @@ JVM_FEATURES_client := @JVM_FEATURES_client@
JVM_FEATURES_core := @JVM_FEATURES_core@
JVM_FEATURES_minimal := @JVM_FEATURES_minimal@
JVM_FEATURES_zero := @JVM_FEATURES_zero@
JVM_FEATURES_zeroshark := @JVM_FEATURES_zeroshark@
JVM_FEATURES_custom := @JVM_FEATURES_custom@
# Used for make-time verifications
@ -403,11 +402,6 @@ JVM_ASFLAGS := @JVM_ASFLAGS@
JVM_LIBS := @JVM_LIBS@
JVM_RCFLAGS := @JVM_RCFLAGS@
# Flags for zeroshark
LLVM_CFLAGS := @LLVM_CFLAGS@
LLVM_LIBS := @LLVM_LIBS@
LLVM_LDFLAGS := @LLVM_LDFLAGS@
# These flags might contain variables set by a custom extension that is included later.
EXTRA_CFLAGS = @EXTRA_CFLAGS@
EXTRA_CXXFLAGS = @EXTRA_CXXFLAGS@

View File

@ -113,6 +113,7 @@ PLATFORM_MODULES += \
jdk.dynalink \
jdk.httpserver \
jdk.incubator.httpclient \
jdk.internal.vm.compiler.management \
jdk.jsobject \
jdk.localedata \
jdk.naming.dns \
@ -215,6 +216,7 @@ endif
ifeq ($(INCLUDE_GRAAL), false)
MODULES_FILTER += jdk.internal.vm.compiler
MODULES_FILTER += jdk.internal.vm.compiler.management
endif
################################################################################

View File

@ -1060,7 +1060,7 @@ var getJibProfilesDependencies = function (input, common) {
jtreg: {
server: "javare",
revision: "4.2",
build_number: "b08",
build_number: "b09",
checksum_file: "MD5_VALUES",
file: "jtreg_bin-4.2.zip",
environment_name: "JT_HOME",

View File

@ -87,7 +87,7 @@ endif
#
# How to install jvm.cfg.
#
ifeq ($(call check-jvm-variant, zero zeroshark), true)
ifeq ($(call check-jvm-variant, zero), true)
JVMCFG_ARCH := zero
else
JVMCFG_ARCH := $(OPENJDK_TARGET_CPU_LEGACY)
@ -102,8 +102,6 @@ else
endif
JVMCFG := $(LIB_DST_DIR)/jvm.cfg
# To do: should this also support -zeroshark?
ifeq ($(OPENJDK_TARGET_CPU_BITS), 64)
COPY_JVM_CFG_FILE := true
else
@ -120,7 +118,7 @@ else
COPY_JVM_CFG_FILE := true
else
# For zero, the default jvm.cfg file is sufficient
ifeq ($(call check-jvm-variant, zero zeroshark), true)
ifeq ($(call check-jvm-variant, zero), true)
COPY_JVM_CFG_FILE := true
endif
endif

View File

@ -54,15 +54,4 @@ $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java:
GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java
$(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat: \
$(TOPDIR)/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat \
$(VARDEPS_FILE) $(BUILD_TOOLS_JDK)
$(MKDIR) -p $(@D)
$(RM) $@ $@.tmp
$(TOOL_GENCLASSLOADERMAP) -boot $(BOOT_MODULES_LIST) \
-platform $(PLATFORM_MODULES_LIST) -o $@.tmp $<
$(MV) $@.tmp $@
GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat
################################################################################

View File

@ -75,7 +75,6 @@ ifeq ($(OPENJDK_TARGET_OS), windows)
-ignorePath linux \
-ignorePath posix \
-ignorePath ppc \
-ignorePath shark \
-ignorePath solaris \
-ignorePath sparc \
-ignorePath x86_32 \

View File

@ -58,6 +58,7 @@ JVM_CFLAGS_INCLUDES += \
-I$(JVM_VARIANT_OUTPUTDIR)/gensrc \
-I$(TOPDIR)/src/hotspot/share/precompiled \
-I$(TOPDIR)/src/hotspot/share/prims \
-I$(TOPDIR)/src/java.base/share/native/include \
#
# INCLUDE_SUFFIX_* is only meant for including the proper

View File

@ -47,14 +47,9 @@ endif
ifeq ($(call check-jvm-feature, zero), true)
JVM_CFLAGS_FEATURES += -DZERO -DCC_INTERP -DZERO_LIBARCH='"$(OPENJDK_TARGET_CPU_LEGACY_LIB)"' $(LIBFFI_CFLAGS)
JVM_LIBS_FEATURES += $(LIBFFI_LIBS)
ifeq ($(OPENJDK_TARGET_CPU), sparcv9)
BUILD_LIBJVM_EXTRA_FILES := $(TOPDIR)/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp
endif
ifeq ($(call check-jvm-feature, shark), true)
JVM_CFLAGS_FEATURES += -DSHARK $(LLVM_CFLAGS)
JVM_LDFLAGS_FEATURES += $(LLVM_LDFLAGS)
JVM_LIBS_FEATURES += $(LLVM_LIBS)
else
JVM_EXCLUDES += shark
endif
ifeq ($(call check-jvm-feature, minimal), true)
@ -129,6 +124,7 @@ ifneq ($(call check-jvm-feature, all-gcs), true)
cms/ g1/ parallel/
JVM_EXCLUDE_FILES += \
concurrentGCThread.cpp \
suspendibleThreadSet.cpp \
plab.cpp
JVM_EXCLUDE_FILES += \
g1MemoryPool.cpp \

View File

@ -77,30 +77,22 @@ public class GenModuleLoaderMap {
throw new IllegalArgumentException(source + " not exist");
}
boolean needsQuotes = outfile.toString().contains(".java.tmp");
try (BufferedWriter bw = Files.newBufferedWriter(outfile, StandardCharsets.UTF_8);
PrintWriter writer = new PrintWriter(bw)) {
for (String line : Files.readAllLines(source)) {
if (line.contains("@@BOOT_MODULE_NAMES@@")) {
line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules, needsQuotes);
line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules);
} else if (line.contains("@@PLATFORM_MODULE_NAMES@@")) {
line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules, needsQuotes);
line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules);
}
writer.println(line);
}
}
}
private static String patch(String s, String tag, Stream<String> stream, boolean needsQuotes) {
String mns = null;
if (needsQuotes) {
mns = stream.sorted()
private static String patch(String s, String tag, Stream<String> stream) {
String mns = stream.sorted()
.collect(Collectors.joining("\",\n \""));
} else {
mns = stream.sorted()
.collect(Collectors.joining("\n"));
}
return s.replace(tag, mns);
}

View File

@ -300,7 +300,7 @@ LIBJLI_SRC_DIRS := $(call FindSrcDirsForLib, java.base, jli)
LIBJLI_CFLAGS := $(CFLAGS_JDKLIB)
ifeq ($(call check-jvm-variant, zero zeroshark), true)
ifeq ($(call check-jvm-variant, zero), true)
ERGO_FAMILY := zero
else
ifeq ($(OPENJDK_TARGET_CPU_ARCH), x86)

View File

@ -50,6 +50,7 @@ BUILD_HOTSPOT_JTREG_NATIVE_SRC += \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/8025979 \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/8033445 \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/checked \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/FindClass \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/PrivateInterfaceMethods \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/ToStringInInterfaceTest \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/CalleeSavedRegisters \
@ -59,6 +60,7 @@ BUILD_HOTSPOT_JTREG_NATIVE_SRC += \
$(TOPDIR)/test/hotspot/jtreg/runtime/SameObject \
$(TOPDIR)/test/hotspot/jtreg/runtime/BoolReturn \
$(TOPDIR)/test/hotspot/jtreg/runtime/noClassDefFoundMsg \
$(TOPDIR)/test/hotspot/jtreg/runtime/RedefineTests \
$(TOPDIR)/test/hotspot/jtreg/compiler/floatingpoint/ \
$(TOPDIR)/test/hotspot/jtreg/compiler/calls \
$(TOPDIR)/test/hotspot/jtreg/serviceability/jvmti/GetOwnedMonitorInfo \
@ -103,6 +105,7 @@ ifeq ($(TOOLCHAIN_TYPE), solstudio)
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAClassLoadPrepare := -lc
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAThreadStart := -lc
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libAllowedFunctions := -lc
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libRedefineDoubleDelete := -lc
endif
ifeq ($(OPENJDK_TARGET_OS), linux)

View File

@ -70,7 +70,7 @@
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="cpu/vm/templateTable_x86_32.cpp|cpu/vm/templateInterpreter_x86_32.cpp|cpu/vm/stubRoutines_x86_32.cpp|cpu/vm/stubGenerator_x86_32.cpp|cpu/vm/sharedRuntime_x86_32.cpp|cpu/vm/jniFastGetField_x86_32.cpp|cpu/vm/interpreterRT_x86_32.cpp|cpu/vm/interpreter_x86_32.cpp|cpu/vm/interp_masm_x86_32.cpp|cpu/vm/vtableStubs_x86_32.cpp" flags="VALUE_WORKSPACE_PATH" kind="sourcePath" name=""/>
<entry excluding="cpu/x86/templateTable_x86_32.cpp|cpu/x86/templateInterpreter_x86_32.cpp|cpu/x86/stubRoutines_x86_32.cpp|cpu/x86/stubGenerator_x86_32.cpp|cpu/x86/sharedRuntime_x86_32.cpp|cpu/x86/jniFastGetField_x86_32.cpp|cpu/x86/interpreterRT_x86_32.cpp|cpu/x86/interpreter_x86_32.cpp|cpu/x86/interp_masm_x86_32.cpp|cpu/x86/vtableStubs_x86_32.cpp" flags="VALUE_WORKSPACE_PATH" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>

View File

@ -256,14 +256,10 @@ class HotSpotProject(mx.NativeProject):
"""
roots = [
'ASSEMBLY_EXCEPTION',
'LICENSE',
'README',
'THIRD_PARTY_README',
'agent',
'make',
'src',
'test'
'cpu',
'os',
'os_cpu',
'share'
]
for jvmVariant in _jdkJvmVariants:
@ -605,6 +601,16 @@ def _get_openjdk_cpu():
def _get_openjdk_os_cpu():
return _get_openjdk_os() + '-' + _get_openjdk_cpu()
def _get_jdk_dir():
    """
    Gets the root directory of the JDK repository that contains this suite.

    The parent of the suite directory is expected to look like
    /some_prefix/jdk10-hs/open/src (or /some_prefix/jdk10-hs/src). The
    repository-layout components ("open", "src") and everything below them
    are dropped, so /some_prefix/jdk10-hs is returned.
    """
    suiteParentDir = dirname(_suite.dir)
    # suiteParentDir is now something like: /some_prefix/jdk10-hs/open/src
    pathComponents = suiteParentDir.split(os.sep)
    layoutDirs = ("open", "src")

    # Find the LAST layout component. Scanning from the filesystem root would
    # cut the path too early when an unrelated ancestor directory happens to
    # be called "src" or "open" (e.g. a checkout under /home/user/src/...).
    cut = len(pathComponents)
    for i in range(len(pathComponents) - 1, -1, -1):
        if pathComponents[i] in layoutDirs:
            cut = i
            break

    # Absorb layout components that directly precede it ("open" before "src").
    while cut > 0 and pathComponents[cut - 1] in layoutDirs:
        cut -= 1

    return os.path.join(os.sep, *pathComponents[:cut])
def _get_jdk_build_dir(debugLevel=None):
"""
Gets the directory into which the JDK is built. This directory contains
@ -613,7 +619,7 @@ def _get_jdk_build_dir(debugLevel=None):
if debugLevel is None:
debugLevel = _vm.debugLevel
name = '{}-{}-{}-{}'.format(_get_openjdk_os_cpu(), 'normal', _vm.jvmVariant, debugLevel)
return join(dirname(_suite.dir), 'build', name)
return join(_get_jdk_dir(), 'build', name)
_jvmci_bootclasspath_prepends = []

View File

@ -24,9 +24,7 @@ suite = {
"defaultLicense" : "GPLv2-CPE",
# This puts mx/ as a sibling of the JDK build configuration directories
# (e.g., macosx-x86_64-normal-server-release).
"outputRoot" : "../build/mx/hotspot",
"outputRoot" : "../../build/mx/hotspot",
# ------------- Libraries -------------
@ -43,7 +41,7 @@ suite = {
# ------------- JVMCI:Service -------------
"jdk.vm.ci.services" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"javaCompliance" : "9",
"workingSets" : "API,JVMCI",
@ -52,7 +50,7 @@ suite = {
# ------------- JVMCI:API -------------
"jdk.vm.ci.common" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"checkstyle" : "jdk.vm.ci.services",
"javaCompliance" : "9",
@ -60,7 +58,7 @@ suite = {
},
"jdk.vm.ci.meta" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"checkstyle" : "jdk.vm.ci.services",
"javaCompliance" : "9",
@ -68,7 +66,7 @@ suite = {
},
"jdk.vm.ci.code" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.meta"],
"checkstyle" : "jdk.vm.ci.services",
@ -77,7 +75,7 @@ suite = {
},
"jdk.vm.ci.code.test" : {
"subDir" : "test/compiler/jvmci",
"subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:JUNIT",
@ -92,7 +90,7 @@ suite = {
},
"jdk.vm.ci.runtime" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.code",
@ -104,7 +102,7 @@ suite = {
},
"jdk.vm.ci.runtime.test" : {
"subDir" : "test/compiler/jvmci",
"subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:JUNIT",
@ -119,7 +117,7 @@ suite = {
# ------------- JVMCI:HotSpot -------------
"jdk.vm.ci.aarch64" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.code"],
"checkstyle" : "jdk.vm.ci.services",
@ -128,7 +126,7 @@ suite = {
},
"jdk.vm.ci.amd64" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.code"],
"checkstyle" : "jdk.vm.ci.services",
@ -137,7 +135,7 @@ suite = {
},
"jdk.vm.ci.sparc" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.code"],
"checkstyle" : "jdk.vm.ci.services",
@ -146,7 +144,7 @@ suite = {
},
"jdk.vm.ci.hotspot" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.common",
@ -163,7 +161,7 @@ suite = {
},
"jdk.vm.ci.hotspot.test" : {
"subDir" : "test/compiler/jvmci",
"subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"TESTNG",
@ -175,7 +173,7 @@ suite = {
},
"jdk.vm.ci.hotspot.aarch64" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.aarch64",
@ -187,7 +185,7 @@ suite = {
},
"jdk.vm.ci.hotspot.amd64" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.amd64",
@ -199,7 +197,7 @@ suite = {
},
"jdk.vm.ci.hotspot.sparc" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.sparc",
@ -221,12 +219,12 @@ suite = {
# ------------- Distributions -------------
"JVMCI_SERVICES" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"dependencies" : ["jdk.vm.ci.services"],
},
"JVMCI_API" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"dependencies" : [
"jdk.vm.ci.runtime",
"jdk.vm.ci.common",
@ -240,7 +238,7 @@ suite = {
},
"JVMCI_HOTSPOT" : {
"subDir" : "src/jdk.internal.vm.ci/share/classes",
"subDir" : "../jdk.internal.vm.ci/share/classes",
"dependencies" : [
"jdk.vm.ci.hotspot.aarch64",
"jdk.vm.ci.hotspot.amd64",
@ -253,7 +251,7 @@ suite = {
},
"JVMCI_TEST" : {
"subDir" : "test/compiler/jvmci",
"subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"dependencies" : [
"jdk.vm.ci.runtime.test",
],

View File

@ -2575,13 +2575,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
Register mdo = op->mdo()->as_register();
__ mov_metadata(mdo, md->constant_encoding());
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
Bytecodes::Code bc = method->java_code_at_bci(bci);
const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes
if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
!callee_is_static && // required for optimized MH invokes
C1ProfileVirtualCalls) {
if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, recv);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -26,9 +26,9 @@
#ifndef CPU_AARCH64_VM_JNITYPES_AARCH64_HPP
#define CPU_AARCH64_VM_JNITYPES_AARCH64_HPP
#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive jni
// types to the array of arguments passed into JavaCalls::call

View File

@ -2840,6 +2840,44 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
bind(L_done);
}
// Code for BigInteger::mulAdd intrinsic
// out = r0
// in = r1
// offset = r2 (already out.length-offset)
// len = r3
// k = r4
//
// pseudo code from java implementation:
// carry = 0;
// offset = out.length-offset - 1;
// for (int j=len-1; j >= 0; j--) {
//   product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
//   out[offset--] = (int)product;
//   carry = product >>> 32;
// }
// return (int)carry;
//
// On exit the 'out' register holds the carry (it is zeroed when len == 0).
// The 'in', 'offset' and 'len' registers are overwritten, and rscratch1 and
// rscratch2 are used as temporaries.
// NOTE(review): the carry is taken from bits 63..32 of the accumulated
// product, so this appears to rely on 'k' holding a zero-extended 32-bit
// value (kLong in the pseudo code) - confirm against the callers.
void MacroAssembler::mul_add(Register out, Register in, Register offset,
Register len, Register k) {
Label LOOP, END;
// pre-loop
cmp(len, zr); // cmp, not cbz/cbnz: to use condition twice => less branches
// len == 0: nothing to do, the returned carry (in 'out') is zero
csel(out, zr, out, Assembler::EQ);
br(Assembler::EQ, END);
// Turn the element indices into addresses (elements are 4 bytes wide, hence LSL 2).
// Both point one element past the one processed first; the loop pre-decrements.
add(in, in, len, LSL, 2); // in[j+1] address
add(offset, out, offset, LSL, 2); // out[offset + 1] address
mov(out, zr); // used to keep carry now
BIND(LOOP);
// rscratch1 = in[j] (step 'in' back by 4 bytes, then load)
ldrw(rscratch1, Address(pre(in, -4)));
// rscratch1 = in[j] * k + carry
madd(rscratch1, rscratch1, k, out);
// rscratch2 = out[offset] (step 'offset' back by 4 bytes, then load)
ldrw(rscratch2, Address(pre(offset, -4)));
// product = in[j] * k + carry + out[offset]
add(rscratch1, rscratch1, rscratch2);
// out[offset] = (int)product
strw(rscratch1, Address(offset));
// carry = product >>> 32
lsr(out, rscratch1, 32);
// one element done; loop until len reaches zero
subs(len, len, 1);
br(Assembler::NE, LOOP);
BIND(END);
}
/**
* Emits code to update CRC-32 with a byte value according to constants in table
*
@ -3291,6 +3329,7 @@ void MacroAssembler::load_mirror(Register dst, Register method) {
ldr(dst, Address(dst, ConstMethod::constants_offset()));
ldr(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
ldr(dst, Address(dst, mirror_offset));
resolve_oop_handle(dst);
}
void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {

View File

@ -1265,6 +1265,7 @@ public:
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
Register zlen, Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6, Register tmp7);
void mul_add(Register out, Register in, Register offs, Register len, Register k);
// ISB may be needed because of a safepoint
void maybe_isb() { isb(); }

View File

@ -3607,6 +3607,63 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Emits the "squareToLen" stub and returns the address of its first
// instruction. The stub does not use a dedicated squaring algorithm: it
// calls multiply_to_len with both operands set to x (y == x, ylen == xlen).
address generate_squareToLen() {
// squareToLen algorithm for sizes 1..127 described in java code works
// faster than multiply_to_len on some CPUs and slower on others, but
// multiply_to_len shows a bit better overall results
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "squareToLen");
address start = __ pc();
// Register assignment used by the stub
const Register x = r0;
const Register xlen = r1;
const Register z = r2;
const Register zlen = r3;
const Register y = r4; // == x
const Register ylen = r5; // == xlen
// Temporaries handed to multiply_to_len
const Register tmp1 = r10;
const Register tmp2 = r11;
const Register tmp3 = r12;
const Register tmp4 = r13;
const Register tmp5 = r14;
const Register tmp6 = r15;
const Register tmp7 = r16;
// y and ylen (r4, r5) are overwritten with copies of x and xlen below,
// so their incoming values are saved here and restored before returning.
RegSet spilled_regs = RegSet::of(y, ylen);
BLOCK_COMMENT("Entry:");
__ enter();
__ push(spilled_regs, sp);
// Second operand is the first operand itself: z = x * x
__ mov(y, x);
__ mov(ylen, xlen);
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
__ pop(spilled_regs, sp);
__ leave();
__ ret(lr);
return start;
}
// Emits the "mulAdd" stub and returns the address of its first instruction.
// The stub is a thin frame (enter/leave) around MacroAssembler::mul_add,
// called with the arguments in r0..r4 as assigned below.
address generate_mulAdd() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "mulAdd");
address start = __ pc();
// Register assignment passed straight through to mul_add
const Register out = r0;
const Register in = r1;
const Register offset = r2;
const Register len = r3;
const Register k = r4;
BLOCK_COMMENT("Entry:");
__ enter();
__ mul_add(out, in, offset, len, k);
__ leave();
__ ret(lr);
return start;
}
void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
@ -4913,6 +4970,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
if (UseSquareToLenIntrinsic) {
StubRoutines::_squareToLen = generate_squareToLen();
}
if (UseMulAddIntrinsic) {
StubRoutines::_mulAdd = generate_mulAdd();
}
if (UseMontgomeryMultiplyIntrinsic) {
StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);

View File

@ -2195,6 +2195,13 @@ void TemplateTable::_return(TosState state)
__ bind(skip_register_finalizer);
}
// Explicitly reset last_sp, for handling special case in TemplateInterpreter::deopt_reexecute_entry
#ifdef ASSERT
if (state == vtos) {
__ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
}
#endif
// Issue a StoreStore barrier after all stores but before return
// from any constructor for any class with a final field. We don't
// know if this is a finalizer, so we always do so.
@ -2297,6 +2304,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
ConstantPoolCacheEntry::f1_offset())));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ldr(obj, Address(obj, mirror_offset));
__ resolve_oop_handle(obj);
}
}

View File

@ -340,6 +340,14 @@ void VM_Version::get_processor_features() {
UseMultiplyToLenIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
UseSquareToLenIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
UseMulAddIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) {
UseBarriersForVolatile = (_features & CPU_DMB_ATOMICS) != 0;
}

View File

@ -3168,14 +3168,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
}
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
Bytecodes::Code bc = method->java_code_at_bci(bci);
const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes
if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
!callee_is_static && // required for optimized MH invokes
C1ProfileVirtualCalls) {
if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, tmp1, recv);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,9 +25,9 @@
#ifndef CPU_ARM_VM_JNITYPES_ARM_HPP
#define CPU_ARM_VM_JNITYPES_ARM_HPP
#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive jni
// types to the array of arguments passed into JavaCalls::call

View File

@ -2899,6 +2899,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp)
ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
ldr(mirror, Address(tmp, mirror_offset));
resolve_oop_handle(mirror);
}

View File

@ -42,10 +42,6 @@
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#ifdef SHARK
#include "compiler/compileBroker.hpp"
#include "shark/sharkCompiler.hpp"
#endif
#define __ masm->

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -2867,10 +2867,9 @@ class StubGenerator: public StubCodeGenerator {
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) except for callee_saved_regs.
void gen_write_ref_array_pre_barrier(Register addr, Register count, int callee_saved_regs) {
BarrierSet* bs = Universe::heap()->barrier_set();
if (bs->has_write_ref_pre_barrier()) {
assert(bs->has_write_ref_array_pre_opt(),
"Else unsupported barrier set.");
switch (bs->kind()) {
case BarrierSet::G1SATBCTLogging:
{
assert( addr->encoding() < callee_saved_regs, "addr must be saved");
assert(count->encoding() < callee_saved_regs, "count must be saved");
@ -2908,6 +2907,12 @@ class StubGenerator: public StubCodeGenerator {
__ pop(saved_regs | R9ifScratched);
#endif // AARCH64
}
case BarrierSet::CardTableForRS:
case BarrierSet::CardTableExtension:
break;
default:
ShouldNotReachHere();
}
}
#endif // INCLUDE_ALL_GCS

View File

@ -2844,6 +2844,19 @@ void TemplateTable::_return(TosState state) {
__ bind(skip_register_finalizer);
}
// Explicitly reset last_sp, for handling special case in TemplateInterpreter::deopt_reexecute_entry
#ifdef ASSERT
if (state == vtos) {
#ifndef AARCH64
__ mov(Rtemp, 0);
__ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
#else
__ restore_sp_after_call(Rtemp);
__ restore_stack_top();
#endif
}
#endif
// Narrow result if state is itos but result type is smaller.
// Need to narrow in the return bytecode rather than in generate_return_entry
// since compiled code callers expect the result to already be narrowed.
@ -2963,6 +2976,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Rcache,
cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ldr(Robj, Address(Robj, mirror_offset));
__ resolve_oop_handle(Robj);
}
}

View File

@ -517,6 +517,9 @@ class Assembler : public AbstractAssembler {
XXPERMDI_OPCODE= (60u << OPCODE_SHIFT | 10u << 3),
XXMRGHW_OPCODE = (60u << OPCODE_SHIFT | 18u << 3),
XXMRGLW_OPCODE = (60u << OPCODE_SHIFT | 50u << 3),
XXSPLTW_OPCODE = (60u << OPCODE_SHIFT | 164u << 2),
XXLXOR_OPCODE = (60u << OPCODE_SHIFT | 154u << 3),
XXLEQV_OPCODE = (60u << OPCODE_SHIFT | 186u << 3),
// Vector Permute and Formatting
VPKPX_OPCODE = (4u << OPCODE_SHIFT | 782u ),
@ -1125,6 +1128,7 @@ class Assembler : public AbstractAssembler {
static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions
static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction
static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions
static int xxsplt_uim(int x) { return opp_u_field(x, 15, 14); } // for xxsplt* instructions
//static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes
//static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes
@ -1308,6 +1312,7 @@ class Assembler : public AbstractAssembler {
inline void li( Register d, int si16);
inline void lis( Register d, int si16);
inline void addir(Register d, int si16, Register a);
inline void subi( Register d, Register a, int si16);
static bool is_addi(int x) {
return ADDI_OPCODE == (x & ADDI_OPCODE_MASK);
@ -2154,6 +2159,11 @@ class Assembler : public AbstractAssembler {
inline void xxpermdi( VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm);
inline void xxmrghw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxmrglw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void mtvsrd( VectorSRegister d, Register a);
inline void mtvsrwz( VectorSRegister d, Register a);
inline void xxspltw( VectorSRegister d, VectorSRegister b, int ui2);
inline void xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b);
// VSX Extended Mnemonics
inline void xxspltd( VectorSRegister d, VectorSRegister a, int x);
@ -2174,7 +2184,8 @@ class Assembler : public AbstractAssembler {
inline void vsbox( VectorRegister d, VectorRegister a);
// SHA (introduced with Power 8)
// Not yet implemented.
inline void vshasigmad(VectorRegister d, VectorRegister a, bool st, int six);
inline void vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);
// Vector Binary Polynomial Multiplication (introduced with Power 8)
inline void vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b);
@ -2285,6 +2296,11 @@ class Assembler : public AbstractAssembler {
inline void lvsl( VectorRegister d, Register s2);
inline void lvsr( VectorRegister d, Register s2);
// Endianness-specific concatenation of 2 loaded vectors.
inline void load_perm(VectorRegister perm, Register addr);
inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
inline void vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm);
// RegisterOrConstant versions.
// These emitters choose between the versions using two registers and
// those with register and immediate, depending on the content of roc.

View File

@ -164,6 +164,7 @@ inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32
// Extended mnemonics that are thin wrappers around addi/addis:
//   li    d, si16     -> addi_r0ok (d, R0, si16)
//   lis   d, si16     -> addis_r0ok(d, R0, si16)
//   addir d, si16, a  -> addi(d, a, si16)   (same instruction, operands reordered)
//   subi  d, a, si16  -> addi(d, a, -si16)
// NOTE(review): subi negates the immediate before handing it to addi, so the
// negated value must itself be a valid addi immediate; presumably
// si16 == -32768 is therefore not usable - confirm against addi's range check.
inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); }
inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); }
inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
inline void Assembler::subi( Register d, Register a, int si16) { Assembler::addi(d, a, -si16); }
// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
inline void Assembler::cmpi( ConditionRegister f, int l, Register a, int si16) { emit_int32( CMPI_OPCODE | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
@ -760,8 +761,13 @@ inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit
// Vector-Scalar (VSX) instructions.
inline void Assembler::lxvd2x( VectorSRegister d, Register s1) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::lxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra(0) | rb(s1)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra0mem(s1) | rb(s2)); }
// Each emitter below ORs the instruction's opcode constant with its encoded
// register/immediate fields and emits the resulting 32-bit word.
//
// mtvsrd / mtvsrwz: target is given directly as a VectorSRegister.
inline void Assembler::mtvsrd( VectorSRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d) | ra(a)); }
inline void Assembler::mtvsrwz( VectorSRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d) | ra(a)); }
// xxspltw: ui2 is passed through uimm(ui2, 2) (presumably a 2-bit unsigned
// range check - confirm) and placed with xxsplt_uim.
inline void Assembler::xxspltw( VectorSRegister d, VectorSRegister b, int ui2) { emit_int32( XXSPLTW_OPCODE | vsrt(d) | vsrb(b) | xxsplt_uim(uimm(ui2,2))); }
// xxlxor / xxleqv: three-register VSX forms (target d, sources a and b).
inline void Assembler::xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLXOR_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLEQV_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
// mtvrd / mfvrd / mtvrwz: same MTVSRD/MFVSRD/MTVSRWZ opcodes as the VSX
// variants, but the vector operand is a VectorRegister that is first mapped
// to its VSX register via to_vsr().
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mtvrwz( VectorRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
@ -925,7 +931,8 @@ inline void Assembler::vncipherlast(VectorRegister d, VectorRegister a, VectorRe
inline void Assembler::vsbox( VectorRegister d, VectorRegister a) { emit_int32( VSBOX_OPCODE | vrt(d) | vra(a) ); }
// SHA (introduced with Power 8)
// Not yet implemented.
inline void Assembler::vshasigmad(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAD_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
inline void Assembler::vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAW_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
// Vector Binary Polynomial Multiplication (introduced with Power 8)
inline void Assembler::vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPMSUMB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@ -1034,6 +1041,30 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }
// Loads the permute control vector for addr into perm.
// Big-endian builds emit lvsl; builds with VM_LITTLE_ENDIAN defined emit lvsr.
inline void Assembler::load_perm(VectorRegister perm, Register addr) {
#ifndef VM_LITTLE_ENDIAN
  lvsl(perm, addr);
#else
  lvsr(perm, addr);
#endif
}
// In-place variant: permutes first_dest (the first vector) together with
// second under control of perm and leaves the result in first_dest.
// Delegates to the four-operand overload, which selects the vperm operand
// order for the build's endianness.
inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm) {
  vec_perm(first_dest, first_dest, second, perm);
}
// Permutes the pair (first, second) under control of perm into dest.
// Big-endian builds pass the sources to vperm as (first, second); builds with
// VM_LITTLE_ENDIAN defined pass them swapped, as (second, first).
inline void Assembler::vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm) {
#ifndef VM_LITTLE_ENDIAN
  vperm(dest, first, second, perm);
#else
  vperm(dest, second, first, perm);
#endif
}
// Pointer convenience overload: reinterprets x as a long and forwards to the
// integer load_const overload with the same destination and temp register.
inline void Assembler::load_const(Register d, void* x, Register tmp) {
  const long value = (long)x;
  load_const(d, value, tmp);
}

View File

@ -2774,13 +2774,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
__ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
}
Bytecodes::Code bc = method->java_code_at_bci(bci);
const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes.
if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
!callee_is_static && // Required for optimized MH invokes.
C1ProfileVirtualCalls) {
// invokeinterface bytecodes
if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, tmp1, recv);

View File

@ -32,7 +32,7 @@
// Sets the default values for platform dependent flags used by the runtime system.
// (see globals.hpp)
define_pd_global(bool, ShareVtableStubs, false); // Improves performance markedly for mtrt and compress.
define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.
@ -103,6 +103,9 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
"Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
\
product(bool, SuperwordUseVSX, false, \
"Use Power8 VSX instructions for superword optimization.") \
\
/* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \
/* indirect call by a direct call. */ \
product(bool, ReoptimizeCallSequences, true, \

View File

@ -863,7 +863,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
//
// markOop displaced_header = obj->mark().set_unlocked();
// monitor->lock()->set_displaced_header(displaced_header);
// if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// // We stored the monitor address into the object's mark word.
// } else if (THREAD->is_lock_owned((address)displaced_header))
// // Simple recursive case.
@ -901,7 +901,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
// if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// Store stack address of the BasicObjectLock (this is monitor) into object.
addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
@ -977,7 +977,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_e
// if ((displaced_header = monitor->displaced_header()) == NULL) {
// // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
// monitor->set_obj(NULL);
// } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
// } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
// monitor->set_obj(NULL);
// } else {
@ -1010,7 +1010,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_e
cmpdi(CCR0, displaced_header, 0);
beq(CCR0, free_slot); // recursive unlock
// } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
// } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
// monitor->set_obj(NULL);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2013 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -26,9 +26,9 @@
#ifndef CPU_PPC_VM_JNITYPES_PPC_HPP
#define CPU_PPC_VM_JNITYPES_PPC_HPP
#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive
// jni types to the array of arguments passed into JavaCalls::call.

View File

@ -129,7 +129,7 @@ void MacroAssembler::calculate_address_from_global_toc(Register dst, address add
}
}
int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
address MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
const int offset = MacroAssembler::offset_to_global_toc(addr);
const address inst2_addr = a;
@ -155,7 +155,7 @@ int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, addres
assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
return (int)((intptr_t)addr - (intptr_t)inst1_addr);
return inst1_addr;
}
address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
@ -201,7 +201,7 @@ address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr
// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
// ori rx = rx | const.lo
// Clrldi will be passed by.
int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
address MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
assert(UseCompressedOops, "Should only patch compressed oops");
const address inst2_addr = a;
@ -227,7 +227,7 @@ int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop dat
set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
set_imm((int *)inst2_addr, (xd)); // unsigned int
return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
return inst1_addr;
}
// Get compressed oop or klass constant.
@ -3382,6 +3382,7 @@ void MacroAssembler::load_mirror_from_const_method(Register mirror, Register con
ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);
resolve_oop_handle(mirror);
}
// Clear Array
@ -5234,6 +5235,40 @@ void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
bind(L_post_third_loop_done);
} // multiply_128_x_128_loop
// Emits a multiply-accumulate loop over 32-bit words. Per iteration the
// generated code computes
//     sum           = in[len_off] * k + out[offset] + carry
//     out[offset]   = low 32 bits of sum   (stwx)
//     carry         = sum >> 32            (srdi)
// and then steps both byte offsets down by 4, i.e. it walks both arrays from
// the highest index towards the lowest.
//
// Register usage as visible in the code below:
//   out, in    - base addresses for the indexed word loads/stores
//   offset     - byte offset into out; decremented by 4 before the first
//                access and after every iteration (clobbered)
//   len        - element count on entry; copied to CTR as the trip count,
//                then rewritten to the byte offset (len - 1) * 4 into 'in'
//                and decremented by 4 per iteration (clobbered)
//   k          - multiplier; presumably a zero-extended 32-bit value so the
//                64-bit mulld cannot overflow - verify at the call site
//   tmp1, tmp2 - scratch
//   carry      - zeroed on entry; holds the final carry on exit
// Also uses CCR0 and CTR.
// If len <= 0 the loop is skipped: carry is 0 and offset has still been
// decremented by 4.
void MacroAssembler::muladd(Register out, Register in,
Register offset, Register len, Register k,
Register tmp1, Register tmp2, Register carry) {
// Labels
Label LOOP, SKIP;
// Make sure length is positive.
// (The compare result is consumed by the ble below; the two instructions in
// between do not touch CCR0.)
cmpdi (CCR0, len, 0);
// Prepare variables
subi (offset, offset, 4);
li (carry, 0);
ble (CCR0, SKIP);
// CTR = number of iterations; len becomes the byte offset of the last word of 'in'.
mtctr (len);
subi (len, len, 1 );
sldi (len, len, 2 );
// Main loop
bind(LOOP);
lwzx (tmp1, len, in );
lwzx (tmp2, offset, out );
mulld (tmp1, tmp1, k );
add (tmp2, carry, tmp2 );
add (tmp2, tmp1, tmp2 );
stwx (tmp2, offset, out );
// Upper half of the 64-bit sum is the carry into the next (lower-addressed) word.
srdi (carry, tmp2, 32 );
subi (offset, offset, 4 );
subi (len, len, 4 );
bdnz (LOOP);
bind(SKIP);
}
void MacroAssembler::multiply_to_len(Register x, Register xlen,
Register y, Register ylen,
Register z, Register zlen,

View File

@ -105,13 +105,15 @@ class MacroAssembler: public Assembler {
};
inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
static int patch_calculate_address_from_global_toc_at(address a, address addr, address bound);
// Returns address of first instruction in sequence.
static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr);
static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);
#ifdef _LP64
// Patch narrow oop constant.
inline static bool is_set_narrow_oop(address a, address bound);
static int patch_set_narrow_oop(address a, address bound, narrowOop data);
// Returns address of first instruction in sequence.
static address patch_set_narrow_oop(address a, address bound, narrowOop data);
static narrowOop get_narrow_oop(address a, address bound);
#endif
@ -813,6 +815,8 @@ class MacroAssembler: public Assembler {
Register yz_idx, Register idx, Register carry,
Register product_high, Register product,
Register carry2, Register tmp);
void muladd(Register out, Register in, Register offset, Register len, Register k,
Register tmp1, Register tmp2, Register carry);
void multiply_to_len(Register x, Register xlen,
Register y, Register ylen,
Register z, Register zlen,
@ -862,6 +866,40 @@ class MacroAssembler: public Assembler {
void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
bool invertCRC);
// SHA-2 auxiliary functions and public interfaces
private:
void sha256_deque(const VectorRegister src,
const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
const int total_ws, const Register k, const VectorRegister* kpws,
const int total_kpws);
void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
const Register j, const Register k);
void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
const VectorRegister c, const VectorRegister d, const VectorRegister e,
const VectorRegister f, const VectorRegister g, const VectorRegister h,
const Register hptr);
void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
const VectorRegister w2, const VectorRegister w3,
const VectorRegister w4, const VectorRegister w5,
const VectorRegister w6, const VectorRegister w7,
const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
const VectorRegister vRb, const Register k);
public:
void sha256(bool multi_block);
void sha512(bool multi_block);
//
// Debugging
//

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -221,13 +221,13 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
// A calculation relative to the global TOC.
if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
(address)data) {
const int invalidated_range =
MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
const address inst2_addr = addr;
const address inst1_addr =
MacroAssembler::patch_calculate_address_from_global_toc_at(inst2_addr, cb->content_begin(),
(address)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::ppc64_flush_icache_bytes(start, range);
assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
const int range = inst2_addr - inst1_addr + BytesPerInstWord;
ICache::ppc64_flush_icache_bytes(inst1_addr, range);
}
next_address = addr + 1 * BytesPerInstWord;
} else if (MacroAssembler::is_load_const_at(addr)) {
@ -288,15 +288,15 @@ void NativeMovConstReg::set_data(intptr_t data) {
}
void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
address addr = addr_at(0);
address inst2_addr = addr_at(0);
CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
const int invalidated_range =
MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::ppc64_flush_icache_bytes(start, range);
if (MacroAssembler::get_narrow_oop(inst2_addr, cb->content_begin()) == (long)data)
return;
const address inst1_addr =
MacroAssembler::patch_set_narrow_oop(inst2_addr, cb->content_begin(), (long)data);
assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
const int range = inst2_addr - inst1_addr + BytesPerInstWord;
ICache::ppc64_flush_icache_bytes(inst1_addr, range);
}
// Do not use an assertion here. Let clients decide whether they only

View File

@ -254,6 +254,73 @@ register %{
reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
reg_def SR_PPR( SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg()); // v
// ----------------------------
// Vector-Scalar Registers
// ----------------------------
reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);
// ----------------------------
// Specify priority of register selection within phases of register
@ -385,6 +452,73 @@ alloc_class chunk2 (
);
alloc_class chunk3 (
VSR0,
VSR1,
VSR2,
VSR3,
VSR4,
VSR5,
VSR6,
VSR7,
VSR8,
VSR9,
VSR10,
VSR11,
VSR12,
VSR13,
VSR14,
VSR15,
VSR16,
VSR17,
VSR18,
VSR19,
VSR20,
VSR21,
VSR22,
VSR23,
VSR24,
VSR25,
VSR26,
VSR27,
VSR28,
VSR29,
VSR30,
VSR31,
VSR32,
VSR33,
VSR34,
VSR35,
VSR36,
VSR37,
VSR38,
VSR39,
VSR40,
VSR41,
VSR42,
VSR43,
VSR44,
VSR45,
VSR46,
VSR47,
VSR48,
VSR49,
VSR50,
VSR51,
VSR52,
VSR53,
VSR54,
VSR55,
VSR56,
VSR57,
VSR58,
VSR59,
VSR60,
VSR61,
VSR62,
VSR63
);
alloc_class chunk4 (
// special registers
// These registers are not allocated, but used for nodes generated by postalloc expand.
SR_XER,
@ -769,6 +903,45 @@ reg_class dbl_reg(
F31, F31_H // nv!
);
// ----------------------------
// Vector-Scalar Register Class
// ----------------------------
// Vector-scalar registers made available through this register class:
// VSR32 .. VSR51 only.
//  - VSR52 .. VSR63 are deliberately left out (kept below as comments tagged
//    "nv!", presumably non-volatile in the ABI - confirm).
//  - VSR0 .. VSR31 are declared via reg_def but are not listed here
//    (NOTE(review): presumably because they overlap the floating-point
//    registers - confirm).
reg_class vs_reg(
VSR32,
VSR33,
VSR34,
VSR35,
VSR36,
VSR37,
VSR38,
VSR39,
VSR40,
VSR41,
VSR42,
VSR43,
VSR44,
VSR45,
VSR46,
VSR47,
VSR48,
VSR49,
VSR50,
VSR51
// VSR52, // nv!
// VSR53, // nv!
// VSR54, // nv!
// VSR55, // nv!
// VSR56, // nv!
// VSR57, // nv!
// VSR58, // nv!
// VSR59, // nv!
// VSR60, // nv!
// VSR61, // nv!
// VSR62, // nv!
// VSR63 // nv!
);
%}
//----------DEFINITION BLOCK---------------------------------------------------
@ -1502,7 +1675,7 @@ static enum RC rc_class(OptoReg::Name reg) {
if (reg < 64+64) return rc_float;
// Between float regs & stack are the flags regs.
assert(OptoReg::is_stack(reg), "blow up if spilling flags");
assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags");
return rc_stack;
}
@ -2048,15 +2221,25 @@ const bool Matcher::convL2FSupported(void) {
// Vector width in bytes: 16 when SuperwordUseVSX is enabled, otherwise 8.
// In either case MaxVectorSize is asserted to match the returned width.
// The element type bt is not consulted.
const int Matcher::vector_width_in_bytes(BasicType bt) {
  if (!SuperwordUseVSX) {
    assert(MaxVectorSize == 8, "");
    return 8;
  }
  assert(MaxVectorSize == 16, "");
  return 16;
}
// Ideal register type used for vectors: Op_VecX (16-byte vectors) when
// SuperwordUseVSX is enabled, otherwise Op_RegL (8-byte vectors).
// Both MaxVectorSize and the requested size are asserted to equal the
// width that goes with the chosen register type.
const uint Matcher::vector_ideal_reg(int size) {
  if (!SuperwordUseVSX) {
    assert(MaxVectorSize == 8 && size == 8, "");
    return Op_RegL;
  }
  assert(MaxVectorSize == 16 && size == 16, "");
  return Op_VecX;
}
const uint Matcher::vector_shift_count_ideal_reg(int size) {
fatal("vector shift is not supported");
@ -2075,7 +2258,7 @@ const int Matcher::min_vector_size(const BasicType bt) {
// PPC doesn't support misaligned vectors store/load.
// NOTE(review): as this text stands the body has two consecutive return
// statements, so the function always returns false and the
// "return !AlignVector" line is unreachable. The two lines look like the old
// and new versions of the same statement; only one of them should remain
// (the trailing comment suggests the flag-dependent one is the intended
// result - confirm), and the comment above should be updated to match.
const bool Matcher::misaligned_vectors_ok() {
return false;
return !AlignVector; // can be changed by flag
}
// PPC AES support not yet implemented
@ -2217,10 +2400,31 @@ const MachRegisterNumbers farg_reg[13] = {
F13_num
};
// Table of all 64 vector-scalar registers in ascending order:
// entry i is VSR<i>_num.
// (Fixes the seventh row, which previously read VSR24, VSR23, VSR24, VSR25 -
// duplicating VSR23/VSR24 and omitting VSR26/VSR27.)
const MachRegisterNumbers vsarg_reg[64] = {
  VSR0_num,  VSR1_num,  VSR2_num,  VSR3_num,
  VSR4_num,  VSR5_num,  VSR6_num,  VSR7_num,
  VSR8_num,  VSR9_num,  VSR10_num, VSR11_num,
  VSR12_num, VSR13_num, VSR14_num, VSR15_num,
  VSR16_num, VSR17_num, VSR18_num, VSR19_num,
  VSR20_num, VSR21_num, VSR22_num, VSR23_num,
  VSR24_num, VSR25_num, VSR26_num, VSR27_num,
  VSR28_num, VSR29_num, VSR30_num, VSR31_num,
  VSR32_num, VSR33_num, VSR34_num, VSR35_num,
  VSR36_num, VSR37_num, VSR38_num, VSR39_num,
  VSR40_num, VSR41_num, VSR42_num, VSR43_num,
  VSR44_num, VSR45_num, VSR46_num, VSR47_num,
  VSR48_num, VSR49_num, VSR50_num, VSR51_num,
  VSR52_num, VSR53_num, VSR54_num, VSR55_num,
  VSR56_num, VSR57_num, VSR58_num, VSR59_num,
  VSR60_num, VSR61_num, VSR62_num, VSR63_num
};
const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);
// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
@ -2552,6 +2756,115 @@ loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immL
return nodes;
}
// Result bundle returned by loadConLReplicatedNodesTuple_create(): the nodes
// it created for loading a long constant and replicating it into a vector
// register. Which load fields are populated depends on the path taken there:
// the large-constant-pool path sets _large_hi/_large_lo and leaves _small
// NULL; the other path sets _small and leaves _large_hi/_large_lo NULL.
typedef struct {
loadConL_hiNode *_large_hi; // hi-part constant load (large-pool path), else NULL
loadConL_loNode *_large_lo; // lo-part constant load (large-pool path), else NULL
mtvsrdNode *_moved; // mtvsrd node; set on both paths
xxspltdNode *_replicated; // xxspltd node; set on both paths
loadConLNode *_small; // single constant load (non-large path), else NULL
MachNode *_last; // set to _replicated on both paths
} loadConLReplicatedNodesTuple;
// Creates the post-allocation node sequence that loads the long constant
// described by immSrc (via toc), moves it into a vector register (mtvsrd) and
// replicates it (xxspltd). Registers are assigned here, not by the allocator.
//
// Parameters:
//   C               - not used by this function; kept for interface compatibility.
//   ra_             - register allocator that receives the fixed assignments.
//   toc             - node used as input of the first constant-load node.
//   immSrc          - operand holding the 64-bit pattern to load.
//   dst             - not used by this function; kept for interface compatibility.
//   zero            - immediate handed to xxspltd as its third operand.
//   reg_second/reg_first         - register pair assigned to the constant-load nodes.
//   reg_vec_second/reg_vec_first - register pair assigned to the vector result.
//
// Returns a tuple whose unused members are NULL: the large-pool path fills
// _large_hi/_large_lo, the small-pool path fills _small; _moved, _replicated
// and _last (== _replicated) are always set.
loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
                                                                 vecXOper *dst, immI_0Oper *zero,
                                                                 OptoReg::Name reg_second, OptoReg::Name reg_first,
                                                                 OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
  loadConLReplicatedNodesTuple nodes;

  const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
  if (large_constant_pool) {
    // Create new nodes.
    loadConL_hiNode *m1 = new loadConL_hiNode();
    loadConL_loNode *m2 = new loadConL_loNode();
    mtvsrdNode *m3 = new mtvsrdNode();
    xxspltdNode *m4 = new xxspltdNode();

    // inputs for new nodes: toc -> m1 -> m2 -> m3 -> m4
    m1->add_req(NULL, toc);
    m2->add_req(NULL, m1);
    m3->add_req(NULL, m2);
    m4->add_req(NULL, m3);

    // operands for new nodes
    m1->_opnds[0] = new iRegLdstOper(); // dst
    m1->_opnds[1] = immSrc;             // src
    m1->_opnds[2] = new iRegPdstOper(); // toc

    m2->_opnds[0] = new iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;             // src
    m2->_opnds[2] = new iRegLdstOper(); // base

    m3->_opnds[0] = new vecXOper();     // dst
    m3->_opnds[1] = new iRegLdstOper(); // src

    m4->_opnds[0] = new vecXOper();     // dst
    m4->_opnds[1] = new vecXOper();     // src
    m4->_opnds[2] = zero;

    // Initialize ins_attrib TOC fields.
    m1->_const_toc_offset = -1;
    m2->_const_toc_offset_hi_node = m1;

    // Initialize ins_attrib instruction offset.
    m1->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m1->_idx, reg_second, reg_first);
    ra_->set_pair(m2->_idx, reg_second, reg_first);
    ra_->set1(m3->_idx, reg_second);
    ra_->set2(m3->_idx, reg_vec_first);
    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);

    // Create result.
    nodes._large_hi = m1;
    nodes._large_lo = m2;
    nodes._moved = m3;
    nodes._replicated = m4;
    nodes._small = NULL;
    nodes._last = nodes._replicated;
    assert(m2->bottom_type()->isa_long(), "must be long");
  } else {
    // Create new nodes.
    loadConLNode *m2 = new loadConLNode();
    mtvsrdNode *m3 = new mtvsrdNode();
    xxspltdNode *m4 = new xxspltdNode();

    // inputs for new nodes: toc -> m2 -> m3 -> m4
    // (m3 and m4 must be wired up exactly as in the large-pool path,
    // otherwise they would be emitted without any input.)
    m2->add_req(NULL, toc);
    m3->add_req(NULL, m2);
    m4->add_req(NULL, m3);

    // operands for new nodes
    m2->_opnds[0] = new iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;             // src
    m2->_opnds[2] = new iRegPdstOper(); // toc

    m3->_opnds[0] = new vecXOper();     // dst
    m3->_opnds[1] = new iRegLdstOper(); // src

    m4->_opnds[0] = new vecXOper();     // dst
    m4->_opnds[1] = new vecXOper();     // src
    m4->_opnds[2] = zero;

    // Initialize ins_attrib instruction offset.
    m2->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, reg_second, reg_first);
    ra_->set1(m3->_idx, reg_second);
    ra_->set2(m3->_idx, reg_vec_first);
    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);

    // Create result.
    nodes._large_hi = NULL;
    nodes._large_lo = NULL;
    nodes._small = m2;
    nodes._moved = m3;
    nodes._replicated = m4;
    nodes._last = nodes._replicated;
    assert(m2->bottom_type()->isa_long(), "must be long");
  }

  return nodes;
}
%} // source
encode %{
@ -3212,6 +3525,27 @@ encode %{
assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
%}
// Post-allocation expansion for replicating a float constant into a VSX
// register: builds the load/move/splat node sequence with fixed registers
// (R20 for the load, VSR10/VSR11 for the vector) and pushes it onto 'nodes'.
enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc) %{
  // Create new nodes.

  // Make an operand with the bit pattern to load as float.
  immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
  immI_0Oper *op_zero = new immI_0Oper(0);

  loadConLReplicatedNodesTuple loadConLNodes =
    loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
                                        OptoReg::Name(R20_H_num), OptoReg::Name(R20_num),
                                        OptoReg::Name(VSR11_num), OptoReg::Name(VSR10_num));

  // Push new nodes in dependency order. Exactly one of the (_large_hi,
  // _large_lo) pair or _small is non-NULL; _last is the replicate node, so
  // the small-pool load has to be pushed explicitly or it would be dropped.
  if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
  if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
  if (loadConLNodes._small)    { nodes->push(loadConLNodes._small); }
  if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
  if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }

  assert(nodes->length() >= 1, "must have created at least 1 node");
%}
// This enc_class is needed so that scheduler gets proper
// input mapping for latency computation.
enc_class enc_poll(immI dst, iRegLdst poll) %{
@ -3840,6 +4174,14 @@ ins_attrib ins_field_load_ic_node(0);
//
// Formats are generated automatically for constants and base registers.
// Register operand for ideal VecX values, allocated from the vs_reg register
// class. The format is intentionally empty.
operand vecX() %{
constraint(ALLOC_IN_RC(vs_reg));
match(VecX);
format %{ %}
interface(REG_INTER);
%}
//----------Simple Operands----------------------------------------------------
// Immediate Operands
@ -5372,6 +5714,20 @@ instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
ins_pipe(pipe_class_memory);
%}
// Load Aligned Packed Byte
// Matches a LoadVector whose memory_size() is 16 and emits one LXVD2X that
// loads from the address register of $mem into the VSX register $dst.
instruct loadV16(vecX dst, indirect mem) %{
predicate(n->as_LoadVector()->memory_size() == 16);
match(Set dst (LoadVector mem));
ins_cost(MEMORY_REF_COST);
format %{ "LXVD2X $dst, $mem \t// load 16-byte Vector" %}
// A single 4-byte instruction is emitted.
size(4);
ins_encode %{
__ lxvd2x($dst$$VectorSRegister, $mem$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
match(Set dst (LoadRange mem));
@ -6368,6 +6724,20 @@ instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
ins_pipe(pipe_class_memory);
%}
// Store Packed Byte long register to memory
// Matches a StoreVector whose memory_size() is 16 and emits one STXVD2X that
// stores the VSX register $src to the address register of $mem.
instruct storeV16(indirect mem, vecX src) %{
predicate(n->as_StoreVector()->memory_size() == 16);
match(Set mem (StoreVector mem src));
ins_cost(MEMORY_REF_COST);
format %{ "STXVD2X $mem, $src \t// store 16-byte Vector" %}
// A single 4-byte instruction is emitted.
size(4);
ins_encode %{
__ stxvd2x($src$$VectorSRegister, $mem$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
@ -13239,6 +13609,26 @@ instruct storeS_reversed(iRegIsrc src, indirect mem) %{
ins_pipe(pipe_class_default);
%}
// Helper without a match rule: emits MTVSRWZ to move the GPR $src into the
// VSX register $temp1. Used as a step in expand rules such as repl16B_reg_Ex.
instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
effect(DEF temp1, USE src);
size(4);
ins_encode %{
__ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Helper without a match rule: emits XXSPLTW with $src as source, $dst as
// destination and the constant $imm1 as the word selector.
// Used as the final step of repl16B_reg_Ex.
instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
effect(DEF dst, USE src, USE imm1);
size(4);
ins_encode %{
__ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
%}
ins_pipe(pipe_class_default);
%}
//---------- Replicate Vector Instructions ------------------------------------
// Insrdi does replicate if src == dst.
@ -13318,6 +13708,46 @@ instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
// ReplicateB of a register value for 16-element vectors.
// Expands into: copy $src into a long temp, apply repl56 and repl48 to that
// temp, move it into a vector temp with mtvsrwz, then xxspltw with selector 1.
instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateB src));
predicate(n->as_Vector()->length() == 16);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 imm1 %{ (int) 1 %}
moveReg(tmpL, src);
repl56(tmpL);
repl48(tmpL);
mtvsrwz(tmpV, tmpL);
xxspltw(dst, tmpV, imm1);
%}
%}
// ReplicateB of the constant 0 for 16-element vectors.
// $zero only appears in the format; the encoding XORs $dst with itself,
// which clears the whole register.
instruct repl16B_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateB zero));
predicate(n->as_Vector()->length() == 16);
format %{ "XXLXOR $dst, $zero \t// replicate16B" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// ReplicateB of the constant -1 for 16-element vectors.
// $src only appears in the format; the encoding applies XXLEQV (logical
// equivalence) to $dst with itself, which sets every bit of the register.
instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateB src));
predicate(n->as_Vector()->length() == 16);
format %{ "XXLEQV $dst, $src \t// replicate16B" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 4);
@ -13352,6 +13782,46 @@ instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
// ReplicateS of a register value for 8-element vectors.
// Expands into: copy $src into a long temp, apply repl48 and repl32 to that
// temp, move it into a vector temp with mtvsrd, then xxpermdi of that temp
// with itself using selector 0.
instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 8);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveReg(tmpL, src);
repl48(tmpL);
repl32(tmpL);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
// ReplicateS of the constant 0 for 8-element vectors.
// $zero only appears in the format; the encoding XORs $dst with itself,
// which clears the whole register.
instruct repl8S_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateS zero));
predicate(n->as_Vector()->length() == 8);
format %{ "XXLXOR $dst, $zero \t// replicate8S" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// ReplicateS of the constant -1 for 8-element vectors.
// $src only appears in the format; the encoding applies XXLEQV (logical
// equivalence) to $dst with itself, which sets every bit of the register.
instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateS src));
  predicate(n->as_Vector()->length() == 8);

  format %{ "XXLEQV $dst, $src \t// replicate8S" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 2);
@ -13386,6 +13856,46 @@ instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
// ReplicateI of a register value for 4-element vectors.
// Expands into: copy $src into a long temp, apply repl32 to that temp, move
// it into a vector temp with mtvsrd, then xxpermdi of that temp with itself
// using selector 0.
instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 4);
ins_cost(2 * DEFAULT_COST);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveReg(tmpL, src);
repl32(tmpL);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
// ReplicateI of the constant 0 for 4-element vectors.
// $zero only appears in the format; the encoding XORs $dst with itself,
// which clears the whole register.
instruct repl4I_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateI zero));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLXOR $dst, $zero \t// replicate4I" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// ReplicateI of the constant -1 for 4-element vectors.
// The encoding applies XXLEQV (logical equivalence) to $dst with itself,
// which sets every bit of the register; $src is not used by the encoding.
instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLEQV $dst, $dst, $dst \t// replicate4I" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Move float to int register via stack, replicate.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
match(Set dst (ReplicateF src));
@ -13484,6 +13994,154 @@ instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
%}
// ReplicateF of a float register for 4-element vectors.
// The float bits travel through a stack slot into an int register, are
// widened to a long temp, processed by repl32, moved into a vector temp
// (mtvsrd) and finally permuted into $dst (xxpermdi with selector 0).
instruct repl4F_reg_Ex(vecX dst, regF src) %{
match(Set dst (ReplicateF src));
predicate(n->as_Vector()->length() == 4);
// Cost accounts for the store/load round trip through the stack slot.
ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
expand %{
stackSlotL tmpS;
iRegIdst tmpI;
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveF2I_reg_stack(tmpS, src); // Move float to stack.
moveF2I_stack_reg(tmpI, tmpS); // Move stack to int reg.
moveReg(tmpL, tmpI); // Move int to long reg.
repl32(tmpL); // Replicate bitpattern.
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
// ReplicateF of a float constant for 4-element vectors.
// The node is expanded after register allocation by the enc_class
// postalloc_expand_load_replF_constant_vsx, which receives the constant
// table base as its third argument.
instruct repl4F_immF_Ex(vecX dst, immF src) %{
match(Set dst (ReplicateF src));
predicate(n->as_Vector()->length() == 4);
ins_cost(10 * DEFAULT_COST);
postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase) );
%}
// ReplicateF of the constant matched by immF_0 for 4-element vectors.
// $zero only appears in the format; the encoding XORs $dst with itself,
// which clears the whole register.
instruct repl4F_immF0(vecX dst, immF_0 zero) %{
  match(Set dst (ReplicateF zero));
  predicate(n->as_Vector()->length() == 4);

  format %{ "XXLXOR $dst, $zero \t// replicate4F" %}
  // One 4-byte instruction, declared like the sibling replicate-zero rules.
  size(4);
  ins_encode %{
    __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
// ReplicateD of a double register for 2-element vectors.
// The double bits travel through a stack slot into a long register, are moved
// into a vector temp (mtvsrd) and permuted into $dst (xxpermdi, selector 0).
instruct repl2D_reg_Ex(vecX dst, regD src) %{
  match(Set dst (ReplicateD src));
  predicate(n->as_Vector()->length() == 2);
  expand %{
    stackSlotL tmpS;
    iRegLdst tmpL;
    vecX tmpV;
    immI8 zero %{ (int) 0 %}
    moveD2L_reg_stack(tmpS, src);  // Move double to stack.
    moveD2L_stack_reg(tmpL, tmpS); // Move stack to long reg.
    mtvsrd(tmpV, tmpL);
    xxpermdi(dst, tmpV, tmpV, zero);
  %}
%}
// ReplicateD of a zero constant for 2-element vectors.
// $zero only appears in the format; the encoding XORs $dst with itself,
// which clears the whole register.
// NOTE(review): the operand is an int immediate (immI_0) although the rule
// matches ReplicateD; presumably a double-zero operand was intended - confirm.
instruct repl2D_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateD zero));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLXOR $dst, $zero \t// replicate2D" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// ReplicateD of an all-ones constant for 2-element vectors.
// $src only appears in the format; the encoding applies XXLEQV (logical
// equivalence) to $dst with itself, which sets every bit of the register.
// NOTE(review): the operand is an int immediate (immI_minus1) although the
// rule matches ReplicateD - confirm that this rule can actually be selected.
instruct repl2D_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateD src));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLEQV $dst, $src \t// replicate2D" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
// Helper node: emits MTVSRD to move the GPR $src into the VSX register $dst.
// It has no match rule and its predicate is always false, so it is only
// reachable through expand rules and explicitly created nodes.
instruct mtvsrd(vecX dst, iRegLsrc src) %{
predicate(false);
effect(DEF dst, USE src);
format %{ "MTVSRD $dst, $src \t// Move to 16-byte register"%}
size(4);
ins_encode %{
__ mtvsrd($dst$$VectorSRegister, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Helper node without a match rule. Although printed as XXSPLATD, it is
// encoded as XXPERMDI with $src supplied as both source operands and $zero
// as the selector constant.
instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
effect(DEF dst, USE src, USE zero);
format %{ "XXSPLATD $dst, $src, $zero \t// Permute 16-byte register"%}
size(4);
ins_encode %{
__ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
%}
ins_pipe(pipe_class_default);
%}
// Helper node without a match rule: emits XXPERMDI of $src1 and $src2 into
// $dst with the selector constant $zero. Used as the last step of the
// register-replicate expand rules.
instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
effect(DEF dst, USE src1, USE src2, USE zero);
format %{ "XXPERMDI $dst, $src1, $src2, $zero \t// Permute 16-byte register"%}
size(4);
ins_encode %{
__ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
%}
ins_pipe(pipe_class_default);
%}
// ReplicateL of a long register for 2-element vectors.
// Expands into mtvsrd (GPR to vector temp) followed by xxpermdi of that temp
// with itself using selector 0.
instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
match(Set dst (ReplicateL src));
predicate(n->as_Vector()->length() == 2);
expand %{
vecX tmpV;
immI8 zero %{ (int) 0 %}
mtvsrd(tmpV, src);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
// ReplicateL of the constant 0 for 2-element vectors.
// $zero only appears in the format; the encoding XORs $dst with itself,
// which clears the whole register.
instruct repl2L_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateL zero));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLXOR $dst, $zero \t// replicate2L" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// ReplicateL of the constant -1 for 2-element vectors.
// $src only appears in the format; the encoding applies XXLEQV (logical
// equivalence) to $dst with itself, which sets every bit of the register.
instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateL src));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLEQV $dst, $src \t// replicate2L" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
// ============================================================================
// Safepoint Instruction

View File

@ -31,3 +31,5 @@
// Definitions of the noreg / fnoreg / vsnoreg constants for the Register,
// FloatRegister and VectorSRegister types (presumably the "no register"
// placeholders - confirm against the register declarations).
REGISTER_DEFINITION(Register, noreg);
REGISTER_DEFINITION(FloatRegister, fnoreg);
REGISTER_DEFINITION(VectorSRegister, vsnoreg);

View File

@ -677,7 +677,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
* 2 // register halves
+ ConditionRegisterImpl::number_of_registers // condition code registers
+ SpecialRegisterImpl::number_of_registers // special registers
+ VectorRegisterImpl::number_of_registers // VSX registers
+ VectorSRegisterImpl::number_of_registers // VSX registers
};
static const int max_gpr;

View File

@ -479,8 +479,8 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_siz
// Is vector's size (in bytes) bigger than a size saved by default?
// Returns true for any size above 8 bytes. Sizes above the supported maximum
// (16 bytes when SuperwordUseVSX is set, 8 bytes otherwise) trip the assert.
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8/16 on PPC64.
  assert(size <= (SuperwordUseVSX ? 16 : 8), "%d bytes vectors are not supported", size);
  return size > 8;
}
@ -2234,9 +2234,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ release();
// TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
if (UseMembar) {
__ fence();
}
// The JNI call
@ -2393,9 +2390,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ release();
// TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
if (UseMembar) {
__ fence();
}
__ bind(after_transition);
// Reguard any pages if necessary.

View File

@ -2667,7 +2667,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Arguments for generated stub (little endian only):
// Arguments for generated stub:
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - round key array
@ -2686,7 +2686,6 @@ class StubGenerator: public StubCodeGenerator {
Register keylen = R8;
Register temp = R9;
Register keypos = R10;
Register hex = R11;
Register fifteen = R12;
VectorRegister vRet = VR0;
@ -2706,60 +2705,58 @@ class StubGenerator: public StubCodeGenerator {
VectorRegister vTmp3 = VR11;
VectorRegister vTmp4 = VR12;
VectorRegister vLow = VR13;
VectorRegister vHigh = VR14;
__ li (hex, 16);
__ li (fifteen, 15);
__ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
#ifdef VM_LITTLE_ENDIAN
__ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt);
#endif
__ vperm (vRet, vRet, vTmp1, fromPerm);
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
__ lvsr (keyPerm, key);
__ vxor (vTmp2, vTmp2, vTmp2);
__ load_perm (keyPerm, key);
#ifdef VM_LITTLE_ENDIAN
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vsldoi (keyPerm, keyPerm, keyPerm, 8);
#endif
// load the 1st round key to vKey1
__ li (keypos, 0);
// load the 1st round key to vTmp1
__ lvx (vTmp1, key);
__ li (keypos, 16);
__ lvx (vKey1, keypos, key);
__ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey1, vTmp1, vKey1, keyPerm);
__ vec_perm (vTmp1, vKey1, keyPerm);
// 1st round
__ vxor (vRet, vRet, vKey1);
__ vxor (vRet, vRet, vTmp1);
// load the 2nd round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 32);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, keyPerm);
// load the 3rd round key to vKey2
__ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ li (keypos, 48);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, keyPerm);
// load the 4th round key to vKey3
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp2, vTmp1, keyPerm);
__ li (keypos, 64);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, keyPerm);
// load the 5th round key to vKey4
__ addi (keypos, keypos, 16);
__ li (keypos, 80);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey4, vTmp1, keyPerm);
// 2nd - 5th rounds
__ vcipher (vRet, vRet, vKey1);
@ -2768,24 +2765,24 @@ class StubGenerator: public StubCodeGenerator {
__ vcipher (vRet, vRet, vKey4);
// load the 6th round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 96);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 7th round key to vKey2
__ addi (keypos, keypos, 16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ li (keypos, 112);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, keyPerm);
// load the 8th round key to vKey3
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp2, vTmp1, keyPerm);
__ li (keypos, 128);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, keyPerm);
// load the 9th round key to vKey4
__ addi (keypos, keypos, 16);
__ li (keypos, 144);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey4, vTmp1, keyPerm);
// 6th - 9th rounds
__ vcipher (vRet, vRet, vKey1);
@ -2794,14 +2791,14 @@ class StubGenerator: public StubCodeGenerator {
__ vcipher (vRet, vRet, vKey4);
// load the 10th round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 160);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 11th round key to vKey2
__ addi (keypos, keypos, 16);
__ li (keypos, 176);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 4 rounds
__ cmpwi (CCR0, keylen, 44);
@ -2812,14 +2809,14 @@ class StubGenerator: public StubCodeGenerator {
__ vcipher (vRet, vRet, vKey2);
// load the 12th round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 192);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 13th round key to vKey2
__ addi (keypos, keypos, 16);
__ li (keypos, 208);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 2 rounds
__ cmpwi (CCR0, keylen, 52);
@ -2830,14 +2827,14 @@ class StubGenerator: public StubCodeGenerator {
__ vcipher (vRet, vRet, vKey2);
// load the 14th round key to vKey1
__ addi (keypos, keypos, 16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
__ li (keypos, 224);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 15th round key to vKey2
__ addi (keypos, keypos, 16);
__ li (keypos, 240);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey2, vTmp1, keyPerm);
__ bind(L_doLast);
@ -2845,25 +2842,33 @@ class StubGenerator: public StubCodeGenerator {
__ vcipher (vRet, vRet, vKey1);
__ vcipherlast (vRet, vRet, vKey2);
__ neg (temp, to);
__ lvsr (toPerm, temp);
__ vspltisb (vTmp2, -1);
__ vxor (vTmp1, vTmp1, vTmp1);
__ vperm (vTmp2, vTmp2, vTmp1, toPerm);
__ vxor (toPerm, toPerm, fSplt);
// store result (unaligned)
#ifdef VM_LITTLE_ENDIAN
__ lvsl (toPerm, to);
#else
__ lvsr (toPerm, to);
#endif
__ vspltisb (vTmp3, -1);
__ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to);
__ vperm (vRet, vRet, vRet, toPerm);
__ vsel (vTmp1, vTmp1, vRet, vTmp2);
__ lvx (vTmp4, fifteen, to);
__ lvx (vTmp2, fifteen, to);
#ifdef VM_LITTLE_ENDIAN
__ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
__ vxor (toPerm, toPerm, fSplt); // swap bytes
#else
__ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
#endif
__ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
__ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
__ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
__ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to);
__ vsel (vRet, vRet, vTmp4, vTmp2);
__ stvx (vRet, fifteen, to);
__ blr();
return start;
}
// Arguments for generated stub (little endian only):
// Arguments for generated stub:
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - K (key) in little endian int array
@ -2885,7 +2890,6 @@ class StubGenerator: public StubCodeGenerator {
Register keylen = R8;
Register temp = R9;
Register keypos = R10;
Register hex = R11;
Register fifteen = R12;
VectorRegister vRet = VR0;
@ -2906,30 +2910,30 @@ class StubGenerator: public StubCodeGenerator {
VectorRegister vTmp3 = VR12;
VectorRegister vTmp4 = VR13;
VectorRegister vLow = VR14;
VectorRegister vHigh = VR15;
__ li (hex, 16);
__ li (fifteen, 15);
__ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
#ifdef VM_LITTLE_ENDIAN
__ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt);
#endif
__ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
__ lvsr (keyPerm, key);
__ load_perm (keyPerm, key);
#ifdef VM_LITTLE_ENDIAN
__ vxor (vTmp2, vTmp2, vTmp2);
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vsldoi (keyPerm, keyPerm, keyPerm, 8);
#endif
__ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_do44);
@ -2937,32 +2941,32 @@ class StubGenerator: public StubCodeGenerator {
__ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_do52);
// load the 15th round key to vKey11
// load the 15th round key to vKey1
__ li (keypos, 240);
__ lvx (vKey1, keypos, key);
__ li (keypos, 224);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, vKey1, keyPerm);
// load the 14th round key to vKey2
__ li (keypos, 208);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 13th round key to vKey3
__ li (keypos, 192);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, vKey3, keyPerm);
// load the 12th round key to vKey4
__ li (keypos, 176);
__ lvx (vKey5, keypos, key);
__ vec_perm (vKey4, vKey5, vKey4, keyPerm);
// load the 11th round key to vKey5
__ li (keypos, 160);
__ lvx (vTmp1, keypos, key);
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm);
// load the 14th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp2, vTmp1, keyPerm);
// load the 13th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp1, vTmp2, keyPerm);
// load the 12th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp2, vTmp1, keyPerm);
// load the 11th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey5, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// 1st - 5th rounds
__ vxor (vRet, vRet, vKey1);
@ -2975,22 +2979,22 @@ class StubGenerator: public StubCodeGenerator {
__ bind (L_do52);
// load the 13th round key to vKey11
// load the 13th round key to vKey1
__ li (keypos, 208);
__ lvx (vTmp1, keypos, key);
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm);
__ lvx (vKey1, keypos, key);
__ li (keypos, 192);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, vKey1, keyPerm);
// load the 12th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp2, vTmp1, keyPerm);
// load the 12th round key to vKey2
__ li (keypos, 176);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 11th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp1, vTmp2, keyPerm);
// load the 11th round key to vKey3
__ li (keypos, 160);
__ lvx (vTmp1, keypos, key);
__ vec_perm (vKey3, vTmp1, vKey3, keyPerm);
// 1st - 3rd rounds
__ vxor (vRet, vRet, vKey1);
@ -3001,42 +3005,42 @@ class StubGenerator: public StubCodeGenerator {
__ bind (L_do44);
// load the 11th round key to vKey11
// load the 11th round key to vKey1
__ li (keypos, 176);
__ lvx (vKey1, keypos, key);
__ li (keypos, 160);
__ lvx (vTmp1, keypos, key);
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm);
__ vec_perm (vKey1, vTmp1, vKey1, keyPerm);
// 1st round
__ vxor (vRet, vRet, vKey1);
__ bind (L_doLast);
// load the 10th round key to vKey10
__ addi (keypos, keypos, -16);
// load the 10th round key to vKey1
__ li (keypos, 144);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
// load the 9th round key to vKey2
__ li (keypos, 128);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 8th round key to vKey3
__ li (keypos, 112);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, vKey3, keyPerm);
// load the 7th round key to vKey4
__ li (keypos, 96);
__ lvx (vKey5, keypos, key);
__ vec_perm (vKey4, vKey5, vKey4, keyPerm);
// load the 6th round key to vKey5
__ li (keypos, 80);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey1, vTmp2, vTmp1, keyPerm);
// load the 9th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey2, vTmp1, vTmp2, keyPerm);
// load the 8th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey3, vTmp2, vTmp1, keyPerm);
// load the 7th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey4, vTmp1, vTmp2, keyPerm);
// load the 6th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey5, vTmp2, vTmp1, keyPerm);
__ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// last 10th - 6th rounds
__ vncipher (vRet, vRet, vKey1);
@ -3045,30 +3049,29 @@ class StubGenerator: public StubCodeGenerator {
__ vncipher (vRet, vRet, vKey4);
__ vncipher (vRet, vRet, vKey5);
// load the 5th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey1, vTmp1, vTmp2, keyPerm);
// load the 5th round key to vKey1
__ li (keypos, 64);
__ lvx (vKey2, keypos, key);
__ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
// load the 4th round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey2, vTmp2, vTmp1, keyPerm);
// load the 4th round key to vKey2
__ li (keypos, 48);
__ lvx (vKey3, keypos, key);
__ vec_perm (vKey2, vKey3, vKey2, keyPerm);
// load the 3rd round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey3, vTmp1, vTmp2, keyPerm);
// load the 3rd round key to vKey3
__ li (keypos, 32);
__ lvx (vKey4, keypos, key);
__ vec_perm (vKey3, vKey4, vKey3, keyPerm);
// load the 2nd round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp1, keypos, key);
__ vperm (vKey4, vTmp2, vTmp1, keyPerm);
// load the 2nd round key to vKey4
__ li (keypos, 16);
__ lvx (vKey5, keypos, key);
__ vec_perm (vKey4, vKey5, vKey4, keyPerm);
// load the 1st round key to vKey10
__ addi (keypos, keypos, -16);
__ lvx (vTmp2, keypos, key);
__ vperm (vKey5, vTmp1, vTmp2, keyPerm);
// load the 1st round key to vKey5
__ lvx (vTmp1, key);
__ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// last 5th - 1th rounds
__ vncipher (vRet, vRet, vKey1);
@ -3077,19 +3080,49 @@ class StubGenerator: public StubCodeGenerator {
__ vncipher (vRet, vRet, vKey4);
__ vncipherlast (vRet, vRet, vKey5);
__ neg (temp, to);
__ lvsr (toPerm, temp);
__ vspltisb (vTmp2, -1);
__ vxor (vTmp1, vTmp1, vTmp1);
__ vperm (vTmp2, vTmp2, vTmp1, toPerm);
__ vxor (toPerm, toPerm, fSplt);
// store result (unaligned)
#ifdef VM_LITTLE_ENDIAN
__ lvsl (toPerm, to);
#else
__ lvsr (toPerm, to);
#endif
__ vspltisb (vTmp3, -1);
__ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to);
__ vperm (vRet, vRet, vRet, toPerm);
__ vsel (vTmp1, vTmp1, vRet, vTmp2);
__ lvx (vTmp4, fifteen, to);
__ lvx (vTmp2, fifteen, to);
#ifdef VM_LITTLE_ENDIAN
__ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
__ vxor (toPerm, toPerm, fSplt); // swap bytes
#else
__ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
#endif
__ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
__ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
__ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
__ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to);
__ vsel (vRet, vRet, vTmp4, vTmp2);
__ stvx (vRet, fifteen, to);
__ blr();
return start;
}
// Generates the stub called `name`: a function entry, the code emitted by
// sha256(multi_block), and a return (blr). Returns the stub's entry address.
address generate_sha256_implCompress(bool multi_block, const char *name) {
// The stub must only be generated when UseSHA is enabled.
assert(UseSHA, "need SHA instructions");
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
__ sha256 (multi_block);
__ blr();
return start;
// Generates the stub called `name`: a function entry, the code emitted by
// sha512(multi_block), and a return (blr). Returns the stub's entry address.
address generate_sha512_implCompress(bool multi_block, const char *name) {
// The stub must only be generated when UseSHA is enabled.
assert(UseSHA, "need SHA instructions");
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
__ sha512 (multi_block);
__ blr();
return start;
@ -3306,6 +3339,267 @@ class StubGenerator: public StubCodeGenerator {
BLOCK_COMMENT("} Stub body");
}
/**
 * Generates the "mulAdd" stub and returns its entry address.
 *
 * Register contract of the generated code:
 *
 *   Input:
 *     R3_ARG1    - out address
 *     R4_ARG2    - in address
 *     R5_ARG3    - offset
 *     R6_ARG4    - len
 *     R7_ARG5    - k
 *   Output:
 *     R3_RET     - carry
 */
address generate_mulAdd() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "mulAdd");

  // Symbolic names for the incoming arguments and the carry result.
  const Register out    = R3_ARG1;
  const Register in     = R4_ARG2;
  const Register offset = R5_ARG3;
  const Register len    = R6_ARG4;
  const Register k      = R7_ARG5;
  const Register carry  = R10;

  address entry = __ function_entry();

  // C2 does not sign extend signed parameters to full 64 bits registers,
  // so normalize the three int arguments before use.
  __ rldic (offset, offset, 2, 32);  // always positive
  __ clrldi(len, len, 32);           // force zero bits on higher word
  __ clrldi(k, k, 32);               // force zero bits on higher word

  __ muladd(out, in, offset, len, k, R8, R9, carry);

  // Hand the resulting carry back in the return register.
  __ mr    (R3_RET, carry);
  __ blr();

  return entry;
}
/**
* Generates the "squareToLen" stub.
*
* The emitted code works in four phases (see the section comments below):
*  1. square every 32-bit input word and store the 64-bit products,
*     shifted right by one bit, into the output;
*  2. add in the off-diagonal products via muladd, propagating carries;
*  3. shift the whole output left by one bit;
*  4. OR the low bit of the last input word into the last output word.
* NOTE(review): this looks like a transcription of
* java.math.BigInteger.squareToLen / addOne / primitiveLeftShift -- confirm.
*
* Arguments:
*
* Input:
* R3_ARG1 - in address
* R4_ARG2 - in length
* R5_ARG3 - out address
* R6_ARG4 - out length
* Output:
* R3_RET - out address (copied from R5_ARG3 at the end)
*
* Returns the entry address of the generated stub.
*/
address generate_squareToLen() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "squareToLen");
address start = __ function_entry();
// args - higher word is cleaned (unsignedly) due to int to long casting
const Register in = R3_ARG1;
const Register in_len = R4_ARG2;
__ clrldi(in_len, in_len, 32);
const Register out = R5_ARG3;
const Register out_len = R6_ARG4;
__ clrldi(out_len, out_len, 32);
// output
// NOTE(review): R3_RET presumably aliases R3_ARG1 ('in'); it is only written
// by the final mr, after the last use of 'in'.
const Register ret = R3_RET;
// temporaries
// R7..R10 are used without being saved; R14..R27 are saved below.
const Register lplw_s = R7;
const Register in_aux = R8;
const Register out_aux = R9;
const Register piece = R10;
const Register product = R14;
const Register lplw = R15;
const Register i_minus1 = R16;
const Register carry = R17;
const Register offset = R18;
const Register off_aux = R19;
const Register t = R20;
const Register mlen = R21;
const Register len = R22;
const Register a = R23;
const Register b = R24;
const Register i = R25;
const Register c = R26;
const Register cs = R27;
// Labels
Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
// Save non-volatile regs (frameless).
// The 15 registers R28..R14 are stored at SP-8 .. SP-120; no frame is pushed.
// NOTE(review): R28 is saved/restored although no temporary above is mapped
// to it -- harmless, but confirm whether it is needed.
int current_offs = -8;
__ std(R28, current_offs, R1_SP); current_offs -= 8;
__ std(R27, current_offs, R1_SP); current_offs -= 8;
__ std(R26, current_offs, R1_SP); current_offs -= 8;
__ std(R25, current_offs, R1_SP); current_offs -= 8;
__ std(R24, current_offs, R1_SP); current_offs -= 8;
__ std(R23, current_offs, R1_SP); current_offs -= 8;
__ std(R22, current_offs, R1_SP); current_offs -= 8;
__ std(R21, current_offs, R1_SP); current_offs -= 8;
__ std(R20, current_offs, R1_SP); current_offs -= 8;
__ std(R19, current_offs, R1_SP); current_offs -= 8;
__ std(R18, current_offs, R1_SP); current_offs -= 8;
__ std(R17, current_offs, R1_SP); current_offs -= 8;
__ std(R16, current_offs, R1_SP); current_offs -= 8;
__ std(R15, current_offs, R1_SP); current_offs -= 8;
__ std(R14, current_offs, R1_SP);
// Store the squares, right shifted one bit (i.e., divided by 2)
// The cursors start one element before the arrays because the loop uses the
// update forms lwzu/stdu, which advance the cursor before accessing memory.
__ subi (out_aux, out, 8);
__ subi (in_aux, in, 4);
__ cmpwi (CCR0, in_len, 0);
// Initialize lplw outside of the loop
__ xorr (lplw, lplw, lplw);
__ ble (CCR0, SKIP_LOOP_SQUARE); // in_len <= 0
__ mtctr (in_len);
__ bind(LOOP_SQUARE);
__ lwzu (piece, 4, in_aux);
__ mulld (product, piece, piece);
// shift left 63 bits and only keep the MSB
// (lplw still holds the previous, unshifted product; its lowest bit becomes
// the top bit of the value stored in this iteration)
__ rldic (lplw_s, lplw, 63, 0);
__ mr (lplw, product);
// shift right 1 bit without sign extension
__ srdi (product, product, 1);
// join them to the same register and store it
__ orr (product, lplw_s, product);
#ifdef VM_LITTLE_ENDIAN
// Swap low and high words for little endian
__ rldicl (product, product, 32, 0);
#endif
__ stdu (product, 8, out_aux);
__ bdnz (LOOP_SQUARE);
__ bind(SKIP_LOOP_SQUARE);
// Add in off-diagonal sums
__ cmpwi (CCR0, in_len, 0);
__ ble (CCR0, SKIP_DIAGONAL_SUM);
// Avoid CTR usage here in order to use it at mulAdd
// Loop state: i_minus1 counts down from in_len-1 to 0, 'offset' is a byte
// offset that starts at 4 and grows by 8 per iteration.
__ subi (i_minus1, in_len, 1);
__ li (offset, 4);
__ bind(LOOP_DIAGONAL_SUM);
// off_aux = out_len*4 - offset
__ sldi (off_aux, out_len, 2);
__ sub (off_aux, off_aux, offset);
__ mr (len, i_minus1);
// mlen = i_minus1*4; t = 32-bit word at in + mlen
__ sldi (mlen, i_minus1, 2);
__ lwzx (t, in, mlen);
__ muladd (out, in, off_aux, len, t, a, b, carry);
// begin<addOne>
// off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
__ addi (mlen, mlen, 4);
__ sldi (a, out_len, 2);
__ subi (a, a, 4);
__ sub (a, a, mlen);
__ subi (off_aux, offset, 4);
__ sub (off_aux, a, off_aux);
// Add the carry produced by muladd to the word at out + off_aux.
__ lwzx (b, off_aux, out);
__ add (b, b, carry);
__ stwx (b, off_aux, out);
// if (((uint64_t)s >> 32) != 0) {
// (record-form shift: CR0 is set from the upper half of the 64-bit sum)
__ srdi_ (a, b, 32);
__ beq (CCR0, SKIP_ADDONE);
// while (--mlen >= 0) {
// NOTE(review): the emitted exit test is 'mlen == 0' (beq), not 'mlen < 0'
// as the pseudo-code above reads -- confirm this is intended.
__ bind(LOOP_ADDONE);
__ subi (mlen, mlen, 4);
__ cmpwi (CCR0, mlen, 0);
__ beq (CCR0, SKIP_ADDONE);
// if (--offset_aux < 0) { // Carry out of number
__ subi (off_aux, off_aux, 4);
__ cmpwi (CCR0, off_aux, 0);
__ blt (CCR0, SKIP_ADDONE);
// } else {
// Increment the next word; keep carrying only while its low 32 bits
// wrapped around to zero.
__ lwzx (b, off_aux, out);
__ addi (b, b, 1);
__ stwx (b, off_aux, out);
__ cmpwi (CCR0, b, 0);
__ bne (CCR0, SKIP_ADDONE);
__ b (LOOP_ADDONE);
__ bind(SKIP_ADDONE);
// } } } end<addOne>
__ addi (offset, offset, 8);
__ subi (i_minus1, i_minus1, 1);
__ cmpwi (CCR0, i_minus1, 0);
__ bge (CCR0, LOOP_DIAGONAL_SUM);
__ bind(SKIP_DIAGONAL_SUM);
// Shift back up and set low bit
// Shifts 1 bit left up to len positions. Assumes no leading zeros
// begin<primitiveLeftShift>
__ cmpwi (CCR0, out_len, 0);
__ ble (CCR0, SKIP_LSHIFT);
// i is the byte offset of the word being rewritten, c the word that follows.
// NOTE(review): CTR is loaded with out_len-1, so this loop presumably relies
// on out_len >= 2 (bdnz with an initial count of 0 would not terminate
// promptly) -- confirm callers guarantee this.
__ li (i, 0);
__ lwz (c, 0, out);
__ subi (b, out_len, 1);
__ mtctr (b);
__ bind(LOOP_LSHIFT);
// out[i] = (out[i] << 1) | (out[i+1] >>> 31)
__ mr (b, c);
__ addi (cs, i, 4);
__ lwzx (c, out, cs);
__ sldi (b, b, 1);
__ srwi (cs, c, 31);
__ orr (b, b, cs);
__ stwx (b, i, out);
__ addi (i, i, 4);
__ bdnz (LOOP_LSHIFT);
// The last word (byte offset out_len*4 - 4) has no successor: plain shift.
__ sldi (c, out_len, 2);
__ subi (c, c, 4);
__ lwzx (b, out, c);
__ sldi (b, b, 1);
__ stwx (b, out, c);
__ bind(SKIP_LSHIFT);
// end<primitiveLeftShift>
// Set low bit
// out[out_len-1] |= in[in_len-1] & 1
// NOTE(review): this section is not guarded by the in_len/out_len checks
// above; with a zero length it would access the word before the array.
// Presumably callers never pass zero lengths -- confirm.
__ sldi (i, in_len, 2);
__ subi (i, i, 4);
__ lwzx (i, in, i);
__ sldi (c, out_len, 2);
__ subi (c, c, 4);
__ lwzx (b, out, c);
__ andi (i, i, 1);
__ orr (i, b, i);
__ stwx (i, out, c);
// Restore non-volatile regs.
// Same order and offsets as the save sequence above.
current_offs = -8;
__ ld(R28, current_offs, R1_SP); current_offs -= 8;
__ ld(R27, current_offs, R1_SP); current_offs -= 8;
__ ld(R26, current_offs, R1_SP); current_offs -= 8;
__ ld(R25, current_offs, R1_SP); current_offs -= 8;
__ ld(R24, current_offs, R1_SP); current_offs -= 8;
__ ld(R23, current_offs, R1_SP); current_offs -= 8;
__ ld(R22, current_offs, R1_SP); current_offs -= 8;
__ ld(R21, current_offs, R1_SP); current_offs -= 8;
__ ld(R20, current_offs, R1_SP); current_offs -= 8;
__ ld(R19, current_offs, R1_SP); current_offs -= 8;
__ ld(R18, current_offs, R1_SP); current_offs -= 8;
__ ld(R17, current_offs, R1_SP); current_offs -= 8;
__ ld(R16, current_offs, R1_SP); current_offs -= 8;
__ ld(R15, current_offs, R1_SP); current_offs -= 8;
__ ld(R14, current_offs, R1_SP);
// Hand the output array address back in the return register.
__ mr(ret, out);
__ blr();
return start;
}
/**
* Arguments:
@ -3500,6 +3794,12 @@ class StubGenerator: public StubCodeGenerator {
}
#endif
if (UseSquareToLenIntrinsic) {
StubRoutines::_squareToLen = generate_squareToLen();
}
if (UseMulAddIntrinsic) {
StubRoutines::_mulAdd = generate_mulAdd();
}
if (UseMontgomeryMultiplyIntrinsic) {
StubRoutines::_montgomeryMultiply
= CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
@ -3514,6 +3814,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
}
if (UseSHA256Intrinsics) {
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
}
if (UseSHA512Intrinsics) {
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
}
}
public:

View File

@ -34,7 +34,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
enum platform_dependent_constants {
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
code_size2 = 20000 // simply increase if too small (assembler will crash if too small)
code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
};
// CRC32 Intrinsics.

View File

@ -1470,10 +1470,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
if (UseMembar) {
__ fence();
}
//=============================================================================
// Call the native method. Argument registers must not have been
// overwritten since "__ call_stub(signature_handler);" (except for
@ -1594,9 +1590,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ li(R0/*thread_state*/, _thread_in_Java);
__ release();
__ stw(R0/*thread_state*/, thread_(thread_state));
if (UseMembar) {
__ fence();
}
if (CheckJNICalls) {
// clear_pending_jni_exception_check

View File

@ -2224,6 +2224,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
if (is_static) {
__ ld(Robj, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f1_offset()), Rcache);
__ ld(Robj, in_bytes(Klass::java_mirror_offset()), Robj);
__ resolve_oop_handle(Robj);
// Acquire not needed here. Following access has an address dependency on this value.
}
}

View File

@ -107,13 +107,23 @@ void VM_Version::initialize() {
// TODO: PPC port PdScheduling::power6SectorSize = 0x20;
}
MaxVectorSize = 8;
if (PowerArchitecturePPC64 >= 8) {
if (FLAG_IS_DEFAULT(SuperwordUseVSX)) {
FLAG_SET_ERGO(bool, SuperwordUseVSX, true);
}
} else {
if (SuperwordUseVSX) {
warning("SuperwordUseVSX specified, but needs at least Power8.");
FLAG_SET_DEFAULT(SuperwordUseVSX, false);
}
}
MaxVectorSize = SuperwordUseVSX ? 16 : 8;
#endif
// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
@ -130,7 +140,8 @@ void VM_Version::initialize() {
(has_mfdscr() ? " mfdscr" : ""),
(has_vsx() ? " vsx" : ""),
(has_ldbrx() ? " ldbrx" : ""),
(has_stdbrx() ? " stdbrx" : "")
(has_stdbrx() ? " stdbrx" : ""),
(has_vshasig() ? " sha" : "")
// Make sure number of %s matches num_features!
);
_features_string = os::strdup(buf);
@ -138,8 +149,7 @@ void VM_Version::initialize() {
print_features();
}
// PPC64 supports 8-byte compare-exchange operations (see
// Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
// PPC64 supports 8-byte compare-exchange operations (see Atomic::cmpxchg)
// and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
_supports_cx8 = true;
@ -200,7 +210,6 @@ void VM_Version::initialize() {
}
// The AES intrinsic stubs require AES instruction support.
#if defined(VM_LITTLE_ENDIAN)
if (has_vcipher()) {
if (FLAG_IS_DEFAULT(UseAES)) {
UseAES = true;
@ -221,18 +230,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
#else
if (UseAES) {
warning("AES instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseAES, false);
}
if (UseAESIntrinsics) {
if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
warning("AES intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
#endif
if (UseAESCTRIntrinsics) {
warning("AES/CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
@ -247,17 +244,49 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseFMA, true);
}
if (UseSHA) {
if (has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA)) {
UseSHA = true;
}
} else if (UseSHA) {
if (!FLAG_IS_DEFAULT(UseSHA))
warning("SHA instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseSHA, false);
}
if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
warning("SHA intrinsics are not available on this CPU");
if (UseSHA1Intrinsics) {
warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
}
if (UseSHA && has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
}
} else if (UseSHA256Intrinsics) {
warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
}
if (UseSHA && has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
}
} else if (UseSHA512Intrinsics) {
warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
UseSquareToLenIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
UseMulAddIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true;
}
@ -657,6 +686,7 @@ void VM_Version::determine_features() {
a->lxvd2x(VSR0, R3_ARG1); // code[14] -> vsx
a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> ldbrx
a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[16] -> stdbrx
a->vshasigmaw(VR0, VR1, 1, 0xF); // code[17] -> vshasig
a->blr();
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@ -708,6 +738,7 @@ void VM_Version::determine_features() {
if (code[feature_cntr++]) features |= vsx_m;
if (code[feature_cntr++]) features |= ldbrx_m;
if (code[feature_cntr++]) features |= stdbrx_m;
if (code[feature_cntr++]) features |= vshasig_m;
// Print the detection code.
if (PrintAssembly) {

View File

@ -49,6 +49,7 @@ protected:
vsx,
ldbrx,
stdbrx,
vshasig,
num_features // last entry to count features
};
enum Feature_Flag_Set {
@ -64,6 +65,7 @@ protected:
vand_m = (1 << vand ),
lqarx_m = (1 << lqarx ),
vcipher_m = (1 << vcipher),
vshasig_m = (1 << vshasig),
vpmsumb_m = (1 << vpmsumb),
tcheck_m = (1 << tcheck ),
mfdscr_m = (1 << mfdscr ),
@ -106,6 +108,7 @@ public:
static bool has_vsx() { return (_features & vsx_m) != 0; }
static bool has_ldbrx() { return (_features & ldbrx_m) != 0; }
static bool has_stdbrx() { return (_features & stdbrx_m) != 0; }
static bool has_vshasig() { return (_features & vshasig_m) != 0; }
static bool has_mtfprd() { return has_vpmsumb(); } // alias for P8
// Assembler testing

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -250,7 +250,6 @@ class Address VALUE_OBJ_CLASS_SPEC {
bool is_RSform() { return has_base() && !has_index() && is_disp12(); }
bool is_RSYform() { return has_base() && !has_index() && is_disp20(); }
bool is_RXform() { return has_base() && has_index() && is_disp12(); }
bool is_RXEform() { return has_base() && has_index() && is_disp12(); }
bool is_RXYform() { return has_base() && has_index() && is_disp20(); }
bool uses(Register r) { return _base == r || _index == r; };
@ -1093,7 +1092,201 @@ class Assembler : public AbstractAssembler {
#define TRTT_ZOPC (unsigned int)(0xb9 << 24 | 0x90 << 16)
// Miscellaneous Operations
//---------------------------
//-- Vector Instructions --
//---------------------------
//---< Vector Support Instructions >---
//--- Load (memory) ---
#define VLM_ZOPC (unsigned long)(0xe7L << 40 | 0x36L << 0) // load full vreg range (n * 128 bit)
#define VL_ZOPC (unsigned long)(0xe7L << 40 | 0x06L << 0) // load full vreg (128 bit)
#define VLEB_ZOPC (unsigned long)(0xe7L << 40 | 0x00L << 0) // load vreg element (8 bit)
#define VLEH_ZOPC (unsigned long)(0xe7L << 40 | 0x01L << 0) // load vreg element (16 bit)
#define VLEF_ZOPC (unsigned long)(0xe7L << 40 | 0x03L << 0) // load vreg element (32 bit)
#define VLEG_ZOPC (unsigned long)(0xe7L << 40 | 0x02L << 0) // load vreg element (64 bit)
#define VLREP_ZOPC (unsigned long)(0xe7L << 40 | 0x05L << 0) // load and replicate into all vector elements
#define VLLEZ_ZOPC (unsigned long)(0xe7L << 40 | 0x04L << 0) // load logical element and zero.
// vector register gather
#define VGEF_ZOPC (unsigned long)(0xe7L << 40 | 0x13L << 0) // gather element (32 bit), V1(M3) = [D2(V2(M3),B2)]
#define VGEG_ZOPC (unsigned long)(0xe7L << 40 | 0x12L << 0) // gather element (64 bit), V1(M3) = [D2(V2(M3),B2)]
// vector register scatter
#define VSCEF_ZOPC (unsigned long)(0xe7L << 40 | 0x1bL << 0) // vector scatter element FW
#define VSCEG_ZOPC (unsigned long)(0xe7L << 40 | 0x1aL << 0) // vector scatter element DW
#define VLBB_ZOPC (unsigned long)(0xe7L << 40 | 0x07L << 0) // load vreg to block boundary (load to alignment).
#define VLL_ZOPC (unsigned long)(0xe7L << 40 | 0x37L << 0) // load vreg with length.
//--- Load (register) ---
#define VLR_ZOPC (unsigned long)(0xe7L << 40 | 0x56L << 0) // copy full vreg (128 bit)
#define VLGV_ZOPC (unsigned long)(0xe7L << 40 | 0x21L << 0) // copy vreg element -> GR
#define VLVG_ZOPC (unsigned long)(0xe7L << 40 | 0x22L << 0) // copy GR -> vreg element
#define VLVGP_ZOPC (unsigned long)(0xe7L << 40 | 0x62L << 0) // copy GR2, GR3 (disjoint pair) -> vreg
// vector register pack: halve the size of the source vector elements
#define VPK_ZOPC (unsigned long)(0xe7L << 40 | 0x94L << 0) // just cut
#define VPKS_ZOPC (unsigned long)(0xe7L << 40 | 0x97L << 0) // saturate as signed values
#define VPKLS_ZOPC (unsigned long)(0xe7L << 40 | 0x95L << 0) // saturate as unsigned values
// vector register unpack: double the size of the source vector elements
#define VUPH_ZOPC (unsigned long)(0xe7L << 40 | 0xd7L << 0) // signed, left half of the source vector elements
#define VUPLH_ZOPC (unsigned long)(0xe7L << 40 | 0xd5L << 0) // unsigned, left half of the source vector elements
#define VUPL_ZOPC (unsigned long)(0xe7L << 40 | 0xd6L << 0) // signed, right half of the source vector elements
#define VUPLL_ZOPC (unsigned long)(0xe7L << 40 | 0xd4L << 0) // unsigned, right half of the source vector element
// vector register merge
#define VMRH_ZOPC (unsigned long)(0xe7L << 40 | 0x61L << 0) // register merge high (left half of source registers)
#define VMRL_ZOPC (unsigned long)(0xe7L << 40 | 0x60L << 0) // register merge low (right half of source registers)
// vector register permute
#define VPERM_ZOPC (unsigned long)(0xe7L << 40 | 0x8cL << 0) // vector permute
#define VPDI_ZOPC (unsigned long)(0xe7L << 40 | 0x84L << 0) // vector permute DW immediate
// vector register replicate
#define VREP_ZOPC (unsigned long)(0xe7L << 40 | 0x4dL << 0) // vector replicate
#define VREPI_ZOPC (unsigned long)(0xe7L << 40 | 0x45L << 0) // vector replicate immediate
#define VSEL_ZOPC (unsigned long)(0xe7L << 40 | 0x8dL << 0) // vector select
#define VSEG_ZOPC (unsigned long)(0xe7L << 40 | 0x5fL << 0) // vector sign-extend to DW (rightmost element in each DW).
//--- Load (immediate) ---
#define VLEIB_ZOPC (unsigned long)(0xe7L << 40 | 0x40L << 0) // load vreg element (16 bit imm to 8 bit)
#define VLEIH_ZOPC (unsigned long)(0xe7L << 40 | 0x41L << 0) // load vreg element (16 bit imm to 16 bit)
#define VLEIF_ZOPC (unsigned long)(0xe7L << 40 | 0x43L << 0) // load vreg element (16 bit imm to 32 bit)
#define VLEIG_ZOPC (unsigned long)(0xe7L << 40 | 0x42L << 0) // load vreg element (16 bit imm to 64 bit)
//--- Store ---
#define VSTM_ZOPC (unsigned long)(0xe7L << 40 | 0x3eL << 0) // store full vreg range (n * 128 bit)
#define VST_ZOPC (unsigned long)(0xe7L << 40 | 0x0eL << 0) // store full vreg (128 bit)
#define VSTEB_ZOPC (unsigned long)(0xe7L << 40 | 0x08L << 0) // store vreg element (8 bit)
#define VSTEH_ZOPC (unsigned long)(0xe7L << 40 | 0x09L << 0) // store vreg element (16 bit)
#define VSTEF_ZOPC (unsigned long)(0xe7L << 40 | 0x0bL << 0) // store vreg element (32 bit)
#define VSTEG_ZOPC (unsigned long)(0xe7L << 40 | 0x0aL << 0) // store vreg element (64 bit)
#define VSTL_ZOPC (unsigned long)(0xe7L << 40 | 0x3fL << 0) // store vreg with length.
//--- Misc ---
#define VGM_ZOPC (unsigned long)(0xe7L << 40 | 0x46L << 0) // generate bit mask, [start..end] = '1', else '0'
#define VGBM_ZOPC (unsigned long)(0xe7L << 40 | 0x44L << 0) // generate byte mask, bits(imm16) -> bytes
//---< Vector Arithmetic Instructions >---
// Load
#define VLC_ZOPC (unsigned long)(0xe7L << 40 | 0xdeL << 0) // V1 := -V2, element size = 2**m
#define VLP_ZOPC (unsigned long)(0xe7L << 40 | 0xdfL << 0) // V1 := |V2|, element size = 2**m
// ADD
#define VA_ZOPC (unsigned long)(0xe7L << 40 | 0xf3L << 0) // V1 := V2 + V3, element size = 2**m
#define VACC_ZOPC (unsigned long)(0xe7L << 40 | 0xf1L << 0) // V1 := carry(V2 + V3), element size = 2**m
// SUB
#define VS_ZOPC (unsigned long)(0xe7L << 40 | 0xf7L << 0) // V1 := V2 - V3, element size = 2**m
#define VSCBI_ZOPC (unsigned long)(0xe7L << 40 | 0xf5L << 0) // V1 := borrow(V2 - V3), element size = 2**m
// MUL
#define VML_ZOPC (unsigned long)(0xe7L << 40 | 0xa2L << 0) // V1 := V2 * V3, element size = 2**m
#define VMH_ZOPC (unsigned long)(0xe7L << 40 | 0xa3L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLH_ZOPC (unsigned long)(0xe7L << 40 | 0xa1L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
#define VME_ZOPC (unsigned long)(0xe7L << 40 | 0xa6L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLE_ZOPC (unsigned long)(0xe7L << 40 | 0xa4L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
#define VMO_ZOPC (unsigned long)(0xe7L << 40 | 0xa7L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLO_ZOPC (unsigned long)(0xe7L << 40 | 0xa5L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
// MUL & ADD
#define VMAL_ZOPC (unsigned long)(0xe7L << 40 | 0xaaL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMAH_ZOPC (unsigned long)(0xe7L << 40 | 0xabL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALH_ZOPC (unsigned long)(0xe7L << 40 | 0xa9L << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
#define VMAE_ZOPC (unsigned long)(0xe7L << 40 | 0xaeL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALE_ZOPC (unsigned long)(0xe7L << 40 | 0xacL << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
#define VMAO_ZOPC (unsigned long)(0xe7L << 40 | 0xafL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALO_ZOPC (unsigned long)(0xe7L << 40 | 0xadL << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
// Vector SUM
#define VSUM_ZOPC (unsigned long)(0xe7L << 40 | 0x64L << 0) // V1[j] := toFW(sum(V2[i]) + V3[j]), subelements: byte or HW
#define VSUMG_ZOPC (unsigned long)(0xe7L << 40 | 0x65L << 0) // V1[j] := toDW(sum(V2[i]) + V3[j]), subelements: HW or FW
#define VSUMQ_ZOPC (unsigned long)(0xe7L << 40 | 0x67L << 0) // V1[j] := toQW(sum(V2[i]) + V3[j]), subelements: FW or DW
// Average
#define VAVG_ZOPC (unsigned long)(0xe7L << 40 | 0xf2L << 0) // V1 := (V2+V3+1)/2, signed, element size = 2**m
#define VAVGL_ZOPC (unsigned long)(0xe7L << 40 | 0xf0L << 0) // V1 := (V2+V3+1)/2, unsigned, element size = 2**m
// VECTOR Galois Field Multiply Sum
#define VGFM_ZOPC (unsigned long)(0xe7L << 40 | 0xb4L << 0)
#define VGFMA_ZOPC (unsigned long)(0xe7L << 40 | 0xbcL << 0)
//---< Vector Logical Instructions >---
// AND
#define VN_ZOPC (unsigned long)(0xe7L << 40 | 0x68L << 0) // V1 := V2 & V3, element size = 2**m
#define VNC_ZOPC (unsigned long)(0xe7L << 40 | 0x69L << 0) // V1 := V2 & ~V3, element size = 2**m
// XOR
#define VX_ZOPC (unsigned long)(0xe7L << 40 | 0x6dL << 0) // V1 := V2 ^ V3, element size = 2**m
// NOR
#define VNO_ZOPC (unsigned long)(0xe7L << 40 | 0x6bL << 0) // V1 := !(V2 | V3), element size = 2**m
// OR
#define VO_ZOPC (unsigned long)(0xe7L << 40 | 0x6aL << 0) // V1 := V2 | V3, element size = 2**m
// Comparison (element-wise)
#define VCEQ_ZOPC (unsigned long)(0xe7L << 40 | 0xf8L << 0) // V1 := (V2 == V3) ? 0xffff : 0x0000, element size = 2**m
#define VCH_ZOPC (unsigned long)(0xe7L << 40 | 0xfbL << 0) // V1 := (V2 > V3) ? 0xffff : 0x0000, element size = 2**m, signed
#define VCHL_ZOPC (unsigned long)(0xe7L << 40 | 0xf9L << 0) // V1 := (V2 > V3) ? 0xffff : 0x0000, element size = 2**m, unsigned
// Max/Min (element-wise)
#define VMX_ZOPC (unsigned long)(0xe7L << 40 | 0xffL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, signed
#define VMXL_ZOPC (unsigned long)(0xe7L << 40 | 0xfdL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, unsigned
#define VMN_ZOPC (unsigned long)(0xe7L << 40 | 0xfeL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, signed
#define VMNL_ZOPC (unsigned long)(0xe7L << 40 | 0xfcL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, unsigned
// Leading/Trailing Zeros, population count
#define VCLZ_ZOPC (unsigned long)(0xe7L << 40 | 0x53L << 0) // V1 := leadingzeros(V2), element size = 2**m
#define VCTZ_ZOPC (unsigned long)(0xe7L << 40 | 0x52L << 0) // V1 := trailingzeros(V2), element size = 2**m
#define VPOPCT_ZOPC (unsigned long)(0xe7L << 40 | 0x50L << 0) // V1 := popcount(V2), bytewise!!
// Rotate/Shift
#define VERLLV_ZOPC (unsigned long)(0xe7L << 40 | 0x73L << 0) // V1 := rotateleft(V2), rotate count in V3 element
#define VERLL_ZOPC (unsigned long)(0xe7L << 40 | 0x33L << 0) // V1 := rotateleft(V3), rotate count from d2(b2).
#define VERIM_ZOPC (unsigned long)(0xe7L << 40 | 0x72L << 0) // Rotate then insert under mask. Read Principles of Operation!!
#define VESLV_ZOPC (unsigned long)(0xe7L << 40 | 0x70L << 0) // V1 := SLL(V2, V3), unsigned, element-wise
#define VESL_ZOPC (unsigned long)(0xe7L << 40 | 0x30L << 0) // V1 := SLL(V3), unsigned, shift count from d2(b2).
#define VESRAV_ZOPC (unsigned long)(0xe7L << 40 | 0x7AL << 0) // V1 := SRA(V2, V3), signed, element-wise
#define VESRA_ZOPC (unsigned long)(0xe7L << 40 | 0x3AL << 0) // V1 := SRA(V3), signed, shift count from d2(b2).
#define VESRLV_ZOPC (unsigned long)(0xe7L << 40 | 0x78L << 0) // V1 := SRL(V2, V3), unsigned, element-wise
#define VESRL_ZOPC (unsigned long)(0xe7L << 40 | 0x38L << 0) // V1 := SRL(V3), unsigned, shift count from d2(b2).
#define VSL_ZOPC (unsigned long)(0xe7L << 40 | 0x74L << 0) // V1 := SLL(V2), unsigned, bit-count
#define VSLB_ZOPC (unsigned long)(0xe7L << 40 | 0x75L << 0) // V1 := SLL(V2), unsigned, byte-count
#define VSLDB_ZOPC (unsigned long)(0xe7L << 40 | 0x77L << 0) // V1 := SLL((V2,V3)), unsigned, byte-count
#define VSRA_ZOPC (unsigned long)(0xe7L << 40 | 0x7eL << 0) // V1 := SRA(V2), signed, bit-count
#define VSRAB_ZOPC (unsigned long)(0xe7L << 40 | 0x7fL << 0) // V1 := SRA(V2), signed, byte-count
#define VSRL_ZOPC (unsigned long)(0xe7L << 40 | 0x7cL << 0) // V1 := SRL(V2), unsigned, bit-count
#define VSRLB_ZOPC (unsigned long)(0xe7L << 40 | 0x7dL << 0) // V1 := SRL(V2), unsigned, byte-count
// Test under Mask
#define VTM_ZOPC (unsigned long)(0xe7L << 40 | 0xd8L << 0) // Like TM, set CC according to state of selected bits.
//---< Vector String Instructions >---
#define VFAE_ZOPC (unsigned long)(0xe7L << 40 | 0x82L << 0) // Find any element
#define VFEE_ZOPC (unsigned long)(0xe7L << 40 | 0x80L << 0) // Find element equal
#define VFENE_ZOPC (unsigned long)(0xe7L << 40 | 0x81L << 0) // Find element not equal
#define VSTRC_ZOPC (unsigned long)(0xe7L << 40 | 0x8aL << 0) // String range compare
#define VISTR_ZOPC (unsigned long)(0xe7L << 40 | 0x5cL << 0) // Isolate String
//--------------------------------
//-- Miscellaneous Operations --
//--------------------------------
// Execute
#define EX_ZOPC (unsigned int)(68L << 24)
@ -1117,7 +1310,6 @@ class Assembler : public AbstractAssembler {
#define LAOG_ZOPC (unsigned long)(0xebL << 40 | 0xe6L) // z196
// System Functions
#define STCK_ZOPC (unsigned int)(0xb2 << 24 | 0x05 << 16)
#define STCKF_ZOPC (unsigned int)(0xb2 << 24 | 0x7c << 16)
#define STFLE_ZOPC (unsigned int)(0xb2 << 24 | 0xb0 << 16)
#define ECTG_ZOPC (unsigned long)(0xc8L <<40 | 0x01L << 32) // z10
@ -1244,10 +1436,18 @@ class Assembler : public AbstractAssembler {
// unsigned arithmetic calculation instructions
// Mask bit#0 is not used by these instructions.
// There is no indication of overflow for these instr.
bcondLogZero = 2,
bcondLogNotZero = 5,
bcondLogZero_NoCarry = 8,
bcondLogZero_Carry = 2,
// bcondLogZero_Borrow = 8, // This CC is never generated.
bcondLogZero_NoBorrow = 2,
bcondLogZero = bcondLogZero_Carry | bcondLogZero_NoCarry,
bcondLogNotZero_NoCarry = 4,
bcondLogNotZero_Carry = 1,
bcondLogNotZero_Borrow = 4,
bcondLogNotZero_NoBorrow = 1,
bcondLogNotZero = bcondLogNotZero_Carry | bcondLogNotZero_NoCarry,
bcondLogCarry = bcondLogZero_Carry | bcondLogNotZero_Carry,
bcondLogBorrow = /* bcondLogZero_Borrow | */ bcondLogNotZero_Borrow,
// string search instructions
bcondFound = 4,
bcondNotFound = 2,
@ -1280,6 +1480,29 @@ class Assembler : public AbstractAssembler {
to_minus_infinity = 7
};
// Vector Register Element Type.
// Selects the width of the elements a vector instruction operates on.
// The enumerator value m corresponds to an element size of 2**m bytes.
enum VRegElemType {
  VRET_BYTE = 0x0,  //   8-bit elements (byte)
  VRET_HW   = 0x1,  //  16-bit elements (halfword)
  VRET_FW   = 0x2,  //  32-bit elements (fullword)
  VRET_DW   = 0x3,  //  64-bit elements (doubleword)
  VRET_QW   = 0x4   // 128-bit element  (quadword, i.e. the full register)
};
// Vector Operation Result Control.
// Flag set accepted by some vector instructions to steer the result
// (side) effects of instruction execution. Each property occupies one bit
// of a 4-bit field; for every bit there is a named "set" and a named
// "clear" (== 0) enumerator so call sites can spell out both choices.
enum VOpRC {
  // bit 0: condition code
  VOPRC_CCIGN    = 0,       // ignore, don't set CC.
  VOPRC_CCSET    = 1 << 0,  // set the CC.
  // bit 1: zero search
  VOPRC_NOZS     = 0,       // No Zero Search.
  VOPRC_ZS       = 1 << 1,  // Zero Search. Additional, elementwise, comparison against zero.
  // bit 2: result type
  VOPRC_RTBITVEC = 0,       // generate bit vector, all 1s for true, all 0s for false element comparisons.
  VOPRC_RTBYTEIX = 1 << 2,  // generate byte index to lowest element with true comparison.
  // bit 3: inversion
  VOPRC_NOINVERT = 0,       // use comparison results as is, do not invert.
  VOPRC_INVERT   = 1 << 3   // invert comparison results.
};
// Inverse condition code, i.e. determine "15 - cc" for a given condition code cc.
static branch_condition inverse_condition(branch_condition cc);
static branch_condition inverse_float_condition(branch_condition cc);
@ -1376,6 +1599,65 @@ class Assembler : public AbstractAssembler {
return r;
}
// Address-operand overloads: check the Address form, then delegate to the
// overloads taking the individual displacement/index/base operands.

// RS form: base + 12-bit displacement (asserted).
static int64_t rsmask_48(Address a) {
  assert(a.is_RSform(), "bad address format");
  return rsmask_48(a.disp12(), a.base());
}

// RX form: base + index + 12-bit displacement. An address without index
// (RS form) is accepted as well; anything else is a fatal error.
static int64_t rxmask_48(Address a) {
  if (a.is_RXform()) {
    return rxmask_48(a.disp12(), a.index(), a.base());
  }
  if (a.is_RSform()) {
    return rsmask_48(a.disp12(), a.base());
  }
  guarantee(false, "bad address format");
  return 0;
}

// RSY form: base + 20-bit displacement (asserted).
static int64_t rsymask_48(Address a) {
  assert(a.is_RSYform(), "bad address format");
  return rsymask_48(a.disp20(), a.base());
}

// RXY form: base + index + 20-bit displacement. An address without index
// (RSY form) is accepted as well; anything else is a fatal error.
static int64_t rxymask_48(Address a) {
  if (a.is_RXYform()) {
    return rxymask_48(a.disp20(), a.index(), a.base());
  }
  if (a.is_RSYform()) {
    return rsymask_48(a.disp20(), a.base());
  }
  guarantee(false, "bad address format");
  return 0;
}
// Operand-level overloads: combine the encoded displacement, optional index
// register and base register fields of a 48-bit instruction.

// 12-bit unsigned displacement + base.
static int64_t rsmask_48(int64_t d2, Register b2) {
  return uimm12(d2, 20, 48)
       | regz(b2, 16, 48);
}

// 12-bit unsigned displacement + index + base.
static int64_t rxmask_48(int64_t d2, Register x2, Register b2) {
  return uimm12(d2, 20, 48)
       | reg(x2, 12, 48)
       | regz(b2, 16, 48);
}

// 20-bit signed displacement + base.
static int64_t rsymask_48(int64_t d2, Register b2) {
  return simm20(d2)
       | regz(b2, 16, 48);
}

// 20-bit signed displacement + index + base.
static int64_t rxymask_48(int64_t d2, Register x2, Register b2) {
  return simm20(d2)
       | reg(x2, 12, 48)
       | regz(b2, 16, 48);
}
// Address calculated from d12(vx,b) - vx is vector index register.
// Combines the 12-bit unsigned displacement, the vector index register field
// and the base register field of a 48-bit instruction.
static int64_t rvmask_48(int64_t d2, VectorRegister x2, Register b2) {
  return uimm12(d2, 20, 48)
       | vreg(x2, 12)
       | regz(b2, 16, 48);
}
// Encodes vector register v for the operand field at bit position pos:
// the register field produced by vreg() OR'ed with the register's RXB mask
// for that position.
static int64_t vreg_mask(VectorRegister v, int pos) {
  const int64_t reg_bits = vreg(v, pos);
  const int64_t rxb_bits = v->RXB_mask(pos);
  return reg_bits | rxb_bits;
}
// Vector Element Size Control. 4-bit field which indicates the size of the vector elements.
//   size     - requested element size.
//   min_size - minimum element size. Not all instructions support element sizes beginning with "byte".
//   max_size - maximum element size. Not all instructions support element sizes up to "QW".
//   pos      - position of the field, handed on to uimm4().
static int64_t vesc_mask(int64_t size, int min_size, int max_size, int pos) {
  assert((min_size <= size) && (size <= max_size), "element size control out of range");
  const int64_t size_field = uimm4(size, pos, 48);
  return size_field;
}
// Vector Element IndeX. 4-bit field which indexes the target vector element.
//   ix      - vector element index. Must lie in [0, max_ix], where max_ix
//             depends on the element size (15 for bytes down to 0 for QW).
//   el_size - size of the vector element. This is a VRegElemType enum value.
//             Any other value is a fatal error.
//   pos     - position of the field, handed on to uimm4().
static int64_t veix_mask(int64_t ix, int el_size, int pos) {
  int max_ix = -1;
  switch (el_size) {
    case VRET_BYTE: max_ix = 15; break;
    case VRET_HW:   max_ix =  7; break;
    case VRET_FW:   max_ix =  3; break;
    case VRET_DW:   max_ix =  1; break;
    case VRET_QW:   max_ix =  0; break;
    default:        guarantee(false, "bad vector element size %d", el_size); break;
  }
  // It is the element index (not the element size) that is range-checked here.
  assert((0 <= ix) && (ix <= max_ix), "element index out of range (0 <= %ld <= %d)", ix, max_ix);
  return uimm4(ix, pos, 48);
}
// Vector Operation Result Control. 4-bit field.
//   flags         - combination of VOPRC_* flags to encode.
//   pos           - position of the field, handed on to uimm4().
//   allowed_flags - VOPRC_* flags the instruction accepts (default: all four
//                   bits). Asserts that no flag outside this set is passed.
static int64_t voprc_any(int64_t flags, int pos, int64_t allowed_flags = 0b1111) {
  assert((flags & allowed_flags) == flags, "Invalid VOPRC_* flag combination: %d", (int)flags);
  const int64_t flag_field = uimm4(flags, pos, 48);
  return flag_field;
}
// Vector Operation Result Control. Condition code setting.
// Variant of voprc_any() for instructions which only accept the
// condition code flags (VOPRC_CCIGN / VOPRC_CCSET).
static int64_t voprc_ccmask(int64_t flags, int pos) {
  const int64_t cc_flags = VOPRC_CCIGN | VOPRC_CCSET;
  return voprc_any(flags, pos, cc_flags);
}
public:
//--------------------------------------------------
@ -1453,6 +1735,8 @@ class Assembler : public AbstractAssembler {
// 24-bit immediate, placed so that the field starts at bit position s of a
// len-bit instruction.
static long imm24(int64_t i24, int s, int len) {
  const int shift = len-s-24;
  return imm(i24, 24) << shift;
}

// 32-bit immediate, placed so that the field starts at bit position s of a
// len-bit instruction.
static long imm32(int64_t i32, int s, int len) {
  const int shift = len-s-32;
  return imm(i32, 32) << shift;
}

// Vector register operand of a 48-bit instruction: the low four bits of the
// register encoding go into the 4-bit field at pos, the RXB mask for that
// position is OR'ed in.
static long vreg(VectorRegister v, int pos) {
  const int len    = 48;
  const int hi_bit = (len-pos)-1;
  const int lo_bit = (len-pos)-4;
  return u_field(v->encoding()&0x0f, hi_bit, lo_bit) | v->RXB_mask(pos);
}

// Same encoding as freg().
static long fregt(FloatRegister r, int s, int len) {
  return freg(r,s,len);
}

// Float register operand: register encoding in the 4-bit field starting at
// bit position s of a len-bit instruction.
static long freg(FloatRegister r, int s, int len) {
  const int hi_bit = (len-s)-1;
  const int lo_bit = (len-s)-4;
  return u_field(r->encoding(), hi_bit, lo_bit);
}
@ -1840,13 +2124,16 @@ class Assembler : public AbstractAssembler {
inline void z_alsi( const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint32 -- z10
inline void z_algsi(const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint64 -- z10
// sign adjustment (negate, negative absolute value, absolute value)
inline void z_lcr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int32
inline void z_lcgr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int64
inline void z_lcgfr(Register r1, Register r2); // neg r1 = -r2 ; int64 <- int32
inline void z_lnr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int32
inline void z_lngr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int64
inline void z_lngfr(Register r1, Register r2); // neg r1 = -|r2| ; int64 <- int32
inline void z_lpr( Register r1, Register r2 = noreg); // r1 = |r2| ; int32
inline void z_lpgr( Register r1, Register r2 = noreg); // r1 = |r2| ; int64
inline void z_lpgfr(Register r1, Register r2); // r1 = |r2| ; int64 <- int32
// subtract instructions
// sub registers
@ -2125,6 +2412,422 @@ class Assembler : public AbstractAssembler {
inline void z_trtt(Register r1, Register r2, int64_t m3);
//---------------------------
//-- Vector Instructions --
//---------------------------
//---< Vector Support Instructions >---
// Load (transfer from memory)
inline void z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
// Gather/Scatter
inline void z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
// load and replicate
inline void z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vlrepb(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlreph(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlrepf(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlrepg(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vllezb(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezh(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezf(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezg(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlbb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vll( VectorRegister v1, Register r3, int64_t d2, Register b2);
// Load (register to register)
inline void z_vlr( VectorRegister v1, VectorRegister v2);
inline void z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4);
inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgp( VectorRegister v1, Register r2, Register r3);
// vector register pack
inline void z_vpk( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vpkh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// vector register unpack (sign-extended)
inline void z_vuph( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuphb( VectorRegister v1, VectorRegister v2);
inline void z_vuphh( VectorRegister v1, VectorRegister v2);
inline void z_vuphf( VectorRegister v1, VectorRegister v2);
inline void z_vupl( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuplb( VectorRegister v1, VectorRegister v2);
inline void z_vuplh( VectorRegister v1, VectorRegister v2);
inline void z_vuplf( VectorRegister v1, VectorRegister v2);
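// vector register unpack (zero-extended)
// NOTE: z_vuplh is overloaded. The three-operand form below (taking m3) heads this
// zero-extended group (sized variants z_vuplhb/z_vuplhh/z_vuplhf), whereas the
// two-operand z_vuplh declared in the sign-extended group sits in the z_vupl b/h/f series.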
inline void z_vuplh( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuplhb( VectorRegister v1, VectorRegister v2);
inline void z_vuplhh( VectorRegister v1, VectorRegister v2);
inline void z_vuplhf( VectorRegister v1, VectorRegister v2);
inline void z_vupll( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vupllb( VectorRegister v1, VectorRegister v2);
inline void z_vupllh( VectorRegister v1, VectorRegister v2);
inline void z_vupllf( VectorRegister v1, VectorRegister v2);
// vector register merge high/low
inline void z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmrhb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmrlb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// vector register permute
inline void z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vpdi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
// vector register replicate
inline void z_vrep( VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4);
inline void z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepi( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vrepib(VectorRegister v1, int64_t imm2);
inline void z_vrepih(VectorRegister v1, int64_t imm2);
inline void z_vrepif(VectorRegister v1, int64_t imm2);
inline void z_vrepig(VectorRegister v1, int64_t imm2);
inline void z_vsel( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vseg( VectorRegister v1, VectorRegister v2, int64_t imm3);
// Load (immediate)
inline void z_vleib( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleih( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleif( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleig( VectorRegister v1, int64_t imm2, int64_t m3);
// Store
inline void z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2);
// Misc
inline void z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4);
inline void z_vgmb( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmh( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmf( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmg( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgbm( VectorRegister v1, int64_t imm2);
inline void z_vzero( VectorRegister v1); // preferred method to set vreg to all zeroes
inline void z_vone( VectorRegister v1); // preferred method to set vreg to all ones
//---< Vector Arithmetic Instructions >---
// Load
inline void z_vlc( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vlcb( VectorRegister v1, VectorRegister v2);
inline void z_vlch( VectorRegister v1, VectorRegister v2);
inline void z_vlcf( VectorRegister v1, VectorRegister v2);
inline void z_vlcg( VectorRegister v1, VectorRegister v2);
inline void z_vlp( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vlpb( VectorRegister v1, VectorRegister v2);
inline void z_vlph( VectorRegister v1, VectorRegister v2);
inline void z_vlpf( VectorRegister v1, VectorRegister v2);
inline void z_vlpg( VectorRegister v1, VectorRegister v2);
// ADD
inline void z_va( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vab( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vah( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vag( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vacc( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vaccb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vacch( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// SUB
inline void z_vs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// MULTIPLY
inline void z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
// MULTIPLY & ADD
inline void z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmalh( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmae( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmale( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmao( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmalo( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
// VECTOR SUM
inline void z_vsum( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Average
inline void z_vavg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vavgb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// VECTOR Galois Field Multiply Sum
inline void z_vgfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vgfmb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// VECTOR Galois Field Multiply Sum and Accumulate
inline void z_vgfma( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
//---< Vector Logical Instructions >---
// AND
inline void z_vn( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vnc( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// XOR
inline void z_vx( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// NOR
inline void z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// OR
inline void z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Comparison (element-wise)
inline void z_vceq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vceqb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vch( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vchb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vchlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Max/Min (element-wise)
inline void z_vmx( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmxb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmxlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmn( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmnb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmng( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmnlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Leading/Trailing Zeros, population count
inline void z_vclz( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vclzb( VectorRegister v1, VectorRegister v2);
inline void z_vclzh( VectorRegister v1, VectorRegister v2);
inline void z_vclzf( VectorRegister v1, VectorRegister v2);
inline void z_vclzg( VectorRegister v1, VectorRegister v2);
inline void z_vctz( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vctzb( VectorRegister v1, VectorRegister v2);
inline void z_vctzh( VectorRegister v1, VectorRegister v2);
inline void z_vctzf( VectorRegister v1, VectorRegister v2);
inline void z_vctzg( VectorRegister v1, VectorRegister v2);
inline void z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3);
// Rotate/Shift
inline void z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verim( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5);
inline void z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_veslv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_veslb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesra( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vsl( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vslb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsldb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_vsra( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrab( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrl( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Test under Mask
inline void z_vtm( VectorRegister v1, VectorRegister v2);
//---< Vector String Instructions >---
inline void z_vfae( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find any element
inline void z_vfaeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfaeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfaef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfee( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element equal
inline void z_vfeeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfene( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element not equal
inline void z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vstrc( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6); // String range compare
inline void z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vistr( VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5); // Isolate String
inline void z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrbs(VectorRegister v1, VectorRegister v2);
inline void z_vistrhs(VectorRegister v1, VectorRegister v2);
inline void z_vistrfs(VectorRegister v1, VectorRegister v2);
// Floating-point instructions
// ==========================
@ -2331,7 +3034,6 @@ class Assembler : public AbstractAssembler {
inline void z_ahhlr(Register r1, Register r2, Register r3); // ADD halfword high low
inline void z_tam();
inline void z_stck(int64_t d2, Register b2);
inline void z_stckf(int64_t d2, Register b2);
inline void z_stmg(Register r1, Register r3, int64_t d2, Register b2);
inline void z_lmg(Register r1, Register r3, int64_t d2, Register b2);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -309,6 +309,9 @@ inline void Assembler::z_lcgfr(Register r1, Register r2) { emit_32( LCGFR_ZOPC |
// r1 = -|r2| (int32), 16-bit encoding. With r2 == noreg, r1 is also used as source.
inline void Assembler::z_lnr(Register r1, Register r2) {
  const Register src = (r2 == noreg) ? r1 : r2;
  emit_16(LNR_ZOPC | regt(r1, 8, 16) | reg(src, 12, 16));
}
// r1 = -|r2| (int64), 32-bit encoding. With r2 == noreg, r1 is also used as source.
inline void Assembler::z_lngr(Register r1, Register r2) {
  const Register src = (r2 == noreg) ? r1 : r2;
  emit_32(LNGR_ZOPC | regt(r1, 24, 32) | reg(src, 28, 32));
}
// r1 = -|r2| (int64 <- int32), 32-bit encoding. With r2 == noreg, r1 is also used as source.
inline void Assembler::z_lngfr(Register r1, Register r2) {
  const Register src = (r2 == noreg) ? r1 : r2;
  emit_32(LNGFR_ZOPC | regt(r1, 24, 32) | reg(src, 28, 32));
}
// r1 = |r2| (int32), 16-bit encoding. With r2 == noreg, r1 is also used as source.
inline void Assembler::z_lpr(Register r1, Register r2) {
  const Register src = (r2 == noreg) ? r1 : r2;
  emit_16(LPR_ZOPC | regt(r1, 8, 16) | reg(src, 12, 16));
}
// r1 = |r2| (int64), 32-bit encoding. With r2 == noreg, r1 is also used as source.
inline void Assembler::z_lpgr(Register r1, Register r2) {
  const Register src = (r2 == noreg) ? r1 : r2;
  emit_32(LPGR_ZOPC | regt(r1, 24, 32) | reg(src, 28, 32));
}
// r1 = |r2| (int64 <- int32), 32-bit encoding. With r2 == noreg, r1 is also used as source.
inline void Assembler::z_lpgfr(Register r1, Register r2) {
  const Register src = (r2 == noreg) ? r1 : r2;
  emit_32(LPGFR_ZOPC | regt(r1, 24, 32) | reg(src, 28, 32));
}
// Emits LRVR as a 32-bit instruction: r1 via regt at position 24, r2 via reg at position 28.
inline void Assembler::z_lrvr(Register r1, Register r2) {
  emit_32(LRVR_ZOPC
          | regt(r1, 24, 32)
          | reg(r2, 28, 32));
}
// Emits LRVGR as a 32-bit instruction: r1 via regt at position 24, r2 via reg at position 28.
inline void Assembler::z_lrvgr(Register r1, Register r2) {
  emit_32(LRVGR_ZOPC
          | regt(r1, 24, 32)
          | reg(r2, 28, 32));
}
@ -686,7 +689,6 @@ inline void Assembler::z_ahhhr(Register r1, Register r2, Register r3) { emit_32(
// Emits AHHLR as a 32-bit instruction. Note the operand order in the encoding:
// r3 at position 16, r1 at position 24, r2 at position 28.
inline void Assembler::z_ahhlr(Register r1, Register r2, Register r3) {
  emit_32(AHHLR_ZOPC
          | reg(r3, 16, 32)
          | reg(r1, 24, 32)
          | reg(r2, 28, 32));
}
// Emits TAM; the 16-bit instruction consists of the opcode only.
inline void Assembler::z_tam() {
  emit_16(TAM_ZOPC);
}
// Emits STCK as a 32-bit instruction: 12-bit unsigned displacement d2 at position 20,
// base register b2 (via regz) at position 16.
inline void Assembler::z_stck(int64_t d2, Register b2) {
  emit_32(STCK_ZOPC
          | uimm12(d2, 20, 32)
          | regz(b2, 16, 32));
}
// Emits STCKF as a 32-bit instruction: 12-bit unsigned displacement d2 at position 20,
// base register b2 (via regz) at position 16.
inline void Assembler::z_stckf(int64_t d2, Register b2) {
  emit_32(STCKF_ZOPC
          | uimm12(d2, 20, 32)
          | regz(b2, 16, 32));
}
// Emits STMG as a 48-bit instruction: 20-bit signed displacement d2,
// r1 at position 8, r3 at position 12, base register b2 at position 16.
inline void Assembler::z_stmg(Register r1, Register r3, int64_t d2, Register b2) {
  emit_48(STMG_ZOPC
          | simm20(d2)
          | reg(r1, 8, 48)
          | reg(r3, 12, 48)
          | reg(b2, 16, 48));
}
// Emits LMG as a 48-bit instruction: 20-bit signed displacement d2,
// r1 at position 8, r3 at position 12, base register b2 at position 16.
inline void Assembler::z_lmg(Register r1, Register r3, int64_t d2, Register b2) {
  emit_48(LMG_ZOPC
          | simm20(d2)
          | reg(r1, 8, 48)
          | reg(r3, 12, 48)
          | reg(b2, 16, 48));
}
@ -702,6 +704,421 @@ inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2)
// Emits CVDG as a 48-bit instruction: r1 (via regt) at position 8, index register x2
// at position 12, base register b2 at position 16, 20-bit signed displacement d2.
inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) {
  emit_48(CVDG_ZOPC
          | regt(r1, 8, 48)
          | reg(x2, 12, 48)
          | reg(b2, 16, 48)
          | simm20(d2));
}
//---------------------------
//-- Vector Instructions --
//---------------------------
//---< Vector Support Instructions >---
// Load (transfer from memory)
inline void Assembler::z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VLM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
inline void Assembler::z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VL_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
inline void Assembler::z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }
// Gather/Scatter
// Gather (VGEF/VGEG) and scatter (VSCEF/VSCEG) with FW resp. DW elements.
// The index operand is a vector register (vx2); m3 is the element index.
inline void Assembler::z_vgef(VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {
  emit_48(VGEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32));
}
inline void Assembler::z_vgeg(VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {
  emit_48(VGEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32));
}
inline void Assembler::z_vscef(VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {
  emit_48(VSCEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32));
}
inline void Assembler::z_vsceg(VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {
  emit_48(VSCEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32));
}
// load and replicate
inline void Assembler::z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLREP_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlrepb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_BYTE); }// load byte and replicate to all vector elements of type 'B'
inline void Assembler::z_vlreph( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_HW); } // load HW and replicate to all vector elements of type 'H'
inline void Assembler::z_vlrepf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_FW); } // load FW and replicate to all vector elements of type 'F'
inline void Assembler::z_vlrepg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_DW); } // load DW and replicate to all vector elements of type 'G'
inline void Assembler::z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLLEZ_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vllezb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_BYTE); }// load logical byte into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezh( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_HW); } // load logical HW into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_FW); } // load logical FW into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_DW); } // load logical DW into left DW of VR, zero all other bit positions.
// VLBB: memory operand d2(x2,b2); m3 is a 4-bit unsigned field at bit 32.
inline void Assembler::z_vlbb(VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {
  emit_48(VLBB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(m3, 32, 48));
}
// VLL: general register r3 is encoded at bit 12, memory operand d2(b2).
inline void Assembler::z_vll(VectorRegister v1, Register r3, int64_t d2, Register b2) {
  emit_48(VLL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2));
}
// Load (register to register)
inline void Assembler::z_vlr ( VectorRegister v1, VectorRegister v2) {emit_48(VLR_ZOPC | vreg(v1, 8) | vreg(v2, 12)); }
inline void Assembler::z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VLGV_ZOPC | reg(r1, 8, 48) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_BYTE); } // load byte from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_HW); } // load HW from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_FW); } // load FW from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_DW); } // load DW from VR element (index d2(b2)) into GR.
inline void Assembler::z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_BYTE); }
inline void Assembler::z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_HW); }
inline void Assembler::z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_FW); }
inline void Assembler::z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_DW); }
inline void Assembler::z_vlvgp( VectorRegister v1, Register r2, Register r3) {emit_48(VLVGP_ZOPC | vreg(v1, 8) | reg(r2, 12, 48) | reg(r3, 16, 48)); }
// vector register pack
// VPK: generic form; m4 selects the element type (VRET_HW .. VRET_DW).
inline void Assembler::z_vpk(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VPK_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32));
}
// Vector element type 'H'.
inline void Assembler::z_vpkh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vpk(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vpkf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vpk(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vpkg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vpk(v1, v2, v3, VRET_DW);
}
inline void Assembler::z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vpkshs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vpksfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vpksgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
inline void Assembler::z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKLS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// vector register unpack (sign-extended)
// VUPH: generic form; m3 selects the element type (VRET_BYTE .. VRET_FW).
inline void Assembler::z_vuph(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VUPH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vuphb(VectorRegister v1, VectorRegister v2) {
  z_vuph(v1, v2, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vuphh(VectorRegister v1, VectorRegister v2) {
  z_vuph(v1, v2, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vuphf(VectorRegister v1, VectorRegister v2) {
  z_vuph(v1, v2, VRET_FW);
}
// VUPL: generic form; m3 selects the element type (VRET_BYTE .. VRET_FW).
inline void Assembler::z_vupl(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VUPL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vuplb(VectorRegister v1, VectorRegister v2) {
  z_vupl(v1, v2, VRET_BYTE);
}
// Vector element type 'H'.
// Note: this two-operand z_vuplh is VUPL with HW elements; the three-operand
// overload of the same name emits VUPLH_ZOPC. Only the arity tells them apart.
inline void Assembler::z_vuplh(VectorRegister v1, VectorRegister v2) {
  z_vupl(v1, v2, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vuplf(VectorRegister v1, VectorRegister v2) {
  z_vupl(v1, v2, VRET_FW);
}
// vector register unpack (zero-extended)
// VUPLH: generic form; m3 selects the element type (VRET_BYTE .. VRET_FW).
// Note: a two-operand overload named z_vuplh also exists (VUPL with HW elements).
inline void Assembler::z_vuplh(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VUPLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vuplhb(VectorRegister v1, VectorRegister v2) {
  z_vuplh(v1, v2, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vuplhh(VectorRegister v1, VectorRegister v2) {
  z_vuplh(v1, v2, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vuplhf(VectorRegister v1, VectorRegister v2) {
  z_vuplh(v1, v2, VRET_FW);
}
// VUPLL: generic form; m3 selects the element type (VRET_BYTE .. VRET_FW).
inline void Assembler::z_vupll(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VUPLL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vupllb(VectorRegister v1, VectorRegister v2) {
  z_vupll(v1, v2, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vupllh(VectorRegister v1, VectorRegister v2) {
  z_vupll(v1, v2, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vupllf(VectorRegister v1, VectorRegister v2) {
  z_vupll(v1, v2, VRET_FW);
}
// vector register merge high/low
// VMRH (merge high): generic form; m4 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vmrh(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VMRH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vmrhb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmrh(v1, v2, v3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vmrhh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmrh(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vmrhf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmrh(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vmrhg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmrh(v1, v2, v3, VRET_DW);
}
// VMRL (merge low): generic form; m4 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vmrl(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VMRL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// The typed variants must delegate to z_vmrl. They previously called z_vmrh and
// therefore emitted the merge-HIGH opcode instead of merge-LOW.
// Vector element type 'B'.
inline void Assembler::z_vmrlb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmrl(v1, v2, v3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vmrlh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmrl(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vmrlf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmrl(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vmrlg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmrl(v1, v2, v3, VRET_DW);
}
// vector register permute
// VPERM: four vector register operands; v4 is encoded at bit 32.
inline void Assembler::z_vperm(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  emit_48(VPERM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32));
}
// VPDI: three vector register operands plus a 4-bit unsigned m4 at bit 32.
inline void Assembler::z_vpdi(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VPDI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm4(m4, 32, 48));
}
// vector register replicate
// VREP: generic form; imm2 is a 16-bit immediate at bit 16,
// m4 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vrep(VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4) {
  emit_48(VREP_ZOPC | vreg(v1, 8) | vreg(v3, 12) | simm16(imm2, 16, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vrepb(VectorRegister v1, VectorRegister v3, int64_t imm2) {
  z_vrep(v1, v3, imm2, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vreph(VectorRegister v1, VectorRegister v3, int64_t imm2) {
  z_vrep(v1, v3, imm2, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vrepf(VectorRegister v1, VectorRegister v3, int64_t imm2) {
  z_vrep(v1, v3, imm2, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vrepg(VectorRegister v1, VectorRegister v3, int64_t imm2) {
  z_vrep(v1, v3, imm2, VRET_DW);
}
// VREPI: generic form; imm2 is a 16-bit signed immediate at bit 16,
// m3 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vrepi(VectorRegister v1, int64_t imm2, int64_t m3) {
  emit_48(VREPI_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vrepib(VectorRegister v1, int64_t imm2) {
  z_vrepi(v1, imm2, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vrepih(VectorRegister v1, int64_t imm2) {
  z_vrepi(v1, imm2, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vrepif(VectorRegister v1, int64_t imm2) {
  z_vrepi(v1, imm2, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vrepig(VectorRegister v1, int64_t imm2) {
  z_vrepi(v1, imm2, VRET_DW);
}
// VSEL: four vector register operands; v4 is encoded at bit 32.
inline void Assembler::z_vsel(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  emit_48(VSEL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32));
}
// VSEG: two vector register operands plus a 4-bit unsigned m3 at bit 32.
inline void Assembler::z_vseg(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VSEG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | uimm4(m3, 32, 48));
}
// Load (immediate)
// VLEIB/VLEIH/VLEIF/VLEIG: load a 16-bit signed immediate into one element
// (byte/HW/FW/DW) of v1; m3 is the element index.
//
// The immediate is placed at bit 16, the same position z_vrepi and z_vgbm use
// for their 16-bit immediates. It was previously placed at bit 32, where it
// overlapped the element index field that veix_mask() also puts at bit 32.
// NOTE(review): assumes the second argument of simm16() is the field's start
// bit, as its other uses in this file suggest - confirm against the helper.
inline void Assembler::z_vleib(VectorRegister v1, int64_t imm2, int64_t m3) {
  emit_48(VLEIB_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | veix_mask(m3, VRET_BYTE, 32));
}
inline void Assembler::z_vleih(VectorRegister v1, int64_t imm2, int64_t m3) {
  emit_48(VLEIH_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | veix_mask(m3, VRET_HW, 32));
}
inline void Assembler::z_vleif(VectorRegister v1, int64_t imm2, int64_t m3) {
  emit_48(VLEIF_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | veix_mask(m3, VRET_FW, 32));
}
inline void Assembler::z_vleig(VectorRegister v1, int64_t imm2, int64_t m3) {
  emit_48(VLEIG_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | veix_mask(m3, VRET_DW, 32));
}
// Store
inline void Assembler::z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VSTM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
inline void Assembler::z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VST_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
inline void Assembler::z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }
inline void Assembler::z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VSTL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }
// Misc
// VGM: generic form; imm2 and imm3 are 8-bit unsigned immediates (bits 16 and 24),
// m4 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vgm(VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4) {
  emit_48(VGM_ZOPC | vreg(v1, 8) | uimm8(imm2, 16, 48) | uimm8(imm3, 24, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vgmb(VectorRegister v1, int64_t imm2, int64_t imm3) {
  z_vgm(v1, imm2, imm3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vgmh(VectorRegister v1, int64_t imm2, int64_t imm3) {
  z_vgm(v1, imm2, imm3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vgmf(VectorRegister v1, int64_t imm2, int64_t imm3) {
  z_vgm(v1, imm2, imm3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vgmg(VectorRegister v1, int64_t imm2, int64_t imm3) {
  z_vgm(v1, imm2, imm3, VRET_DW);
}
// VGBM: imm2 is a 16-bit unsigned immediate at bit 16.
inline void Assembler::z_vgbm(VectorRegister v1, int64_t imm2) {
  emit_48(VGBM_ZOPC | vreg(v1, 8) | uimm16(imm2, 16, 48));
}
// Preferred method to set a vreg to all zeroes.
inline void Assembler::z_vzero(VectorRegister v1) {
  z_vgbm(v1, 0);
}
// Preferred method to set a vreg to all ones.
inline void Assembler::z_vone(VectorRegister v1) {
  z_vgbm(v1, 0xffff);
}
//---< Vector Arithmetic Instructions >---
// Load
// VLC: generic form; m3 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vlc(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VLC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vlcb(VectorRegister v1, VectorRegister v2) {
  z_vlc(v1, v2, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vlch(VectorRegister v1, VectorRegister v2) {
  z_vlc(v1, v2, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vlcf(VectorRegister v1, VectorRegister v2) {
  z_vlc(v1, v2, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vlcg(VectorRegister v1, VectorRegister v2) {
  z_vlc(v1, v2, VRET_DW);
}
// VLP: generic form; m3 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vlp(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VLP_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vlpb(VectorRegister v1, VectorRegister v2) {
  z_vlp(v1, v2, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vlph(VectorRegister v1, VectorRegister v2) {
  z_vlp(v1, v2, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vlpf(VectorRegister v1, VectorRegister v2) {
  z_vlp(v1, v2, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vlpg(VectorRegister v1, VectorRegister v2) {
  z_vlp(v1, v2, VRET_DW);
}
// ADD
// VA: generic form; m4 selects the element type (VRET_BYTE .. VRET_QW).
inline void Assembler::z_va(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vab(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vah(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vaf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vag(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_DW);
}
// Vector element type 'Q'.
inline void Assembler::z_vaq(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_QW);
}
// VACC: generic form; m4 selects the element type (VRET_BYTE .. VRET_QW).
inline void Assembler::z_vacc(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VACC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vaccb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vacch(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vaccf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vaccg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_DW);
}
// Vector element type 'Q'.
inline void Assembler::z_vaccq(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_QW);
}
// SUB
// VS: generic form; m4 selects the element type (VRET_BYTE .. VRET_QW).
inline void Assembler::z_vs(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vsb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vsh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vsf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vsg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_DW);
}
// Vector element type 'Q'.
inline void Assembler::z_vsq(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_QW);
}
// VSCBI: generic form; m4 selects the element type (VRET_BYTE .. VRET_QW).
inline void Assembler::z_vscbi(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VSCBI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vscbib(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vscbih(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vscbif(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vscbig(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_DW);
}
// Vector element type 'Q'.
inline void Assembler::z_vscbiq(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_QW);
}
// MULTIPLY
inline void Assembler::z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VML_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VME_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
// MULTIPLY & ADD
inline void Assembler::z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmalh( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmae( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmale( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmao( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmalo( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
// VECTOR SUM
inline void Assembler::z_vsum( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_HW, 32)); }
inline void Assembler::z_vsumb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsum(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vsumh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsum(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vsumg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_FW, 32)); }
inline void Assembler::z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_HW); } // vector element type 'B'
inline void Assembler::z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_FW); } // vector element type 'H'
inline void Assembler::z_vsumq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_DW, 32)); }
inline void Assembler::z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_FW); } // vector element type 'B'
inline void Assembler::z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_DW); } // vector element type 'H'
// Average
// VAVG: generic form; m4 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vavg(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VAVG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vavgb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavg(v1, v2, v3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vavgh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavg(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vavgf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavg(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vavgg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavg(v1, v2, v3, VRET_DW);
}
// VAVGL: generic form; m4 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vavgl(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VAVGL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vavglb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavgl(v1, v2, v3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vavglh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavgl(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vavglf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavgl(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vavglg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavgl(v1, v2, v3, VRET_DW);
}
// VECTOR Galois Field Multiply Sum
// VGFM: generic form; m4 selects the element type (VRET_BYTE .. VRET_DW).
inline void Assembler::z_vgfm(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VGFM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// Vector element type 'B'.
inline void Assembler::z_vgfmb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vgfm(v1, v2, v3, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vgfmh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vgfm(v1, v2, v3, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vgfmf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vgfm(v1, v2, v3, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vgfmg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vgfm(v1, v2, v3, VRET_DW);
}
// VGFMA: generic form; m5 selects the element type (VRET_BYTE .. VRET_DW).
// Four vector operands: v1/v2/v3 at bits 8/12/16 and v4 at bit 32, the same
// layout the z_vmal family uses. Previously v4 was never encoded (v3 was
// OR-ed in twice), so the fourth operand was silently dropped.
inline void Assembler::z_vgfma(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {
  emit_48(VGFMA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_DW, 20));
}
// Vector element type 'B'.
inline void Assembler::z_vgfmab(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  z_vgfma(v1, v2, v3, v4, VRET_BYTE);
}
// Vector element type 'H'.
inline void Assembler::z_vgfmah(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  z_vgfma(v1, v2, v3, v4, VRET_HW);
}
// Vector element type 'F'.
inline void Assembler::z_vgfmaf(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  z_vgfma(v1, v2, v3, v4, VRET_FW);
}
// Vector element type 'G'.
inline void Assembler::z_vgfmag(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  z_vgfma(v1, v2, v3, v4, VRET_DW);
}
//---< Vector Logical Instructions >---
// All emitters in this group take three vector registers and no element-size code;
// they differ only in the opcode constant that is OR-ed into the instruction.
// AND
inline void Assembler::z_vn( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VN_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// NOTE(review): VNC is presumably "AND with complement" - confirm against the VNC_ZOPC definition.
inline void Assembler::z_vnc( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// XOR
inline void Assembler::z_vx( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// NOR
inline void Assembler::z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// OR
inline void Assembler::z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// Comparison (element-wise)
// z_vceq is the generic form: m4 is the element-size code, cc5 the condition-code control value (passed to voprc_ccmask).
// The b/h/f/g wrappers pass VOPRC_CCIGN; the bs/hs/fs/gs wrappers pass VOPRC_CCSET.
inline void Assembler::z_vceq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCEQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vceqb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
inline void Assembler::z_vceqh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vceqf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vceqg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
inline void Assembler::z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// z_vch is the generic form: m4 is the element-size code, cc5 the condition-code control value (passed to voprc_ccmask).
// The b/h/f/g wrappers pass VOPRC_CCIGN; the bs/hs/fs/gs wrappers pass VOPRC_CCSET.
inline void Assembler::z_vch( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vchb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
inline void Assembler::z_vchh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vchf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vchg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vchbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
inline void Assembler::z_vchhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vchfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vchgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// z_vchl is the generic form: m4 is the element-size code, cc5 the condition-code control value (passed to voprc_ccmask).
// The b/h/f/g wrappers pass VOPRC_CCIGN; the bs/hs/fs/gs wrappers pass VOPRC_CCSET.
inline void Assembler::z_vchl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCHL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vchlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
inline void Assembler::z_vchlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vchlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vchlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
inline void Assembler::z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// Max/Min (element-wise)
// Four generic emitters (z_vmx, z_vmxl, z_vmn, z_vmnl), each taking an explicit element-size code m4,
// followed by their b/h/f/g wrappers that fix the element size.
// NOTE(review): the 'l' forms are presumably the logical (unsigned) variants - confirm against the opcode definitions.
inline void Assembler::z_vmx( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmxb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmxh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmxf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmxg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vmxl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMXL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmxlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmxlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmxlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmxlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vmn( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMN_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmnb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmnh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmnf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmng( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vmnl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMNL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmnlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmnlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmnlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmnlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_DW); } // vector element type 'G'
// Leading/Trailing Zeros, population count
// Two-register emitters (v1, v2) with an element-size code m3.
// z_vclz and z_vctz have b/h/f/g wrappers; z_vpopct has only the generic form in this section.
inline void Assembler::z_vclz( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VCLZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vclzb( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vclzh( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vclzf( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vclzg( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vctz( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VCTZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vctzb( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vctzh( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vctzf( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vctzg( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VPOPCT_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
// Rotate/Shift
// z_verllv takes three vector registers plus the element-size code m4.
// z_verll takes two vector registers (v1, v3) and a displacement/base pair (d2, b2) that is encoded via rsmask_48.
// The b/h/f/g wrappers fix the element size.
inline void Assembler::z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VERLLV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VERLL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
// VECTOR ELEMENT ROTATE AND INSERT UNDER MASK, generic form: three vector registers, an 8-bit immediate imm4
// and the element-size code m5.
// Must emit the VERIM opcode; emitting VERLL_ZOPC here would produce a VERLL instruction with the operand
// fields of a different instruction format.
// NOTE(review): relies on VERIM_ZOPC being defined next to the other vector opcode constants - confirm.
inline void Assembler::z_verim( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5) {emit_48(VERIM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48) | vesc_mask(m5, VRET_BYTE, VRET_DW, 32)); }
// Fixed-element-size wrappers: each forwards its registers and the immediate imm4 to z_verim and supplies the element-size code.
inline void Assembler::z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_HW); } // vector element type 'H'
inline void Assembler::z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_FW); } // vector element type 'F'
inline void Assembler::z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_DW); } // vector element type 'G'
// Element shifts. Each operation comes in two generic forms, both taking the element-size code m4:
//   - the '...v' form (z_veslv, z_vesrav, z_vesrlv) takes three vector registers;
//   - the plain form (z_vesl, z_vesra, z_vesrl) takes two vector registers (v1, v3) and a displacement/base pair (d2, b2).
// Every generic form is followed by b/h/f/g wrappers that fix the element size.
inline void Assembler::z_veslv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESLV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vesl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_veslb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_BYTE);} // vector element type 'B'
inline void Assembler::z_veslh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_veslf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_veslg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
inline void Assembler::z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESRAV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vesra( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRA_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
inline void Assembler::z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESRLV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_DW); } // vector element type 'G'
inline void Assembler::z_vesrl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
// Shift emitters without an element-size code: three vector registers each.
// z_vsldb additionally encodes an 8-bit immediate imm4 via uimm8.
// NOTE(review): these are presumably the whole-vector shifts (by bit / by byte) - confirm against the opcode definitions.
inline void Assembler::z_vsl( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vslb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsldb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {emit_48(VSLDB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48)); }
inline void Assembler::z_vsra( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrab( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRAB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrl( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// Test under Mask
// Two vector registers, no element-size code and no further operands.
inline void Assembler::z_vtm( VectorRegister v1, VectorRegister v2) {emit_48(VTM_ZOPC | vreg(v1, 8) | vreg(v2, 12)); }
//---< Vector String Instructions >---
// Generic forms take an element-size code (accepted bounds VRET_BYTE..VRET_FW, i.e. no doubleword variant here)
// and a control value cc5/cc6 that is encoded via voprc_any.
// The b/h/f wrappers fix the element size and pass the control value through.
// The z_vistr{b,h,f}s wrappers additionally fix the control value to VOPRC_CCSET.
inline void Assembler::z_vfae( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFAE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find any element
inline void Assembler::z_vfaeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfaeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfaef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vfee( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFEE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find element equal
inline void Assembler::z_vfeeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfeeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfeef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vfene( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFENE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find element not equal
inline void Assembler::z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_FW, cc5); }
// Four-register form: v4 is encoded at position 32 and the element-size code at position 20.
inline void Assembler::z_vstrc( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6) {emit_48(VSTRC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(imm5, VRET_BYTE, VRET_FW, 20) | voprc_any(cc6, 24) ); } // String range compare
inline void Assembler::z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_BYTE, cc6); }
inline void Assembler::z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_HW, cc6); }
inline void Assembler::z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_FW, cc6); }
inline void Assembler::z_vistr( VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5) {emit_48(VISTR_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(imm3, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // isolate string
inline void Assembler::z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_BYTE, cc5); }
inline void Assembler::z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_HW, cc5); }
inline void Assembler::z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_FW, cc5); }
inline void Assembler::z_vistrbs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_BYTE, VOPRC_CCSET); }
inline void Assembler::z_vistrhs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_HW, VOPRC_CCSET); }
inline void Assembler::z_vistrfs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_FW, VOPRC_CCSET); }
//-------------------------------
// FLOAT INSTRUCTIONS
//-------------------------------

View File

@ -2713,13 +2713,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
metadata2reg(md->constant_encoding(), mdo);
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
Bytecodes::Code bc = method->java_code_at_bci(bci);
const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes.
if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
!callee_is_static && // Required for optimized MH invokes.
C1ProfileVirtualCalls) {
// invokeinterface bytecodes
if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, tmp1, recv);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@
// Sorted according to sparc.
// z/Architecture remembers branch targets, so don't share vtables.
define_pd_global(bool, ShareVtableStubs, false);
define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.
define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks.

View File

@ -914,7 +914,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
//
// markOop displaced_header = obj->mark().set_unlocked();
// monitor->lock()->set_displaced_header(displaced_header);
// if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// // We stored the monitor address into the object's mark word.
// } else if (THREAD->is_lock_owned((address)displaced_header))
// // Simple recursive case.
@ -949,7 +949,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
z_stg(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
// if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// Store stack address of the BasicObjectLock (this is monitor) into object.
add2reg(object_mark_addr, oopDesc::mark_offset_in_bytes(), object);
@ -1021,7 +1021,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, Register object)
// if ((displaced_header = monitor->displaced_header()) == NULL) {
// // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
// monitor->set_obj(NULL);
// } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
// } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
// monitor->set_obj(NULL);
// } else {
@ -1062,7 +1062,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, Register object)
BasicLock::displaced_header_offset_in_bytes()));
z_bre(done); // displaced_header == 0 -> goto done
// } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
// } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
// monitor->set_obj(NULL);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -29,9 +29,9 @@
// This file holds platform-dependent routines used to write primitive
// jni types to the array of arguments passed into JavaCalls::call.
#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
#include "prims/jni.h"
class JNITypes : AllStatic {
// These functions write a java primitive type (in native format) to

View File

@ -4671,6 +4671,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));
mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));
resolve_oop_handle(mirror);
}
//---------------------------------------------------------------

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,3 +35,5 @@
REGISTER_DEFINITION(Register, noreg);
REGISTER_DEFINITION(FloatRegister, fnoreg);
REGISTER_DEFINITION(VectorRegister, vnoreg);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -46,3 +46,13 @@ const char* FloatRegisterImpl::name() const {
};
return is_valid() ? names[encoding()] : "fnoreg";
}
// Returns the printable name of this vector register ("Z_V0" .. "Z_V31"),
// or "vnoreg" when the register is not valid.
const char* VectorRegisterImpl::name() const {
  const char* names[number_of_registers] = {
    "Z_V0",  "Z_V1",  "Z_V2",  "Z_V3",  "Z_V4",  "Z_V5",  "Z_V6",  "Z_V7",
    "Z_V8",  "Z_V9",  "Z_V10", "Z_V11", "Z_V12", "Z_V13", "Z_V14", "Z_V15",
    "Z_V16", "Z_V17", "Z_V18", "Z_V19", "Z_V20", "Z_V21", "Z_V22", "Z_V23",
    "Z_V24", "Z_V25", "Z_V26", "Z_V27", "Z_V28", "Z_V29", "Z_V30", "Z_V31"
  };
  // Check validity first so an invalid register never indexes the table.
  // The placeholder is "vnoreg" (the vector no-register), not the float-register
  // placeholder "fnoreg", so diagnostics name the right register class.
  return is_valid() ? names[encoding()] : "vnoreg";
}

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,11 +34,6 @@ class VMRegImpl;
typedef VMRegImpl* VMReg;
// Use Register as shortcut.
class RegisterImpl;
typedef RegisterImpl* Register;
// The implementation of integer registers for z/Architecture.
// z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001
//
@ -57,6 +52,17 @@ typedef RegisterImpl* Register;
// f1,f3,f5,f7 General purpose (volatile)
// f8-f15 General purpose (nonvolatile)
//===========================
//=== Integer Registers ===
//===========================
// Register is the handle type used throughout the assembler; it is a pointer
// to the (here only forward-declared) RegisterImpl.
class RegisterImpl;
typedef RegisterImpl* Register;
// The implementation of integer registers for z/Architecture.
// Converts a register encoding into a Register handle: the encoding is
// widened to long and that value is reinterpreted as the pointer.
inline Register as_Register(int encoding) {
  const long widened_encoding = encoding;
  return (Register)widened_encoding;
}
@ -110,6 +116,11 @@ CONSTANT_REGISTER_DECLARATION(Register, Z_R13, (13));
CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14));
CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15));
//=============================
//=== Condition Registers ===
//=============================
// Use ConditionRegister as shortcut
class ConditionRegisterImpl;
typedef ConditionRegisterImpl* ConditionRegister;
@ -159,7 +170,7 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
// dangers of defines.
// If a particular file has a problem with these defines then it's possible
// to turn them off in that file by defining
// DONT_USE_REGISTER_DEFINES. Register_definition_s390.cpp does that
// DONT_USE_REGISTER_DEFINES. Register_definitions_s390.cpp does that
// so that it's able to provide real definitions of these registers
// for use in debuggers and such.
@ -186,6 +197,11 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
#define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
//=========================
//=== Float Registers ===
//=========================
// Use FloatRegister as shortcut
class FloatRegisterImpl;
typedef FloatRegisterImpl* FloatRegister;
@ -263,22 +279,6 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F15, (15));
#define Z_F15 ((FloatRegister)( Z_F15_FloatRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.
// Exports number_of_registers, the combined slot count over integer and
// float registers (two halves each) plus the condition code register.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
public:
enum {
number_of_registers =
(RegisterImpl::number_of_registers +
FloatRegisterImpl::number_of_registers)
* 2 // register halves
+ 1 // condition code register
};
// Declared only; the values are defined outside this header.
// NOTE(review): presumably the upper slot bounds of the GPR and FPR ranges - confirm.
static const int max_gpr;
static const int max_fpr;
};
// Single, Double and Quad fp reg classes. These exist to map the ADLC
// encoding for a floating point register, to the FloatRegister number
// desired by the macroassembler. A FloatRegister is a number between
@ -329,6 +329,161 @@ class QuadFloatRegisterImpl {
};
//==========================
//=== Vector Registers ===
//==========================
// Use VectorRegister as shortcut
class VectorRegisterImpl;
typedef VectorRegisterImpl* VectorRegister;
// The implementation of vector registers for z/Architecture.
// Turn a raw integer encoding into the opaque VectorRegister handle.
inline VectorRegister as_VectorRegister(int encoding) {
  const long widened = encoding;  // widen to long before the pointer cast
  return (VectorRegister)widened;
}
// Implementation class behind the VectorRegister handle. A register is
// identified by its encoding; valid encodings are 0 .. number_of_registers-1.
class VectorRegisterImpl: public AbstractRegisterImpl {
public:
enum {
number_of_registers = 32,
number_of_arg_registers = 0
};
// construction
inline friend VectorRegister as_VectorRegister(int encoding);
inline VMReg as_VMReg();
// accessors
// Returns the register number; asserts that it is in the valid range.
int encoding() const {
assert(is_valid(), "invalid register"); return value();
}
bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
// Every vector register reports itself as volatile.
bool is_volatile() const { return true; }
bool is_nonvolatile() const { return false; }
// Register fields in z/Architecture instructions are 4 bits wide, restricting the
// addressable register set size to 16.
// The vector register set size is 32, requiring an extension, by one bit, of the
// register encoding. This is accomplished by the introduction of a RXB field in the
// instruction. RXB = Register eXtension Bits.
// The RXB field contains the MSBs (most significant bits) of the vector register numbers
// used for this instruction. Assignment of MSB in RXB is by bit position of the
// register field in the instruction.
// Example:
// The register field starting at bit position 12 in the instruction is assigned RXB bit 0b0100.
//
// Returns the RXB bit for this register when it occupies the register field
// starting at instruction bit position pos (8, 12, 16 or 32), already shifted
// into place. Registers with encoding < 16 need no extension bit and yield 0.
// Any other pos for a register >= 16 hits ShouldNotReachHere().
int64_t RXB_mask(int pos) {
if (encoding() >= number_of_registers/2) {
switch (pos) {
case 8: return ((int64_t)0b1000) << 8; // actual bit pos: 36
case 12: return ((int64_t)0b0100) << 8; // actual bit pos: 37
case 16: return ((int64_t)0b0010) << 8; // actual bit pos: 38
case 32: return ((int64_t)0b0001) << 8; // actual bit pos: 39
default:
ShouldNotReachHere();
}
}
return 0;
}
const char* name() const;
// Register with the next higher encoding. No range check is done on the result here.
VectorRegister successor() const { return as_VectorRegister(encoding() + 1); }
};
// The Vector registers of z/Architecture.
// vnoreg carries encoding -1, which VectorRegisterImpl::is_valid() rejects;
// Z_V0 .. Z_V31 carry encodings 0 .. 31.
CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg, (-1));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V0, (0));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V1, (1));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V2, (2));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V3, (3));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V4, (4));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V5, (5));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V6, (6));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V7, (7));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V8, (8));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V9, (9));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V10, (10));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V11, (11));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V12, (12));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V13, (13));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V14, (14));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V15, (15));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V16, (16));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V17, (17));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V18, (18));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V19, (19));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V20, (20));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V21, (21));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V22, (22));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V23, (23));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V24, (24));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V25, (25));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V26, (26));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V27, (27));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V28, (28));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V29, (29));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V30, (30));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V31, (31));
// Unless DONT_USE_REGISTER_DEFINES is defined, each vector register name is a
// macro that casts the matching <name>_VectorRegisterEnumValue to VectorRegister.
#ifndef DONT_USE_REGISTER_DEFINES
#define vnoreg ((VectorRegister)(vnoreg_VectorRegisterEnumValue))
#define Z_V0 ((VectorRegister)( Z_V0_VectorRegisterEnumValue))
#define Z_V1 ((VectorRegister)( Z_V1_VectorRegisterEnumValue))
#define Z_V2 ((VectorRegister)( Z_V2_VectorRegisterEnumValue))
#define Z_V3 ((VectorRegister)( Z_V3_VectorRegisterEnumValue))
#define Z_V4 ((VectorRegister)( Z_V4_VectorRegisterEnumValue))
#define Z_V5 ((VectorRegister)( Z_V5_VectorRegisterEnumValue))
#define Z_V6 ((VectorRegister)( Z_V6_VectorRegisterEnumValue))
#define Z_V7 ((VectorRegister)( Z_V7_VectorRegisterEnumValue))
#define Z_V8 ((VectorRegister)( Z_V8_VectorRegisterEnumValue))
#define Z_V9 ((VectorRegister)( Z_V9_VectorRegisterEnumValue))
#define Z_V10 ((VectorRegister)( Z_V10_VectorRegisterEnumValue))
#define Z_V11 ((VectorRegister)( Z_V11_VectorRegisterEnumValue))
#define Z_V12 ((VectorRegister)( Z_V12_VectorRegisterEnumValue))
#define Z_V13 ((VectorRegister)( Z_V13_VectorRegisterEnumValue))
#define Z_V14 ((VectorRegister)( Z_V14_VectorRegisterEnumValue))
#define Z_V15 ((VectorRegister)( Z_V15_VectorRegisterEnumValue))
#define Z_V16 ((VectorRegister)( Z_V16_VectorRegisterEnumValue))
#define Z_V17 ((VectorRegister)( Z_V17_VectorRegisterEnumValue))
#define Z_V18 ((VectorRegister)( Z_V18_VectorRegisterEnumValue))
#define Z_V19 ((VectorRegister)( Z_V19_VectorRegisterEnumValue))
#define Z_V20 ((VectorRegister)( Z_V20_VectorRegisterEnumValue))
#define Z_V21 ((VectorRegister)( Z_V21_VectorRegisterEnumValue))
#define Z_V22 ((VectorRegister)( Z_V22_VectorRegisterEnumValue))
#define Z_V23 ((VectorRegister)( Z_V23_VectorRegisterEnumValue))
#define Z_V24 ((VectorRegister)( Z_V24_VectorRegisterEnumValue))
#define Z_V25 ((VectorRegister)( Z_V25_VectorRegisterEnumValue))
#define Z_V26 ((VectorRegister)( Z_V26_VectorRegisterEnumValue))
#define Z_V27 ((VectorRegister)( Z_V27_VectorRegisterEnumValue))
#define Z_V28 ((VectorRegister)( Z_V28_VectorRegisterEnumValue))
#define Z_V29 ((VectorRegister)( Z_V29_VectorRegisterEnumValue))
#define Z_V30 ((VectorRegister)( Z_V30_VectorRegisterEnumValue))
#define Z_V31 ((VectorRegister)( Z_V31_VectorRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
// SharedInfo needs the total number of registers of all sorts.
// This class exists to export that count.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
 public:
  enum {
    // One slot for the condition code register, plus two slots (register
    // halves) for each integer and each floating-point register.
    number_of_registers =
      1 + 2 * (RegisterImpl::number_of_registers +
               FloatRegisterImpl::number_of_registers)
  };
  static const int max_fpr;
  static const int max_gpr;
};
// Common register declarations used in assembler code.
REGISTER_DECLARATION(Register, Z_EXC_OOP, Z_R2);
REGISTER_DECLARATION(Register, Z_EXC_PC, Z_R3);

View File

@ -3149,7 +3149,7 @@ operand noArg_iRegI() %{
interface(REG_INTER);
%}
// Revenregi and roddRegI constitute and even-odd-pair.
// revenRegI and roddRegI constitute an even-odd pair.
operand revenRegI() %{
constraint(ALLOC_IN_RC(z_rarg3_int_reg));
match(iRegI);
@ -3157,7 +3157,7 @@ operand revenRegI() %{
interface(REG_INTER);
%}
// Revenregi and roddRegI constitute and even-odd-pair.
// revenRegI and roddRegI constitute an even-odd pair.
operand roddRegI() %{
constraint(ALLOC_IN_RC(z_rarg4_int_reg));
match(iRegI);
@ -3283,7 +3283,7 @@ operand memoryRegP() %{
interface(REG_INTER);
%}
// Revenregp and roddRegP constitute and even-odd-pair.
// revenRegP and roddRegP constitute an even-odd pair.
operand revenRegP() %{
constraint(ALLOC_IN_RC(z_rarg3_ptr_reg));
match(iRegP);
@ -3291,7 +3291,7 @@ operand revenRegP() %{
interface(REG_INTER);
%}
// Revenregl and roddRegL constitute and even-odd-pair.
// revenRegP and roddRegP constitute an even-odd pair.
operand roddRegP() %{
constraint(ALLOC_IN_RC(z_rarg4_ptr_reg));
match(iRegP);
@ -3380,7 +3380,7 @@ operand iRegL() %{
interface(REG_INTER);
%}
// Revenregl and roddRegL constitute and even-odd-pair.
// revenRegL and roddRegL constitute an even-odd pair.
operand revenRegL() %{
constraint(ALLOC_IN_RC(z_rarg3_long_reg));
match(iRegL);
@ -3388,7 +3388,7 @@ operand revenRegL() %{
interface(REG_INTER);
%}
// Revenregl and roddRegL constitute and even-odd-pair.
// revenRegL and roddRegL constitute an even-odd pair.
operand roddRegL() %{
constraint(ALLOC_IN_RC(z_rarg4_long_reg));
match(iRegL);
@ -6443,6 +6443,32 @@ instruct mulL_Reg_mem(iRegL dst, memory src)%{
ins_pipe(pipe_class_dummy);
%}
// Signed "multiply high" for longs: Rdst = upper 64 bits of the signed
// 128-bit product Rsrc1 * Rsrc2. Rdst/Rsrc1 are constrained to the
// even/odd operand classes because z_mlgr writes its result to that pair;
// Rsrc1 is therefore destroyed (USE_KILL) and Rtmp1 is a scratch register.
instruct mulHiL_reg_reg(revenRegL Rdst, roddRegL Rsrc1, iRegL Rsrc2, iRegL Rtmp1, flagsReg cr)%{
match(Set Rdst (MulHiL Rsrc1 Rsrc2));
effect(TEMP_DEF Rdst, USE_KILL Rsrc1, TEMP Rtmp1, KILL cr);
ins_cost(7*DEFAULT_COST);
// TODO: s390 port size(VARIABLE_SIZE);
format %{ "MulHiL $Rdst, $Rsrc1, $Rsrc2\t # Multiply High Long" %}
ins_encode%{
Register dst = $Rdst$$Register;
Register src1 = $Rsrc1$$Register;
Register src2 = $Rsrc2$$Register;
Register tmp1 = $Rtmp1$$Register;
// dst doubles as a second scratch register until the multiply overwrites it.
Register tmp2 = $Rdst$$Register;
// z/Architecture has only unsigned multiply (64 * 64 -> 128).
// implementing mulhs(a,b) = mulhu(a,b) - (a & (b>>63)) - (b & (a>>63))
// (a = src1, b = src2; >> is an arithmetic shift, so x>>63 is all ones for negative x, else zero.)
__ z_srag(tmp2, src1, 63); // a>>63
__ z_srag(tmp1, src2, 63); // b>>63
__ z_ngr(tmp2, src2); // b & (a>>63)
__ z_ngr(tmp1, src1); // a & (b>>63)
__ z_agr(tmp1, tmp2); // ((a & (b>>63)) + (b & (a>>63)))
__ z_mlgr(dst, src2); // tricky: 128-bit product is written to even/odd pair (dst,src1),
// multiplicand is taken from oddReg (src1), multiplier in src2.
__ z_sgr(dst, tmp1); // subtract the correction term from the unsigned high word
%}
ins_pipe(pipe_class_dummy);
%}
// DIV
// Integer DIVMOD with Register, both quotient and mod results

View File

@ -2382,6 +2382,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
if (is_static) {
__ mem2reg_opt(obj, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
__ mem2reg_opt(obj, Address(obj, Klass::java_mirror_offset()));
__ resolve_oop_handle(obj);
}
}

View File

@ -224,7 +224,7 @@ void VM_Version::initialize() {
}
// z/Architecture supports 8-byte compare-exchange operations
// (see Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
// (see Atomic::cmpxchg)
// and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
_supports_cx8 = true;
@ -706,13 +706,13 @@ void VM_Version::determine_features() {
Label getCPUFEATURES; // fcode = -1 (cache)
Label getCIPHERFEATURES; // fcode = -2 (cipher)
Label getMSGDIGESTFEATURES; // fcode = -3 (SHA)
Label checkLongDispFast;
Label noLongDisp;
Label posDisp, negDisp;
Label getVECTORFEATURES; // fcode = -4 (OS support for vector instructions)
Label errRTN;
a->z_ltgfr(Z_R0, Z_ARG2); // Buf len to r0 and test.
a->z_brl(getFEATURES); // negative -> Get machine features.
a->z_brz(checkLongDispFast); // zero -> Check for high-speed Long Displacement Facility.
a->z_brl(getFEATURES); // negative -> Get machine features not covered by facility list.
a->z_lghi(Z_R1,0);
a->z_brz(errRTN); // zero -> Function code currently not used, indicate "aborted".
a->z_aghi(Z_R0, -1);
a->z_stfle(0, Z_ARG1);
a->z_lg(Z_R1, 0, Z_ARG1); // Get first DW of facility list.
@ -736,6 +736,8 @@ void VM_Version::determine_features() {
a->z_bre(getCIPHERFEATURES);
a->z_cghi(Z_R0, -3); // -3: Extract detailed crypto capabilities (msg digest instructions).
a->z_bre(getMSGDIGESTFEATURES);
a->z_cghi(Z_R0, -4); // -4: Verify vector instruction availability (OS support).
a->z_bre(getVECTORFEATURES);
a->z_xgr(Z_RET, Z_RET); // Not a valid function code.
a->z_br(Z_R14); // Return "operation aborted".
@ -766,46 +768,9 @@ void VM_Version::determine_features() {
a->z_ecag(Z_RET,Z_R0,0,Z_ARG3); // Extract information as requested by Z_ARG1 contents.
a->z_br(Z_R14);
// Check the performance of the Long Displacement Facility, i.e. find out if we are running on z900 or newer.
a->bind(checkLongDispFast);
a->z_llill(Z_R0, 0xffff); // preset #iterations
a->z_larl(Z_R1, posDisp);
a->z_stck(0, Z_ARG1); // Get begin timestamp.
a->bind(posDisp); // Positive disp loop.
a->z_lg(Z_ARG2, 0, Z_ARG1);
a->z_bctgr(Z_R0, Z_R1);
a->z_stck(0, Z_ARG1); // Get end timestamp.
a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1); // Calculate elapsed time.
a->z_lcgr(Z_ARG2, Z_ARG2);
a->z_srlg(Z_ARG2, Z_ARG2, 12); // LSB: now microseconds
a->z_stg(Z_ARG2, 8, Z_ARG1); // Store difference in buffer[1].
a->z_llill(Z_R0, 0xffff); // preset #iterations
a->z_larl(Z_R1, negDisp);
a->z_xgr(Z_ARG2, Z_ARG2); // Clear to detect absence of LongDisp facility.
a->z_stck(0, Z_ARG1); // Get begin timestamp.
a->z_la(Z_ARG1, 8, Z_ARG1);
a->bind(negDisp); // Negative disp loop.
a->z_lg(Z_ARG2, -8, Z_ARG1);
a->z_bctgr(Z_R0, Z_R1);
a->z_aghi(Z_ARG1, -8);
a->z_stck(0, Z_ARG1); // Get end timestamp.
a->z_ltgr(Z_ARG2, Z_ARG2); // Check for absence of LongDisp facility.
a->z_brz(noLongDisp);
a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1); // Calc elapsed time.
a->z_lcgr(Z_ARG2, Z_ARG2);
a->z_srlg(Z_ARG2, Z_ARG2, 12); // LSB: now microseconds
a->z_stg(Z_ARG2, 0, Z_ARG1); // store difference in buffer[0]
a->z_llill(Z_RET,0xffff);
a->z_br(Z_R14);
a->bind(noLongDisp);
a->z_lghi(Z_RET,-1);
// Use a vector instruction to verify OS support. Will fail with SIGFPE if OS support is missing.
a->bind(getVECTORFEATURES);
a->z_vtm(Z_V0,Z_V0); // non-destructive vector instruction. Will cause SIGFPE if not supported.
a->z_br(Z_R14);
address code_end = a->pc();
@ -962,6 +927,19 @@ void VM_Version::determine_features() {
_nfeatures = 0;
}
if (has_VectorFacility()) {
// Verify that feature can actually be used. OS support required.
call_getFeatures(buffer, -4, 0);
if (printVerbose) {
ttyLocker ttyl;
if (has_VectorFacility()) {
tty->print_cr(" Vector Facility has been verified to be supported by OS");
} else {
tty->print_cr(" Vector Facility has been disabled - not supported by OS");
}
}
}
// Extract Crypto Facility details.
if (has_Crypto()) {
// Get cipher features.

View File

@ -473,6 +473,8 @@ class VM_Version: public Abstract_VM_Version {
// Turn on the CryptoExtension5 bit in feature word 0.
static void set_has_CryptoExt5() {
  _features[0] = _features[0] | CryptoExtension5Mask;
}
// Turn on the Vector Facility bit in feature word 2.
static void set_has_VectorFacility() {
  _features[2] = _features[2] | VectorFacilityMask;
}
// Clear the Vector Facility bit in feature word 2, leaving all other bits intact.
static void reset_has_VectorFacility() {
  _features[2] = _features[2] & ~VectorFacilityMask;
}
// Assembler testing.
static void allow_all();
static void revert();

View File

@ -122,6 +122,7 @@ class Assembler : public AbstractAssembler {
fpop1_op3 = 0x34,
fpop2_op3 = 0x35,
impdep1_op3 = 0x36,
addx_op3 = 0x36,
aes3_op3 = 0x36,
sha_op3 = 0x36,
bmask_op3 = 0x36,
@ -133,6 +134,8 @@ class Assembler : public AbstractAssembler {
fzero_op3 = 0x36,
fsrc_op3 = 0x36,
fnot_op3 = 0x36,
mpmul_op3 = 0x36,
umulx_op3 = 0x36,
xmulx_op3 = 0x36,
crc32c_op3 = 0x36,
impdep2_op3 = 0x37,
@ -195,6 +198,9 @@ class Assembler : public AbstractAssembler {
fnegs_opf = 0x05,
fnegd_opf = 0x06,
addxc_opf = 0x11,
addxccc_opf = 0x13,
umulxhi_opf = 0x16,
alignaddr_opf = 0x18,
bmask_opf = 0x19,
@ -240,7 +246,8 @@ class Assembler : public AbstractAssembler {
sha256_opf = 0x142,
sha512_opf = 0x143,
crc32c_opf = 0x147
crc32c_opf = 0x147,
mpmul_opf = 0x148
};
enum op5s {
@ -380,7 +387,7 @@ class Assembler : public AbstractAssembler {
assert_signed_range(x, nbits + 2);
}
static void assert_unsigned_const(int x, int nbits) {
static void assert_unsigned_range(int x, int nbits) {
assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
}
@ -534,6 +541,12 @@ class Assembler : public AbstractAssembler {
return x & ((1 << nbits) - 1);
}
// Unsigned immediate occupying the low nbits bits of the result.
// The value is range-checked first, then masked down to nbits.
static int uimm(int x, int nbits) {
  assert_unsigned_range(x, nbits);
  const int low_bits_mask = (1 << nbits) - 1;
  return x & low_bits_mask;
}
// compute inverse of wdisp16
static intptr_t inv_wdisp16(int x, intptr_t pos) {
int lo = x & ((1 << 14) - 1);
@ -631,6 +644,9 @@ class Assembler : public AbstractAssembler {
// FMAf instructions supported only on certain processors
static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }
// MPMUL instruction supported only on certain processors.
// Guard for the mpmul emitter: asserts that VM_Version reports MPMUL support.
static void mpmul_only() { assert(VM_Version::has_mpmul(), "This instruction only works on SPARC with MPMUL"); }
// instruction only in VIS1
static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
@ -772,11 +788,12 @@ class Assembler : public AbstractAssembler {
AbstractAssembler::flush();
}
inline void emit_int32(int); // shadows AbstractAssembler::emit_int32
inline void emit_data(int);
inline void emit_data(int, RelocationHolder const &rspec);
inline void emit_data(int, relocInfo::relocType rtype);
// helper for above functions
inline void emit_int32(int32_t); // shadows AbstractAssembler::emit_int32
inline void emit_data(int32_t);
inline void emit_data(int32_t, RelocationHolder const&);
inline void emit_data(int32_t, relocInfo::relocType rtype);
// Helper for the above functions.
inline void check_delay();
@ -929,6 +946,10 @@ class Assembler : public AbstractAssembler {
// fmaf instructions.
inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
inline void fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
inline void fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
inline void fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
// pp 165
@ -960,6 +981,8 @@ class Assembler : public AbstractAssembler {
inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d,
RelocationHolder const &rspec = RelocationHolder());
inline void ldd(Register s1, Register s2, FloatRegister d);
inline void ldd(Register s1, int simm13a, FloatRegister d);
inline void ldfsr(Register s1, Register s2);
inline void ldfsr(Register s1, int simm13a);
@ -987,8 +1010,6 @@ class Assembler : public AbstractAssembler {
inline void lduw(Register s1, int simm13a, Register d);
inline void ldx(Register s1, Register s2, Register d);
inline void ldx(Register s1, int simm13a, Register d);
inline void ldd(Register s1, Register s2, Register d);
inline void ldd(Register s1, int simm13a, Register d);
// pp 177
@ -1157,6 +1178,9 @@ class Assembler : public AbstractAssembler {
inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2);
inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a);
inline void std(FloatRegister d, Register s1, Register s2);
inline void std(FloatRegister d, Register s1, int simm13a);
inline void stfsr(Register s1, Register s2);
inline void stfsr(Register s1, int simm13a);
inline void stxfsr(Register s1, Register s2);
@ -1177,8 +1201,6 @@ class Assembler : public AbstractAssembler {
inline void stw(Register d, Register s1, int simm13a);
inline void stx(Register d, Register s1, Register s2);
inline void stx(Register d, Register s1, int simm13a);
inline void std(Register d, Register s1, Register s2);
inline void std(Register d, Register s1, int simm13a);
// pp 177
@ -1267,6 +1289,9 @@ class Assembler : public AbstractAssembler {
// VIS3 instructions
inline void addxc(Register s1, Register s2, Register d);
inline void addxccc(Register s1, Register s2, Register d);
inline void movstosw(FloatRegister s, Register d);
inline void movstouw(FloatRegister s, Register d);
inline void movdtox(FloatRegister s, Register d);
@ -1276,6 +1301,7 @@ class Assembler : public AbstractAssembler {
inline void xmulx(Register s1, Register s2, Register d);
inline void xmulxhi(Register s1, Register s2, Register d);
inline void umulxhi(Register s1, Register s2, Register d);
// Crypto SHA instructions
@ -1287,6 +1313,10 @@ class Assembler : public AbstractAssembler {
inline void crc32c(FloatRegister s1, FloatRegister s2, FloatRegister d);
// MPMUL instruction
inline void mpmul(int uimm5);
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
#ifdef VALIDATE_PIPELINE

View File

@ -59,7 +59,7 @@ inline void Assembler::check_delay() {
#endif
}
inline void Assembler::emit_int32(int x) {
inline void Assembler::emit_int32(int32_t x) {
check_delay();
#ifdef VALIDATE_PIPELINE
_hazard_state = NoHazard;
@ -67,16 +67,16 @@ inline void Assembler::emit_int32(int x) {
AbstractAssembler::emit_int32(x);
}
inline void Assembler::emit_data(int x) {
inline void Assembler::emit_data(int32_t x) {
emit_int32(x);
}
inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
inline void Assembler::emit_data(int32_t x, relocInfo::relocType rtype) {
relocate(rtype);
emit_int32(x);
}
inline void Assembler::emit_data(int x, RelocationHolder const &rspec) {
inline void Assembler::emit_data(int32_t x, RelocationHolder const &rspec) {
relocate(rspec);
emit_int32(x);
}
@ -359,6 +359,19 @@ inline void Assembler::fmadd(FloatRegisterImpl::Width w, FloatRegister s1, Float
fmaf_only();
emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w));
}
// FMAf multiply-subtract for width w. Same layout as fmadd, with op5 = 0x4 + w.
inline void Assembler::fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
  fmaf_only();
  const int insn = op(arith_op) | op3(stpartialf_op3) | op5(0x4 + w) |
                   fd(d, w) | fs1(s1, w) | fs2(s2, w) | fs3(s3, w);
  emit_int32(insn);
}
// FMAf negated multiply-add for width w. Same layout as fmadd, with op5 = 0xc + w.
inline void Assembler::fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
  fmaf_only();
  const int insn = op(arith_op) | op3(stpartialf_op3) | op5(0xc + w) |
                   fd(d, w) | fs1(s1, w) | fs2(s2, w) | fs3(s3, w);
  emit_int32(insn);
}
// FMAf negated multiply-subtract for width w. Same layout as fmadd, with op5 = 0x8 + w.
inline void Assembler::fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
  fmaf_only();
  const int insn = op(arith_op) | op3(stpartialf_op3) | op5(0x8 + w) |
                   fd(d, w) | fs1(s1, w) | fs2(s2, w) | fs3(s3, w);
  emit_int32(insn);
}
inline void Assembler::flush(Register s1, Register s2) {
emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2));
@ -402,6 +415,15 @@ inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a,
emit_data(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);
}
// Double-width convenience form of ldf() with a register index (s1, s2).
// The destination float register must be even-numbered (asserted).
inline void Assembler::ldd(Register s1, Register s2, FloatRegister d) {
assert(d->is_even(), "not even");
ldf(FloatRegisterImpl::D, s1, s2, d);
}
// Double-width convenience form of ldf() with a 13-bit signed immediate offset.
// The destination float register must be even-numbered (asserted).
inline void Assembler::ldd(Register s1, int simm13a, FloatRegister d) {
assert(d->is_even(), "not even");
ldf(FloatRegisterImpl::D, s1, simm13a, d);
}
inline void Assembler::ldxfsr(Register s1, Register s2) {
emit_int32(op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2));
}
@ -460,16 +482,6 @@ inline void Assembler::ldx(Register s1, Register s2, Register d) {
inline void Assembler::ldx(Register s1, int simm13a, Register d) {
emit_data(op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
// Integer-register ldd, register-index form. d must be even-numbered.
// NOTE(review): v9_dep() presumably flags the instruction as deprecated in V9 - confirm.
inline void Assembler::ldd(Register s1, Register s2, Register d) {
  v9_dep();
  assert(d->is_even(), "not even");
  const int insn = op(ldst_op) | op3(ldd_op3) | rd(d) | rs1(s1) | rs2(s2);
  emit_int32(insn);
}
// Integer-register ldd, immediate-offset form. d must be even-numbered.
// NOTE(review): v9_dep() presumably flags the instruction as deprecated in V9 - confirm.
inline void Assembler::ldd(Register s1, int simm13a, Register d) {
  v9_dep();
  assert(d->is_even(), "not even");
  const int insn = op(ldst_op) | op3(ldd_op3) | rd(d) | rs1(s1) |
                   immed(true) | simm(simm13a, 13);
  emit_data(insn);
}
inline void Assembler::ldsba(Register s1, Register s2, int ia, Register d) {
emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -806,6 +818,15 @@ inline void Assembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register
emit_data(op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
// Double-width convenience form of stf() with a register index (s1, s2).
// The source float register must be even-numbered (asserted).
inline void Assembler::std(FloatRegister d, Register s1, Register s2) {
assert(d->is_even(), "not even");
stf(FloatRegisterImpl::D, d, s1, s2);
}
// Double-width convenience form of stf() with a 13-bit signed immediate offset.
// The source float register must be even-numbered (asserted).
inline void Assembler::std(FloatRegister d, Register s1, int simm13a) {
assert(d->is_even(), "not even");
stf(FloatRegisterImpl::D, d, s1, simm13a);
}
inline void Assembler::stxfsr(Register s1, Register s2) {
emit_int32(op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2));
}
@ -848,16 +869,6 @@ inline void Assembler::stx(Register d, Register s1, Register s2) {
inline void Assembler::stx(Register d, Register s1, int simm13a) {
emit_data(op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
// Integer-register std, register-index form. d must be even-numbered.
// NOTE(review): v9_dep() presumably flags the instruction as deprecated in V9 - confirm.
inline void Assembler::std(Register d, Register s1, Register s2) {
  v9_dep();
  assert(d->is_even(), "not even");
  const int insn = op(ldst_op) | op3(std_op3) | rd(d) | rs1(s1) | rs2(s2);
  emit_int32(insn);
}
// Integer-register std, immediate-offset form. d must be even-numbered.
// NOTE(review): v9_dep() presumably flags the instruction as deprecated in V9 - confirm.
inline void Assembler::std(Register d, Register s1, int simm13a) {
  v9_dep();
  assert(d->is_even(), "not even");
  const int insn = op(ldst_op) | op3(std_op3) | rd(d) | rs1(s1) |
                   immed(true) | simm(simm13a, 13);
  emit_data(insn);
}
inline void Assembler::stba(Register d, Register s1, Register s2, int ia) {
emit_int32(op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -1043,6 +1054,15 @@ inline void Assembler::bshuffle(FloatRegister s1, FloatRegister s2, FloatRegiste
// VIS3 instructions
// VIS3 only: emits the addx_op3 instruction word with opf = addxc_opf.
inline void Assembler::addxc(Register s1, Register s2, Register d) {
  vis3_only();
  const int insn = op(arith_op) | op3(addx_op3) | opf(addxc_opf) |
                   rd(d) | rs1(s1) | rs2(s2);
  emit_int32(insn);
}
// VIS3 only: emits the addx_op3 instruction word with opf = addxccc_opf.
inline void Assembler::addxccc(Register s1, Register s2, Register d) {
  vis3_only();
  const int insn = op(arith_op) | op3(addx_op3) | opf(addxccc_opf) |
                   rd(d) | rs1(s1) | rs2(s2);
  emit_int32(insn);
}
inline void Assembler::movstosw(FloatRegister s, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S));
@ -1073,6 +1093,10 @@ inline void Assembler::xmulxhi(Register s1, Register s2, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2));
}
// VIS3 only: emits the umulx_op3 instruction word with opf = umulxhi_opf.
inline void Assembler::umulxhi(Register s1, Register s2, Register d) {
  vis3_only();
  const int insn = op(arith_op) | op3(umulx_op3) | opf(umulxhi_opf) |
                   rd(d) | rs1(s1) | rs2(s2);
  emit_int32(insn);
}
// Crypto SHA instructions
@ -1096,4 +1120,11 @@ inline void Assembler::crc32c(FloatRegister s1, FloatRegister s2, FloatRegister
emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D));
}
// MPMUL instruction
// Emits MPMUL with a 5-bit unsigned immediate; rd and rs1 are encoded as 0.
// Asserts MPMUL availability before emitting.
inline void Assembler::mpmul(int uimm5) {
  mpmul_only();
  const int insn = op(arith_op) | op3(mpmul_op3) | opf(mpmul_opf) |
                   rd(0) | rs1(0) | uimm(uimm5, 5);
  emit_int32(insn);
}
#endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP

View File

@ -2763,13 +2763,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
}
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
Bytecodes::Code bc = method->java_code_at_bci(bci);
const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes
if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
!callee_is_static && // required for optimized MH invokes
C1ProfileVirtualCalls) {
if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, tmp1, recv);

View File

@ -119,8 +119,8 @@ address RegisterMap::pd_location(VMReg regname) const {
reg = regname->as_Register();
}
if (reg->is_out()) {
assert(_younger_window != NULL, "Younger window should be available");
return second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
return _younger_window == NULL ? NULL :
second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
}
if (reg->is_local() || reg->is_in()) {
assert(_window != NULL, "Window should be available");

View File

@ -43,7 +43,7 @@ const bool CCallingConventionRequiresIntsAsLongs = true;
#elif defined(COMPILER1)
// pure C1, 32-bit, small machine
#define DEFAULT_CACHE_LINE_SIZE 16
#elif defined(COMPILER2) || defined(SHARK)
#elif defined(COMPILER2)
// pure C2, 64-bit, large machine
#define DEFAULT_CACHE_LINE_SIZE 128
#endif

View File

@ -97,12 +97,15 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
writeable) \
\
product(intx, UseVIS, 99, \
"Highest supported VIS instructions set on Sparc") \
"Highest supported VIS instructions set on SPARC") \
range(0, 99) \
\
product(bool, UseCBCond, false, \
"Use compare and branch instruction on SPARC") \
\
product(bool, UseMPMUL, false, \
"Use multi-precision multiply instruction (mpmul) on SPARC") \
\
product(bool, UseBlockZeroing, false, \
"Use special cpu instructions for block zeroing") \
\

View File

@ -25,9 +25,9 @@
#ifndef CPU_SPARC_VM_JNITYPES_SPARC_HPP
#define CPU_SPARC_VM_JNITYPES_SPARC_HPP
#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive jni
// types to the array of arguments passed into JavaCalls::call

View File

@ -1574,31 +1574,41 @@ void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(zero, ptr_cc, s1, 0, L);
return;
}
} else {
br_null(s1, false, p, L);
delayed()->nop();
}
}
void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
return;
}
} else {
br_notnull(s1, false, p, L);
delayed()->nop();
}
}
// Unconditional short branch
void MacroAssembler::ba_short(Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(equal, icc, G0, G0, L);
return;
}
} else {
br(always, false, pt, L);
delayed()->nop();
}
}
// Branch if 'icc' says zero or not (i.e. icc.z == 1|0).
void MacroAssembler::br_icc_zero(bool iszero, Predict p, Label &L) {
assert_not_delayed();
Condition cf = (iszero ? Assembler::zero : Assembler::notZero);
br(cf, false, p, L);
delayed()->nop();
}
// instruction sequences factored across compiler & interpreter
@ -3565,20 +3575,6 @@ static void generate_satb_log_enqueue(bool with_frame) {
#undef __
}
// Generate the requested SATB log-enqueue stub (with frame or frameless)
// unless its entry address has already been set.
static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
  if (with_frame && satb_log_enqueue_with_frame == 0) {
    generate_satb_log_enqueue(with_frame);
    assert(satb_log_enqueue_with_frame != 0, "postcondition.");
  } else if (!with_frame && satb_log_enqueue_frameless == 0) {
    generate_satb_log_enqueue(with_frame);
    assert(satb_log_enqueue_frameless != 0, "postcondition.");
  }
}
void MacroAssembler::g1_write_barrier_pre(Register obj,
Register index,
int offset,
@ -3648,13 +3644,9 @@ void MacroAssembler::g1_write_barrier_pre(Register obj,
"Or we need to think harder.");
if (pre_val->is_global() && !preserve_o_regs) {
generate_satb_log_enqueue_if_necessary(true); // with frame
call(satb_log_enqueue_with_frame);
delayed()->mov(pre_val, O0);
} else {
generate_satb_log_enqueue_if_necessary(false); // frameless
save_frame(0);
call(satb_log_enqueue_frameless);
delayed()->mov(pre_val->after_save(), O0);
@ -3758,15 +3750,6 @@ static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
}
// Generate the dirty-card log-enqueue code for the given card table base
// unless its entry point (dirty_card_log_enqueue) is already non-zero.
static inline void
generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
  if (dirty_card_log_enqueue != 0) {
    return;  // Nothing to do, already generated.
  }
  generate_dirty_card_log_enqueue(byte_map_base);
  assert(dirty_card_log_enqueue != 0, "postcondition.");
}
void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
Label filtered;
@ -3796,7 +3779,6 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val
} else {
post_filter_masm->nop();
}
generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
save_frame(0);
call(dirty_card_log_enqueue);
if (use_scr) {
@ -3809,6 +3791,28 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val
bind(filtered);
}
// Called from init_globals() after universe_init() and before interpreter_init().
// Generates, up front, every G1 barrier helper whose entry point is still
// zero: the dirty-card log enqueue and both SATB log enqueue flavours.
void g1_barrier_stubs_init() {
  CollectedHeap* heap = Universe::heap();
  if (heap->kind() != CollectedHeap::G1CollectedHeap) {
    return;  // Only needed for G1
  }

  if (dirty_card_log_enqueue == 0) {
    G1SATBCardTableLoggingModRefBS* bs =
      barrier_set_cast<G1SATBCardTableLoggingModRefBS>(heap->barrier_set());
    generate_dirty_card_log_enqueue(bs->byte_map_base);
    assert(dirty_card_log_enqueue != 0, "postcondition.");
  }

  // SATB enqueue, variant with its own frame.
  if (satb_log_enqueue_with_frame == 0) {
    generate_satb_log_enqueue(true);
    assert(satb_log_enqueue_with_frame != 0, "postcondition.");
  }

  // SATB enqueue, frameless variant.
  if (satb_log_enqueue_frameless == 0) {
    generate_satb_log_enqueue(false);
    assert(satb_log_enqueue_frameless != 0, "postcondition.");
  }
}
#endif // INCLUDE_ALL_GCS
///////////////////////////////////////////////////////////////////////////////////
@ -3834,6 +3838,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
ld_ptr(mirror, in_bytes(ConstMethod::constants_offset()), mirror);
ld_ptr(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld_ptr(mirror, mirror_offset, mirror);
resolve_oop_handle(mirror);
}
void MacroAssembler::load_klass(Register src_oop, Register klass) {

View File

@ -606,7 +606,7 @@ class MacroAssembler : public Assembler {
// offset. No explicit code generation is needed if the offset is within a certain
// range (0 <= offset <= page_size).
//
// %%%%%% Currently not done for SPARC
// FIXME: Currently not done for SPARC
void null_check(Register reg, int offset = -1);
static bool needs_explicit_null_check(intptr_t offset);
@ -648,6 +648,9 @@ class MacroAssembler : public Assembler {
// unconditional short branch
void ba_short(Label& L);
// Branch on icc.z (true or not).
void br_icc_zero(bool iszero, Predict p, Label &L);
inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );

View File

@ -185,7 +185,7 @@ inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, reloc
}
inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
// Label variant of br(): calls avoid_pipeline_stall() first, then forwards to
// the address-based overload with the label's target.
// See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
avoid_pipeline_stall();
br(c, a, p, target(L));
}

View File

@ -259,6 +259,8 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
}
bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
bool is_even() const { return (encoding() & 1) == 0; }
const char* name() const;
FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }

View File

@ -41,10 +41,6 @@
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#ifdef SHARK
#include "compiler/compileBroker.hpp"
#include "shark/sharkCompiler.hpp"
#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmciJavaClasses.hpp"
#endif

View File

@ -2628,7 +2628,6 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
%}
enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
@ -2651,7 +2650,71 @@ enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
__ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
// Encoding: single-precision (FloatRegisterImpl::S) 'fmsub' on the operand
// registers a, b, c, with the result written to dst.
enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
// Encoding: double-precision (FloatRegisterImpl::D) 'fmsub' on the operand
// registers a, b, c, with the result written to dst.
enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
// Encoding: single-precision (FloatRegisterImpl::S) 'fnmadd' on the operand
// registers a, b, c, with the result written to dst.
enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fnmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
// Encoding: double-precision (FloatRegisterImpl::D) 'fnmadd' on the operand
// registers a, b, c, with the result written to dst.
enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fnmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
// Encoding: single-precision (FloatRegisterImpl::S) 'fnmsub' on the operand
// registers a, b, c, with the result written to dst.
enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fnmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
// Encoding: double-precision (FloatRegisterImpl::D) 'fnmsub' on the operand
// registers a, b, c, with the result written to dst.
enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fnmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
enc_class fmovs (dflt_reg dst, dflt_reg src) %{
@ -7597,7 +7660,7 @@ instruct sqrtD_reg_reg(regD dst, regD src) %{
ins_pipe(fdivD_reg_reg);
%}
// Single precision fused floating-point multiply-add (d = a * b + c).
// Single/Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF c (Binary a b)));
@ -7606,7 +7669,6 @@ instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
ins_pipe(fmaF_regx4);
%}
// Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD c (Binary a b)));
@ -7615,6 +7677,66 @@ instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
ins_pipe(fmaD_regx4);
%}
// Additional patterns matching complement versions that we can map directly to
// variants of the fused multiply-add instructions.
// Single/Double precision fused floating-point multiply-sub (d = a * b - c)
// Single-precision form. Selected only when UseFMA is set; the match rule is
// an FmaF whose addend is negated (NegF c), which is what turns the fused
// multiply-add into a multiply-subtract.
instruct fmsubF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF (NegF c) (Binary a b)));
format %{ "fmsubs $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
ins_encode(fmsubs(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
// Double-precision fused multiply-sub (dst = a * b - c). Selected only when
// UseFMA is set; matches an FmaD whose addend is negated (NegD c).
instruct fmsubD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD (NegD c) (Binary a b)));
format %{ "fmsubd $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
ins_encode(fmsubd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
// Single/Double precision fused floating-point neg. multiply-add,
// d = -1 * a * b - c = -(a * b + c)
// Single-precision form. Two match rules are given because the negation of
// the product may sit on either multiplicand (NegF a or NegF b); in both the
// addend is negated as well (NegF c).
instruct fnmaddF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF (NegF c) (Binary (NegF a) b)));
match(Set dst (FmaF (NegF c) (Binary a (NegF b))));
format %{ "fnmadds $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
ins_encode(fnmadds(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
// Double-precision fused negated multiply-add (dst = -(a * b + c)). Two match
// rules cover the negation sitting on either multiplicand (NegD a or NegD b);
// the addend is negated in both (NegD c).
instruct fnmaddD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD (NegD c) (Binary (NegD a) b)));
match(Set dst (FmaD (NegD c) (Binary a (NegD b))));
format %{ "fnmaddd $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
ins_encode(fnmaddd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
// Single/Double precision fused floating-point neg. multiply-sub,
// d = -1 * a * b + c = -(a * b - c)
// Single-precision form. Two match rules cover the negation sitting on either
// multiplicand (NegF a or NegF b); the addend c is used as is.
instruct fnmsubF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF c (Binary (NegF a) b)));
match(Set dst (FmaF c (Binary a (NegF b))));
format %{ "fnmsubs $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
ins_encode(fnmsubs(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
// Double-precision fused negated multiply-sub (dst = -(a * b - c)). Two match
// rules cover the negation sitting on either multiplicand (NegD a or NegD b);
// the addend c is used as is.
instruct fnmsubD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD c (Binary (NegD a) b)));
match(Set dst (FmaD c (Binary a (NegD b))));
format %{ "fnmsubd $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
ins_encode(fnmsubd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
//----------Logical Instructions-----------------------------------------------
// And Instructions
// Register And

View File

@ -58,7 +58,6 @@
// Note: The register L7 is used as L7_thread_cache, and may not be used
// any other way within this module.
static const Register& Lstub_temp = L2;
// -------------------------------------------------------------------------------------------------------------------------
@ -4975,6 +4974,773 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
/**
* Generates the "multiplyToLen" stub and returns its entry address. The stub
* selects one of up to four multiplication kernels based on the parity of
* the lengths, the alignment of the three vectors and (when UseMPMUL is set)
* the vector length.
*
* Arguments:
*
* Inputs (as seen after the stub's own save_frame(0)):
* I0 - int* x-addr
* I1 - int x-len
* I2 - int* y-addr
* I3 - int y-len
* I4 - int* z-addr (output vector)
* I5 - int z-len
*/
address generate_multiplyToLen() {
assert(UseMultiplyToLenIntrinsic, "need VIS3 instructions");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
address start = __ pc();
__ save_frame(0);
const Register xptr = I0; // input address
const Register xlen = I1; // ...and length in 32b-words
const Register yptr = I2; //
const Register ylen = I3; //
const Register zptr = I4; // output address
const Register zlen = I5; // ...and length in 32b-words
/* The minimal "limb" representation suggests that odd length vectors are as
* likely as even length dittos. This in turn suggests that we need to cope
* with odd/even length arrays and data not aligned properly for 64-bit read
* and write operations. We thus use a number of different kernels:
*
* if (is_even(x.len) && is_even(y.len))
* if (is_align64(x) && is_align64(y) && is_align64(z))
* if (x.len == y.len && 16 <= x.len && x.len <= 64)
* memv_mult_mpmul(...)
* else
* memv_mult_64x64(...)
* else
* memv_mult_64x64u(...)
* else
* memv_mult_32x32(...)
*
* Here we assume VIS3 support (for 'umulxhi', 'addxc' and 'addxccc').
* In case CBCOND instructions are supported, we will use 'cxbX'. If the
* MPMUL instruction is supported, we will generate a kernel using 'mpmul'
* (for vectors with proper characteristics).
*/
const Register tmp0 = L0;
const Register tmp1 = L1;
Label L_mult_32x32;
Label L_mult_64x64u;
Label L_mult_64x64;
Label L_exit;
// Either length odd: use the 32x32 kernel.
if_both_even(xlen, ylen, tmp0, false, L_mult_32x32);
// Any of the three addresses has a bit set under the mask (64 - 1): use the
// unaligned 64x64 kernel.
// NOTE(review): the mask is applied to byte addresses, so 64 here tests
// 64-byte alignment, whereas the description above speaks of 64-bit reads
// and writes - confirm that this is the intended granularity.
if_all3_aligned(xptr, yptr, zptr, tmp1, 64, false, L_mult_64x64u);
if (UseMPMUL) {
// Lengths differ, or the length is outside [16..64]: use the plain 64x64
// kernel instead of 'mpmul'.
if_eq(xlen, ylen, false, L_mult_64x64);
if_in_rng(xlen, 16, 64, tmp0, tmp1, false, L_mult_64x64);
// 1. Multiply naturally aligned 64b-datums using a generic 'mpmul' kernel,
// operating on equal length vectors of size [16..64].
gen_mult_mpmul(xlen, xptr, yptr, zptr, L_exit);
}
// 2. Multiply naturally aligned 64-bit datums (64x64).
__ bind(L_mult_64x64);
gen_mult_64x64(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
// 3. Multiply unaligned 64-bit datums (64x64).
__ bind(L_mult_64x64u);
gen_mult_64x64_unaligned(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
// 4. Multiply naturally aligned 32-bit datums (32x32).
__ bind(L_mult_32x32);
gen_mult_32x32(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
// Common exit: every kernel leaves by branching to L_exit.
__ bind(L_exit);
__ ret();
__ delayed()->restore();
return start;
}
// Additional help functions used by multiplyToLen generation.
// Emit a test of the least significant bit of (r1 | r2) followed by a branch
// to L. With iseven == true the branch is taken when the bit is clear (both
// values even); with iseven == false it is taken when the bit is set (at
// least one value odd). 'tmp' is overwritten and 'icc' is modified.
void if_both_even(Register r1, Register r2, Register tmp, bool iseven, Label &L)
{
__ or3(r1, r2, tmp);
__ andcc(tmp, 0x1, tmp);
__ br_icc_zero(iseven, Assembler::pn, L);
}
// Emit a test of (r1 | r2 | r3) & (align - 1) followed by a branch to L.
// With isalign == true the branch is taken when the masked value is zero
// (all three values aligned); with isalign == false it is taken when any of
// them has a bit set under the mask. 'tmp' is overwritten and 'icc' is
// modified.
// The (align - 1) mask is only meaningful when 'align' is a power of two.
// NOTE(review): the registers hold byte addresses at the visible call site,
// so 'align' is presumably a byte count - confirm the caller's value of 64.
void if_all3_aligned(Register r1, Register r2, Register r3,
Register tmp, uint align, bool isalign, Label &L)
{
__ or3(r1, r2, tmp);
__ or3(r3, tmp, tmp);
__ andcc(tmp, (align - 1), tmp);
__ br_icc_zero(isalign, Assembler::pn, L);
}
// Emit a short compare-and-branch on x and y: the branch to L is taken on
// equality when 'iseq' is true and on inequality when it is false.
void if_eq(Register x, Register y, bool iseq, Label &L)
{
  if (iseq) {
    __ cmp_and_br_short(x, y, Assembler::equal, Assembler::pt, L);
  } else {
    __ cmp_and_br_short(x, y, Assembler::notEqual, Assembler::pt, L);
  }
}
// Emit a range test of x against [lb..ub] followed by a branch to L. The
// branch is taken when x is inside the range if 'inrng' is true, and when it
// is outside the range if 'inrng' is false. Both t1 and t2 are overwritten.
void if_in_rng(Register x, int lb, int ub, Register t1, Register t2, bool inrng, Label &L)
{
  assert(Assembler::is_simm13(lb), "Small ints only!");
  assert(Assembler::is_simm13(ub), "Small ints only!");
  // The test is evaluated as the sign of the product (x - lb) * (ub - x),
  // which is non-negative exactly when lb <= x <= ub.
  // NOTE: With the local use of this routine, we rely on small integers to
  //       guarantee that we do not overflow in the multiplication.
  __ add(G0, ub, t2);     // t2 = ub
  __ sub(x, lb, t1);      // t1 = x - lb
  __ sub(t2, x, t2);      // t2 = ub - x
  __ mulx(t1, t2, t1);    // t1 = (x - lb) * (ub - x)
  if (inrng) {
    __ cmp_and_br_short(t1, G0, Assembler::greaterEqual, Assembler::pt, L);
  } else {
    __ cmp_and_br_short(t1, G0, Assembler::less, Assembler::pt, L);
  }
}
// One jump-table entry (two instructions): load a doubleword from
// [base + offs] into the floating-point register 'dest', then advance 'offs'
// by 8.
void ldd_entry(Register base, Register offs, FloatRegister dest)
{
__ ldd(base, offs, dest);
__ inc(offs, 8);
}
// One jump-table entry (two instructions): load 64 bits from [base + offs]
// into the integer register 'dest', then advance 'offs' by 8.
void ldx_entry(Register base, Register offs, Register dest)
{
__ ldx(base, offs, dest);
__ inc(offs, 8);
}
// One jump-table entry (two instructions): 'mpmul' with size operand m,
// followed by a cbcond that compares G0 with itself on 'equal' (i.e. always
// taken) to continue at 'next'.
void mpmul_entry(int m, Label &next)
{
__ mpmul(m);
__ cbcond(Assembler::equal, Assembler::icc, G0, G0, next);
}
// One entry of the result store sequence: binds L at the entry, then stores
// r1 and r2 as two consecutive 64-bit words at [base + offs], advancing
// 'offs' by 8 after each store.
void stx_entry(Label &L, Register r1, Register r2, Register base, Register offs)
{
__ bind(L);
__ stx(r1, base, offs);
__ inc(offs, 8);
__ stx(r2, base, offs);
__ inc(offs, 8);
}
// Emit one data word holding the distance (Lbl0 - Lbl1) between the code
// positions of two labels. Both labels must already be bound, since the
// distance is computed at stub generation time.
void offs_entry(Label &Lbl0, Label &Lbl1)
{
assert(Lbl0.is_bound(), "must be");
assert(Lbl1.is_bound(), "must be");
int offset = Lbl0.loc_pos() - Lbl1.loc_pos();
__ emit_data(offset);
}
/* Generate the actual multiplication kernels for BigInteger vectors:
*
* 1. gen_mult_mpmul(...)
*
* 2. gen_mult_64x64(...)
*
* 3. gen_mult_64x64_unaligned(...)
*
* 4. gen_mult_32x32(...)
*/
// Kernel 1: multiplication via the 'mpmul' instruction.
//
// The generated code consists of five position-dependent parts:
//   - a 32-entry load table for the x vector, followed by one 'save',
//   - a 32-entry load table for the y vector, followed by five 'save's,
//   - a 32-entry table of 'mpmul' instructions (sizes 31..0),
//   - the z store sequence (L_z31..L_z00) with interleaved 'restore's,
//   - a 32-word table of offsets into that store sequence.
// Each of the three 32-entry tables is entered through a computed jump
// ('rdpc' + 'jmp') at entry (64 - len)/2, so the statements that emit them
// must stay in exactly this order and each table entry must stay exactly two
// instructions long. The save and restore counts balance on every path
// (six of each).
//
//   len     - vector length in 32b limbs (same for x and y)
//   xptr    - address of x vector
//   yptr    - address of y vector
//   zptr    - address of z (output) vector
//   L_exit  - label to branch to when done
void gen_mult_mpmul(Register len, Register xptr, Register yptr, Register zptr,
Label &L_exit)
{
const Register zero = G0;
const Register gxp = G1; // Need to use global registers across RWs.
const Register gyp = G2;
const Register gzp = G3;
const Register offs = G4;
const Register disp = G5;
__ mov(xptr, gxp);
__ mov(yptr, gyp);
__ mov(zptr, gzp);
/* Compute jump vector entry:
*
* 1. mpmul input size (0..31) x 64b
* 2. vector input size in 32b limbs (even number)
* 3. branch entries in reverse order (31..0), using two
* instructions per entry (2 * 4 bytes).
*
* displacement = byte_offset(bra_offset(len))
* = byte_offset((64 - len)/2)
* = 8 * (64 - len)/2
* = 4 * (64 - len)
*/
Register temp = I5; // Alright to use input regs. in first batch.
__ sub(zero, len, temp);
__ add(temp, 64, temp);
__ sllx(temp, 2, disp); // disp := (64 - len) << 2
// Dispatch relative current PC, into instruction table below.
// (The table starts 16 bytes, i.e. four instructions, after the 'rdpc'.)
__ rdpc(temp);
__ add(temp, 16, temp);
__ jmp(temp, disp);
__ delayed()->clr(offs);
// x vector load table (32 entries).
ldd_entry(gxp, offs, F22);
ldd_entry(gxp, offs, F20);
ldd_entry(gxp, offs, F18);
ldd_entry(gxp, offs, F16);
ldd_entry(gxp, offs, F14);
ldd_entry(gxp, offs, F12);
ldd_entry(gxp, offs, F10);
ldd_entry(gxp, offs, F8);
ldd_entry(gxp, offs, F6);
ldd_entry(gxp, offs, F4);
ldx_entry(gxp, offs, I5);
ldx_entry(gxp, offs, I4);
ldx_entry(gxp, offs, I3);
ldx_entry(gxp, offs, I2);
ldx_entry(gxp, offs, I1);
ldx_entry(gxp, offs, I0);
ldx_entry(gxp, offs, L7);
ldx_entry(gxp, offs, L6);
ldx_entry(gxp, offs, L5);
ldx_entry(gxp, offs, L4);
ldx_entry(gxp, offs, L3);
ldx_entry(gxp, offs, L2);
ldx_entry(gxp, offs, L1);
ldx_entry(gxp, offs, L0);
ldd_entry(gxp, offs, F2);
ldd_entry(gxp, offs, F0);
ldx_entry(gxp, offs, O5);
ldx_entry(gxp, offs, O4);
ldx_entry(gxp, offs, O3);
ldx_entry(gxp, offs, O2);
ldx_entry(gxp, offs, O1);
ldx_entry(gxp, offs, O0);
__ save(SP, -176, SP);
const Register addr = gxp; // Alright to reuse 'gxp'.
// Dispatch relative current PC, into instruction table below.
__ rdpc(addr);
__ add(addr, 16, addr);
__ jmp(addr, disp);
__ delayed()->clr(offs);
// y vector load table (32 entries).
ldd_entry(gyp, offs, F58);
ldd_entry(gyp, offs, F56);
ldd_entry(gyp, offs, F54);
ldd_entry(gyp, offs, F52);
ldd_entry(gyp, offs, F50);
ldd_entry(gyp, offs, F48);
ldd_entry(gyp, offs, F46);
ldd_entry(gyp, offs, F44);
ldd_entry(gyp, offs, F42);
ldd_entry(gyp, offs, F40);
ldd_entry(gyp, offs, F38);
ldd_entry(gyp, offs, F36);
ldd_entry(gyp, offs, F34);
ldd_entry(gyp, offs, F32);
ldd_entry(gyp, offs, F30);
ldd_entry(gyp, offs, F28);
ldd_entry(gyp, offs, F26);
ldd_entry(gyp, offs, F24);
ldx_entry(gyp, offs, O5);
ldx_entry(gyp, offs, O4);
ldx_entry(gyp, offs, O3);
ldx_entry(gyp, offs, O2);
ldx_entry(gyp, offs, O1);
ldx_entry(gyp, offs, O0);
ldx_entry(gyp, offs, L7);
ldx_entry(gyp, offs, L6);
ldx_entry(gyp, offs, L5);
ldx_entry(gyp, offs, L4);
ldx_entry(gyp, offs, L3);
ldx_entry(gyp, offs, L2);
ldx_entry(gyp, offs, L1);
ldx_entry(gyp, offs, L0);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
Label L_mpmul_restore_4, L_mpmul_restore_3, L_mpmul_restore_2;
Label L_mpmul_restore_1, L_mpmul_restore_0;
// Dispatch relative current PC, into instruction table below.
__ rdpc(addr);
__ add(addr, 16, addr);
__ jmp(addr, disp);
__ delayed()->clr(offs);
// 'mpmul' table (32 entries). Each entry continues at the restore label
// that pops the number of register windows appropriate for its size.
mpmul_entry(31, L_mpmul_restore_0);
mpmul_entry(30, L_mpmul_restore_0);
mpmul_entry(29, L_mpmul_restore_0);
mpmul_entry(28, L_mpmul_restore_0);
mpmul_entry(27, L_mpmul_restore_1);
mpmul_entry(26, L_mpmul_restore_1);
mpmul_entry(25, L_mpmul_restore_1);
mpmul_entry(24, L_mpmul_restore_1);
mpmul_entry(23, L_mpmul_restore_1);
mpmul_entry(22, L_mpmul_restore_1);
mpmul_entry(21, L_mpmul_restore_1);
mpmul_entry(20, L_mpmul_restore_2);
mpmul_entry(19, L_mpmul_restore_2);
mpmul_entry(18, L_mpmul_restore_2);
mpmul_entry(17, L_mpmul_restore_2);
mpmul_entry(16, L_mpmul_restore_2);
mpmul_entry(15, L_mpmul_restore_2);
mpmul_entry(14, L_mpmul_restore_2);
mpmul_entry(13, L_mpmul_restore_3);
mpmul_entry(12, L_mpmul_restore_3);
mpmul_entry(11, L_mpmul_restore_3);
mpmul_entry(10, L_mpmul_restore_3);
mpmul_entry( 9, L_mpmul_restore_3);
mpmul_entry( 8, L_mpmul_restore_3);
mpmul_entry( 7, L_mpmul_restore_3);
mpmul_entry( 6, L_mpmul_restore_4);
mpmul_entry( 5, L_mpmul_restore_4);
mpmul_entry( 4, L_mpmul_restore_4);
mpmul_entry( 3, L_mpmul_restore_4);
mpmul_entry( 2, L_mpmul_restore_4);
mpmul_entry( 1, L_mpmul_restore_4);
mpmul_entry( 0, L_mpmul_restore_4);
Label L_z31, L_z30, L_z29, L_z28, L_z27, L_z26, L_z25, L_z24;
Label L_z23, L_z22, L_z21, L_z20, L_z19, L_z18, L_z17, L_z16;
Label L_z15, L_z14, L_z13, L_z12, L_z11, L_z10, L_z09, L_z08;
Label L_z07, L_z06, L_z05, L_z04, L_z03, L_z02, L_z01, L_z00;
Label L_zst_base; // Store sequence base address.
__ bind(L_zst_base);
// z store sequence: entered at L_zNN for an 'mpmul' of size NN; a 'restore'
// separates the groups of entries.
stx_entry(L_z31, L7, L6, gzp, offs);
stx_entry(L_z30, L5, L4, gzp, offs);
stx_entry(L_z29, L3, L2, gzp, offs);
stx_entry(L_z28, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z27, O5, O4, gzp, offs);
stx_entry(L_z26, O3, O2, gzp, offs);
stx_entry(L_z25, O1, O0, gzp, offs);
stx_entry(L_z24, L7, L6, gzp, offs);
stx_entry(L_z23, L5, L4, gzp, offs);
stx_entry(L_z22, L3, L2, gzp, offs);
stx_entry(L_z21, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z20, O5, O4, gzp, offs);
stx_entry(L_z19, O3, O2, gzp, offs);
stx_entry(L_z18, O1, O0, gzp, offs);
stx_entry(L_z17, L7, L6, gzp, offs);
stx_entry(L_z16, L5, L4, gzp, offs);
stx_entry(L_z15, L3, L2, gzp, offs);
stx_entry(L_z14, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z13, O5, O4, gzp, offs);
stx_entry(L_z12, O3, O2, gzp, offs);
stx_entry(L_z11, O1, O0, gzp, offs);
stx_entry(L_z10, L7, L6, gzp, offs);
stx_entry(L_z09, L5, L4, gzp, offs);
stx_entry(L_z08, L3, L2, gzp, offs);
stx_entry(L_z07, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z06, O5, O4, gzp, offs);
stx_entry(L_z05, O3, O2, gzp, offs);
stx_entry(L_z04, O1, O0, gzp, offs);
stx_entry(L_z03, L7, L6, gzp, offs);
stx_entry(L_z02, L5, L4, gzp, offs);
stx_entry(L_z01, L3, L2, gzp, offs);
stx_entry(L_z00, L1, L0, gzp, offs);
__ restore();
__ restore();
// Exit out of 'mpmul' routine, back to multiplyToLen.
__ ba_short(L_exit);
// Table of offsets (one data word per entry) from L_zst_base to each store
// entry, in the same order as the 'mpmul' table above.
Label L_zst_offs;
__ bind(L_zst_offs);
offs_entry(L_z31, L_zst_base); // index 31: 2048x2048
offs_entry(L_z30, L_zst_base);
offs_entry(L_z29, L_zst_base);
offs_entry(L_z28, L_zst_base);
offs_entry(L_z27, L_zst_base);
offs_entry(L_z26, L_zst_base);
offs_entry(L_z25, L_zst_base);
offs_entry(L_z24, L_zst_base);
offs_entry(L_z23, L_zst_base);
offs_entry(L_z22, L_zst_base);
offs_entry(L_z21, L_zst_base);
offs_entry(L_z20, L_zst_base);
offs_entry(L_z19, L_zst_base);
offs_entry(L_z18, L_zst_base);
offs_entry(L_z17, L_zst_base);
offs_entry(L_z16, L_zst_base);
offs_entry(L_z15, L_zst_base);
offs_entry(L_z14, L_zst_base);
offs_entry(L_z13, L_zst_base);
offs_entry(L_z12, L_zst_base);
offs_entry(L_z11, L_zst_base);
offs_entry(L_z10, L_zst_base);
offs_entry(L_z09, L_zst_base);
offs_entry(L_z08, L_zst_base);
offs_entry(L_z07, L_zst_base);
offs_entry(L_z06, L_zst_base);
offs_entry(L_z05, L_zst_base);
offs_entry(L_z04, L_zst_base);
offs_entry(L_z03, L_zst_base);
offs_entry(L_z02, L_zst_base);
offs_entry(L_z01, L_zst_base);
offs_entry(L_z00, L_zst_base); // index 0: 64x64
// Landing pads for the 'mpmul' table: entering at L_mpmul_restore_N executes
// N 'restore' instructions before the dispatch below.
__ bind(L_mpmul_restore_4);
__ restore();
__ bind(L_mpmul_restore_3);
__ restore();
__ bind(L_mpmul_restore_2);
__ restore();
__ bind(L_mpmul_restore_1);
__ restore();
__ bind(L_mpmul_restore_0);
// Dispatch via offset vector entry, into z-store sequence.
Label L_zst_rdpc;
__ bind(L_zst_rdpc);
assert(L_zst_base.is_bound(), "must be");
assert(L_zst_offs.is_bound(), "must be");
assert(L_zst_rdpc.is_bound(), "must be");
// Distances from the 'rdpc' below back to the store sequence and to the
// offset table, computed at stub generation time.
int dbase = L_zst_rdpc.loc_pos() - L_zst_base.loc_pos();
int doffs = L_zst_rdpc.loc_pos() - L_zst_offs.loc_pos();
temp = gyp; // Alright to reuse 'gyp'.
__ rdpc(addr);
__ sub(addr, doffs, temp); // temp = address of offset table
__ srlx(disp, 1, disp); // disp = 2 * (64 - len): byte index of the 4-byte entry
__ lduw(temp, disp, offs); // offs = offset of the store entry
__ sub(addr, dbase, temp); // temp = address of store sequence base
__ jmp(temp, offs);
__ delayed()->clr(offs);
}
// Kernel 2: schoolbook multiplication on 64-bit datums, using indexed 64-bit
// loads and stores (ldx/stx) on all three vectors.
//
//   xp, yp, zp - addresses of the x, y and z (output) vectors
//   xn, yn, zn - their lengths in 32b words; all three registers are
//                overwritten (converted to 64b indices and decremented)
//   L_exit     - label to branch to when done
//
// The first pass (L_loop_i) writes x * y[yn] into z; each following pass
// (L_loop_j / L_loop_i2) accumulates x * y[j] into z one datum further up.
void gen_mult_64x64(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for immediate use.
const Register ri = L0; // Outer loop index, xv[i]
const Register rj = L1; // Inner loop index, yv[j]
const Register rk = L2; // Output loop index, zv[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])
const Register lop = O0; // lo-64b product
const Register hip = O1; // hi-64b product
const Register zero = G0;
Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;
__ srlx(xn, 1, xn); // index for u32 to u64 ditto
__ srlx(yn, 1, yn); // index for u32 to u64 ditto
__ srlx(zn, 1, zn); // index for u32 to u64 ditto
__ dec(xn); // Adjust [0..(N/2)-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u64 c = 0
__ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
__ sllx(yn, 3, rj); // int j = yn (byte offset j = 8*yn)
__ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)
__ ldx(yp, rj, ry); // u64 y = yp[yn]
// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ ldx(xp, ri, rx); // x = xp[i]
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry)
__ addxc(hip, zero, rc); // carry over to next datum [k-1]
__ stx(lop, zp, rk); // z[k] = lop
__ dec(rk, 8); // k--
__ dec(ri, 8); // i--
__ ba_short(L_loop_i);
__ bind(L_exit_loop_i);
__ stx(rc, zp, rk); // z[k] = c
// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 3, rj); // int j = yn - 1 (byte offset j = 8*yn)
__ dec(rj, 8);
__ bind(L_loop_j);
__ cmp_and_br_short(rj, 0, // j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u64 c = 0
__ ldx(yp, rj, ry); // u64 y = yp[j]
// for (int i = xn, k = --zn; i >= 0; i--)
__ dec(zn); // --zn
__ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
__ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)
__ bind(L_loop_i2);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ ldx(xp, ri, rx); // x = xp[i]
__ ldx(zp, rk, rz); // z = zp[k], accumulator
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rz, rc, rz); // Accumulate lower order bits,
__ addxc(hip, zero, rc); // Accumulate higher order bits to carry
__ addcc(rz, lop, rz); // z += lo(p) + c
__ addxc(rc, zero, rc); // fold the carry of the second addition into c
__ stx(rz, zp, rk); // zp[k] = z
__ dec(rk, 8); // k--
__ dec(ri, 8); // i--
__ ba_short(L_loop_i2);
__ bind(L_exit_loop_i2);
__ stx(rc, zp, rk); // z[k] = c
__ dec(rj, 8); // j--
__ ba_short(L_loop_j);
}
// Kernel 3: schoolbook multiplication on 64-bit datums for vectors that are
// not suitably aligned for 64-bit memory accesses. Every 64-bit datum is
// assembled from (or split into) two 32-bit halves using lduw/stw, with the
// more significant half at the lower address.
//
//   xp, yp, zp - addresses of the x, y and z (output) vectors
//   xn, yn, zn - their lengths in 32b words; all three registers are
//                overwritten (converted to 64b indices and decremented)
//   L_exit     - label to branch to when done
//
// The loops run on address cursors (xpc, ypc, zpc) instead of indices, and
// terminate once a cursor has dropped below the base address of its vector.
// Those bounds checks compare full 64-bit addresses, so they must use the
// 64-bit, unsigned form of the compare-and-branch (cmp_and_brx_short with
// 'lessUnsigned'). A signed 32-bit comparison gives the wrong answer when a
// vector straddles an address whose low word crosses the signed 32-bit
// boundary (0x7fffffff / 0x80000000).
void gen_mult_64x64_unaligned(Register xp, Register xn,
                              Register yp, Register yn,
                              Register zp, Register zn, Label &L_exit)
{
  // Assuming that a stack frame has already been created, i.e. local and
  // output registers are available for use.
  const Register xpc = L0;  // Outer loop cursor, xp[i]
  const Register ypc = L1;  // Inner loop cursor, yp[j]
  const Register zpc = L2;  // Output loop cursor, zp[k]
  const Register rx  = L4;  // x-vector datum [i]
  const Register ry  = L5;  // y-vector datum [j]
  const Register rz  = L6;  // z-vector datum [k]
  const Register rc  = L7;  // carry over (to z-vector datum [k-1])
  const Register rt  = O2;  // scratch: upper 32b half of a datum
  const Register lop = O0;  // lo-64b product
  const Register hip = O1;  // hi-64b product

  const Register zero = G0;

  Label L_loop_i,  L_exit_loop_i;
  Label L_loop_j;
  Label L_loop_i2, L_exit_loop_i2;

  __ srlx(xn, 1, xn);       // index for u32 to u64 ditto
  __ srlx(yn, 1, yn);       // index for u32 to u64 ditto
  __ srlx(zn, 1, zn);       // index for u32 to u64 ditto
  __ dec(xn);               // Adjust [0..(N/2)-1]
  __ dec(yn);
  __ dec(zn);
  __ clr(rc);               // u64 c = 0
  __ sllx(xn, 3, xpc);      // u32* xpc = &xp[xn] (byte offset 8*xn)
  __ add(xp, xpc, xpc);
  __ sllx(yn, 3, ypc);      // u32* ypc = &yp[yn] (byte offset 8*yn)
  __ add(yp, ypc, ypc);
  __ sllx(zn, 3, zpc);      // u32* zpc = &zp[zn] (byte offset 8*zn)
  __ add(zp, zpc, zpc);
  __ lduw(ypc, 0, rt);      // u64 y = yp[yn]
  __ lduw(ypc, 4, ry);      //   ...
  __ sllx(rt, 32, rt);
  __ or3(rt, ry, ry);

  // for (int i = xn; i >= 0; i--)
  __ bind(L_loop_i);

  __ cmp_and_brx_short(xpc, xp, // i >= 0 (64-bit unsigned address compare)
                       Assembler::lessUnsigned, Assembler::pn, L_exit_loop_i);
  __ lduw(xpc, 0, rt);      // u64 x = xp[i]
  __ lduw(xpc, 4, rx);      //   ...
  __ sllx(rt, 32, rt);
  __ or3(rt, rx, rx);
  __ mulx(rx, ry, lop);     // lo-64b-part of result 64x64
  __ umulxhi(rx, ry, hip);  // hi-64b-part of result 64x64
  __ addcc(rc, lop, lop);   // Accumulate lower order bits (producing carry)
  __ addxc(hip, zero, rc);  // carry over to next datum [k-1]
  __ srlx(lop, 32, rt);
  __ stw(rt, zpc, 0);       // z[k] = lop
  __ stw(lop, zpc, 4);      //   ...
  __ dec(zpc, 8);           // k-- (zpc--)
  __ dec(xpc, 8);           // i-- (xpc--)
  __ ba_short(L_loop_i);

  __ bind(L_exit_loop_i);
  __ srlx(rc, 32, rt);
  __ stw(rt, zpc, 0);       // z[k] = c
  __ stw(rc, zpc, 4);

  // for (int j = yn - 1; j >= 0; j--)
  __ sllx(yn, 3, ypc);      // u32* ypc = &yp[yn] (byte offset 8*yn)
  __ add(yp, ypc, ypc);
  __ dec(ypc, 8);           // yn - 1 (ypc--)

  __ bind(L_loop_j);

  __ cmp_and_brx_short(ypc, yp, // j >= 0 (64-bit unsigned address compare)
                       Assembler::lessUnsigned, Assembler::pn, L_exit);
  __ clr(rc);               // u64 c = 0
  __ lduw(ypc, 0, rt);      // u64 y = yp[j] (= *ypc)
  __ lduw(ypc, 4, ry);      //   ...
  __ sllx(rt, 32, rt);
  __ or3(rt, ry, ry);

  // for (int i = xn, k = --zn; i >= 0; i--)
  __ sllx(xn, 3, xpc);      // u32* xpc = &xp[xn] (byte offset 8*xn)
  __ add(xp, xpc, xpc);
  __ dec(zn);               // --zn
  __ sllx(zn, 3, zpc);      // u32* zpc = &zp[zn] (byte offset 8*zn)
  __ add(zp, zpc, zpc);

  __ bind(L_loop_i2);

  __ cmp_and_brx_short(xpc, xp, // i >= 0 (64-bit unsigned address compare)
                       Assembler::lessUnsigned, Assembler::pn, L_exit_loop_i2);
  __ lduw(xpc, 0, rt);      // u64 x = xp[i] (= *xpc)
  __ lduw(xpc, 4, rx);      //   ...
  __ sllx(rt, 32, rt);
  __ or3(rt, rx, rx);

  __ lduw(zpc, 0, rt);      // u64 z = zp[k] (= *zpc)
  __ lduw(zpc, 4, rz);      //   ...
  __ sllx(rt, 32, rt);
  __ or3(rt, rz, rz);

  __ mulx(rx, ry, lop);     // lo-64b-part of result 64x64
  __ umulxhi(rx, ry, hip);  // hi-64b-part of result 64x64
  __ addcc(rz, rc, rz);     // Accumulate lower order bits...
  __ addxc(hip, zero, rc);  // Accumulate higher order bits to carry
  __ addcc(rz, lop, rz);    //    ... z += lo(p) + c
  __ addxccc(rc, zero, rc);
  __ srlx(rz, 32, rt);
  __ stw(rt, zpc, 0);       // zp[k] = z    (*zpc = z)
  __ stw(rz, zpc, 4);
  __ dec(zpc, 8);           // k-- (zpc--)
  __ dec(xpc, 8);           // i-- (xpc--)
  __ ba_short(L_loop_i2);

  __ bind(L_exit_loop_i2);
  __ srlx(rc, 32, rt);
  __ stw(rt, zpc, 0);       // z[k] = c
  __ stw(rc, zpc, 4);
  __ dec(ypc, 8);           // j-- (ypc--)
  __ ba_short(L_loop_j);
}
// Kernel 4: schoolbook multiplication on 32-bit datums, using indexed 32-bit
// loads and stores (lduw/stw) on all three vectors.
//
//   xp, yp, zp - addresses of the x, y and z (output) vectors
//   xn, yn, zn - their lengths in 32b words; all three registers are
//                overwritten (decremented)
//   L_exit     - label to branch to when done
//
// The first pass (L_loop_i) writes x * y[yn] into z; each following pass
// (L_loop_j / L_loop_i2) accumulates x * y[j] into z one datum further up.
void gen_mult_32x32(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for use.
const Register ri = L0; // Outer loop index, xv[i]
const Register rj = L1; // Inner loop index, yv[j]
const Register rk = L2; // Output loop index, zv[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])
const Register p64 = O0; // 64b product
const Register z65 = O1; // carry+64b accumulator
// c65 and c33 deliberately name the same register (O2): the materialised
// carry is shifted in place.
const Register c65 = O2; // carry at bit 65
const Register c33 = O2; // carry at bit 33 (after shift)
const Register zero = G0;
Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;
__ dec(xn); // Adjust [0..N-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u32 c = 0
__ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
__ sllx(yn, 2, rj); // int j = yn (byte offset j = 4*yn)
__ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)
__ lduw(yp, rj, ry); // u32 y = yp[yn]
// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ lduw(xp, ri, rx); // x = xp[i]
__ mulx(rx, ry, p64); // 64b result of 32x32
__ addcc(rc, p64, z65); // Accumulate to 65 bits (producing carry)
__ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
__ sllx(c65, 32, c33); // and shift into bit 33
__ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
__ add(c33, rc, rc); // carry over to next datum [k-1]
__ stw(z65, zp, rk); // z[k] = lo(z65)
__ dec(rk, 4); // k--
__ dec(ri, 4); // i--
__ ba_short(L_loop_i);
__ bind(L_exit_loop_i);
__ stw(rc, zp, rk); // z[k] = c
// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 2, rj); // int j = yn - 1 (byte offset j = 4*yn)
__ dec(rj, 4);
__ bind(L_loop_j);
__ cmp_and_br_short(rj, 0, // j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u32 c = 0
__ lduw(yp, rj, ry); // u32 y = yp[j]
// for (int i = xn, k = --zn; i >= 0; i--)
__ dec(zn); // --zn
__ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
__ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)
__ bind(L_loop_i2);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ lduw(xp, ri, rx); // x = xp[i]
__ lduw(zp, rk, rz); // z = zp[k], accumulator
__ mulx(rx, ry, p64); // 64b result of 32x32
__ add(rz, rc, rz); // Accumulate lower order bits,
__ addcc(rz, p64, z65); // z += lo(p64) + c
__ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
__ sllx(c65, 32, c33); // and shift into bit 33
__ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
__ add(c33, rc, rc); // carry over to next datum [k-1]
__ stw(z65, zp, rk); // zp[k] = lo(z65)
__ dec(rk, 4); // k--
__ dec(ri, 4); // i--
__ ba_short(L_loop_i2);
__ bind(L_exit_loop_i2);
__ stw(rc, zp, rk); // z[k] = c
__ dec(rj, 4); // j--
__ ba_short(L_loop_j);
}
void generate_initial() {
// Generates all stubs and initializes the entry points
@ -5073,8 +5839,14 @@ class StubGenerator: public StubCodeGenerator {
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
}
}
#ifdef COMPILER2
// Intrinsics supported by C2 only:
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
#endif // COMPILER2
}
public:
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {

View File

@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) {
enum /* platform_dependent_constants */ {
// %%%%%%%% May be able to shrink this a lot
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
code_size2 = 27000 // simply increase if too small (assembler will crash if too small)
code_size2 = 29000 // simply increase if too small (assembler will crash if too small)
};
class Sparc {

View File

@ -2049,6 +2049,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
__ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f1_offset(), Robj);
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ld_ptr( Robj, mirror_offset, Robj);
__ resolve_oop_handle(Robj);
}
}

View File

@ -101,6 +101,14 @@
declare_constant(VM_Version::ISA_XMONT) \
declare_constant(VM_Version::ISA_PAUSE_NSEC) \
declare_constant(VM_Version::ISA_VAMASK) \
declare_constant(VM_Version::ISA_SPARC6) \
declare_constant(VM_Version::ISA_DICTUNP) \
declare_constant(VM_Version::ISA_FPCMPSHL) \
declare_constant(VM_Version::ISA_RLE) \
declare_constant(VM_Version::ISA_SHA3) \
declare_constant(VM_Version::ISA_VIS3C) \
declare_constant(VM_Version::ISA_SPARC5B) \
declare_constant(VM_Version::ISA_MME) \
declare_constant(VM_Version::CPU_FAST_IDIV) \
declare_constant(VM_Version::CPU_FAST_RDPC) \
declare_constant(VM_Version::CPU_FAST_BIS) \

View File

@ -103,7 +103,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
}
else if (has_sparc5()) {
// Use prefetch instruction to avoid partial RAW issue on Core S4 processors,
// Use prefetch instruction to avoid partial RAW issue on Core C4 processors,
// also use prefetch style 3.
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
@ -128,7 +128,7 @@ void VM_Version::initialize() {
// We increase the number of prefetched cache lines, to use just a bit more
// aggressive approach, when the L2-cache line size is small (32 bytes), or
// when running on newer processor implementations, such as the Core S4.
// when running on newer processor implementations, such as the Core C4.
bool inc_prefetch = cache_line_size > 0 && (cache_line_size < 64 || has_sparc5());
if (inc_prefetch) {
@ -168,6 +168,16 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCBCond, false);
}
// Use 'mpmul' instruction if available.
if (has_mpmul()) {
if (FLAG_IS_DEFAULT(UseMPMUL)) {
FLAG_SET_DEFAULT(UseMPMUL, true);
}
} else if (UseMPMUL) {
warning("MPMUL instruction is not available on this CPU");
FLAG_SET_DEFAULT(UseMPMUL, false);
}
assert(BlockZeroingLowLimit > 0, "invalid value");
if (has_blk_zeroing() && cache_line_size > 0) {
@ -208,7 +218,9 @@ void VM_Version::initialize() {
char buf[512];
jio_snprintf(buf, sizeof(buf),
"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
"%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s"
"%s%s%s%s%s%s%s",
(has_v9() ? "v9" : ""),
(has_popc() ? ", popc" : ""),
(has_vis1() ? ", vis1" : ""),
@ -241,6 +253,16 @@ void VM_Version::initialize() {
(has_pause_nsec() ? ", pause_nsec" : ""),
(has_vamask() ? ", vamask" : ""),
(has_sparc6() ? ", sparc6" : ""),
(has_dictunp() ? ", dictunp" : ""),
(has_fpcmpshl() ? ", fpcmpshl" : ""),
(has_rle() ? ", rle" : ""),
(has_sha3() ? ", sha3" : ""),
(has_athena_plus2()? ", athena_plus2" : ""),
(has_vis3c() ? ", vis3c" : ""),
(has_sparc5b() ? ", sparc5b" : ""),
(has_mme() ? ", mme" : ""),
(has_fast_idiv() ? ", *idiv" : ""),
(has_fast_rdpc() ? ", *rdpc" : ""),
(has_fast_bis() ? ", *bis" : ""),
@ -409,6 +431,15 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}
if (UseVIS > 2) {
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
}
} else if (UseMultiplyToLenIntrinsic) {
warning("SPARC multiplyToLen intrinsics require VIS3 instructions support. Intrinsics will be disabled");
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
}
if (UseVectorizedMismatchIntrinsic) {
warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);

View File

@ -67,6 +67,16 @@ protected:
ISA_PAUSE_NSEC,
ISA_VAMASK,
ISA_SPARC6,
ISA_DICTUNP,
ISA_FPCMPSHL,
ISA_RLE,
ISA_SHA3,
ISA_FJATHPLUS2,
ISA_VIS3C,
ISA_SPARC5B,
ISA_MME,
// Synthesised properties:
CPU_FAST_IDIV,
@ -79,7 +89,7 @@ protected:
};
private:
enum { ISA_last_feature = ISA_VAMASK,
enum { ISA_last_feature = ISA_MME,
CPU_last_feature = CPU_BLK_ZEROING };
enum {
@ -119,6 +129,16 @@ private:
ISA_pause_nsec_msk = UINT64_C(1) << ISA_PAUSE_NSEC,
ISA_vamask_msk = UINT64_C(1) << ISA_VAMASK,
ISA_sparc6_msk = UINT64_C(1) << ISA_SPARC6,
ISA_dictunp_msk = UINT64_C(1) << ISA_DICTUNP,
ISA_fpcmpshl_msk = UINT64_C(1) << ISA_FPCMPSHL,
ISA_rle_msk = UINT64_C(1) << ISA_RLE,
ISA_sha3_msk = UINT64_C(1) << ISA_SHA3,
ISA_fjathplus2_msk = UINT64_C(1) << ISA_FJATHPLUS2,
ISA_vis3c_msk = UINT64_C(1) << ISA_VIS3C,
ISA_sparc5b_msk = UINT64_C(1) << ISA_SPARC5B,
ISA_mme_msk = UINT64_C(1) << ISA_MME,
CPU_fast_idiv_msk = UINT64_C(1) << CPU_FAST_IDIV,
CPU_fast_rdpc_msk = UINT64_C(1) << CPU_FAST_RDPC,
CPU_fast_bis_msk = UINT64_C(1) << CPU_FAST_BIS,
@ -153,40 +173,51 @@ private:
* UltraSPARC T2+: (Victoria Falls, etc.)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
* UltraSPARC T3: (Rainbow Falls/S2)
* UltraSPARC T3: (Rainbow Falls/C2)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
* Oracle SPARC T4/T5/M5: (Core S3)
* Oracle SPARC T4/T5/M5: (Core C3)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Kasumi, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL
*
* Oracle SPARC M7: (Core S4)
* Oracle SPARC M7: (Core C4)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
* ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK
*
* Oracle SPARC M8: (Core C5)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
* ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK, SPARC6, FPCMPSHL,
* DICTUNP, RLE, SHA3, MME
*
* NOTE: Oracle Number support ignored.
*/
enum {
niagara1_msk = ISA_v9_msk | ISA_vis1_msk | ISA_blk_init_msk,
niagara2_msk = niagara1_msk | ISA_popc_msk,
core_S2_msk = niagara2_msk | ISA_vis2_msk,
core_C2_msk = niagara2_msk | ISA_vis2_msk,
core_S3_msk = core_S2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
core_C3_msk = core_C2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
ISA_ima_msk | ISA_aes_msk | ISA_des_msk | ISA_kasumi_msk |
ISA_camellia_msk | ISA_md5_msk | ISA_sha1_msk | ISA_sha256_msk |
ISA_sha512_msk | ISA_mpmul_msk | ISA_mont_msk | ISA_pause_msk |
ISA_cbcond_msk | ISA_crc32c_msk,
core_S4_msk = core_S3_msk - ISA_kasumi_msk |
core_C4_msk = core_C3_msk - ISA_kasumi_msk |
ISA_vis3b_msk | ISA_adi_msk | ISA_sparc5_msk | ISA_mwait_msk |
ISA_xmpmul_msk | ISA_xmont_msk | ISA_pause_nsec_msk | ISA_vamask_msk,
core_C5_msk = core_C4_msk | ISA_sparc6_msk | ISA_dictunp_msk |
ISA_fpcmpshl_msk | ISA_rle_msk | ISA_sha3_msk | ISA_mme_msk,
ultra_sparc_t1_msk = niagara1_msk,
ultra_sparc_t2_msk = niagara2_msk,
ultra_sparc_t3_msk = core_S2_msk,
ultra_sparc_m5_msk = core_S3_msk, // NOTE: First out-of-order pipeline.
ultra_sparc_m7_msk = core_S4_msk
ultra_sparc_t3_msk = core_C2_msk,
ultra_sparc_m5_msk = core_C3_msk, // NOTE: First out-of-order pipeline.
ultra_sparc_m7_msk = core_C4_msk,
ultra_sparc_m8_msk = core_C5_msk
};
static uint _L2_data_cache_line_size;
@ -247,6 +278,16 @@ public:
static bool has_pause_nsec() { return (_features & ISA_pause_nsec_msk) != 0; }
static bool has_vamask() { return (_features & ISA_vamask_msk) != 0; }
static bool has_sparc6() { return (_features & ISA_sparc6_msk) != 0; }
static bool has_dictunp() { return (_features & ISA_dictunp_msk) != 0; }
static bool has_fpcmpshl() { return (_features & ISA_fpcmpshl_msk) != 0; }
static bool has_rle() { return (_features & ISA_rle_msk) != 0; }
static bool has_sha3() { return (_features & ISA_sha3_msk) != 0; }
static bool has_athena_plus2() { return (_features & ISA_fjathplus2_msk) != 0; }
static bool has_vis3c() { return (_features & ISA_vis3c_msk) != 0; }
static bool has_sparc5b() { return (_features & ISA_sparc5b_msk) != 0; }
static bool has_mme() { return (_features & ISA_mme_msk) != 0; }
static bool has_fast_idiv() { return (_features & CPU_fast_idiv_msk) != 0; }
static bool has_fast_rdpc() { return (_features & CPU_fast_rdpc_msk) != 0; }
static bool has_fast_bis() { return (_features & CPU_fast_bis_msk) != 0; }

View File

@ -2571,7 +2571,7 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
if (opr2->is_single_cpu()) {
// cpu register - cpu register
if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
__ cmpptr(reg1, opr2->as_register());
__ cmpoop(reg1, opr2->as_register());
} else {
assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
__ cmpl(reg1, opr2->as_register());
@ -2579,7 +2579,7 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
} else if (opr2->is_stack()) {
// cpu register - stack
if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
__ cmpptr(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
__ cmpoop(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
} else {
__ cmpl(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
}
@ -2594,12 +2594,7 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
if (o == NULL) {
__ cmpptr(reg1, (int32_t)NULL_WORD);
} else {
#ifdef _LP64
__ movoop(rscratch1, o);
__ cmpptr(reg1, rscratch1);
#else
__ cmpoop(reg1, c->as_jobject());
#endif // _LP64
__ cmpoop(reg1, o);
}
} else {
fatal("unexpected type: %s", basictype_to_str(c->type()));
@ -2709,7 +2704,7 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
#ifdef _LP64
// %%% Make this explode if addr isn't reachable until we figure out a
// better strategy by giving noreg as the temp for as_Address
__ cmpptr(rscratch1, as_Address(addr, noreg));
__ cmpoop(rscratch1, as_Address(addr, noreg));
#else
__ cmpoop(as_Address(addr), c->as_jobject());
#endif // _LP64
@ -3487,13 +3482,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
Register mdo = op->mdo()->as_register();
__ mov_metadata(mdo, md->constant_encoding());
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
Bytecodes::Code bc = method->java_code_at_bci(bci);
const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes
if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
!callee_is_static && // required for optimized MH invokes
C1ProfileVirtualCalls) {
if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, recv);

View File

@ -383,6 +383,7 @@ void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp
//------------------------------------------------------------------------------
// frame::adjust_unextended_sp
#ifdef ASSERT
void frame::adjust_unextended_sp() {
// On x86, sites calling method handle intrinsics and lambda forms are treated
// as any other call site. Therefore, no special action is needed when we are
@ -394,11 +395,12 @@ void frame::adjust_unextended_sp() {
// If the sender PC is a deoptimization point, get the original PC.
if (sender_cm->is_deopt_entry(_pc) ||
sender_cm->is_deopt_mh_entry(_pc)) {
DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp));
verify_deopt_original_pc(sender_cm, _unextended_sp);
}
}
}
}
#endif
//------------------------------------------------------------------------------
// frame::update_map_with_saved_link

View File

@ -117,7 +117,7 @@
// original sp we use that convention.
intptr_t* _unextended_sp;
void adjust_unextended_sp();
void adjust_unextended_sp() NOT_DEBUG_RETURN;
intptr_t* ptr_at_addr(int offset) const {
return (intptr_t*) addr_at(offset);

View File

@ -46,7 +46,7 @@ const bool CCallingConventionRequiresIntsAsLongs = false;
// pure C1, 32-bit, small machine
// i486 was the last Intel chip with 16-byte cache line size
#define DEFAULT_CACHE_LINE_SIZE 32
#elif defined(COMPILER2) || defined(SHARK)
#elif defined(COMPILER2)
#ifdef _LP64
// pure C2, 64-bit, large machine
#define DEFAULT_CACHE_LINE_SIZE 128

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,9 +25,9 @@
#ifndef CPU_X86_VM_JNITYPES_X86_HPP
#define CPU_X86_VM_JNITYPES_X86_HPP
#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive jni
// types to the array of arguments passed into JavaCalls::call

View File

@ -2783,6 +2783,21 @@ void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
#endif // _LP64
}
// Oop comparison, register against register.
// Forwards unchanged to cmpptr(src1, src2); it exists as a separate entry
// point so that call sites comparing object references are distinguishable
// from generic pointer compares.
void MacroAssembler::cmpoop(Register src1, Register src2) {
cmpptr(src1, src2);
}
// Oop comparison, register against a memory operand.
// Forwards unchanged to cmpptr(src1, src2).
void MacroAssembler::cmpoop(Register src1, Address src2) {
cmpptr(src1, src2);
}
#ifdef _LP64
// Oop comparison, register against a jobject constant (this definition is
// compiled for _LP64 builds only).
// The constant is first loaded into rscratch1 via movoop(), so rscratch1 is
// overwritten by this call; the compare itself is then register-register.
// NOTE(review): the non-_LP64 overload is not defined here — presumably it
// lives in the 32-bit assembler sources; confirm.
void MacroAssembler::cmpoop(Register src1, jobject src2) {
movoop(rscratch1, src2);
cmpptr(src1, rscratch1);
}
#endif
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
if (reachable(adr)) {
if (os::is_MP())
@ -6617,6 +6632,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
movptr(mirror, Address(mirror, ConstMethod::constants_offset()));
movptr(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
movptr(mirror, Address(mirror, mirror_offset));
resolve_oop_handle(mirror);
}
void MacroAssembler::load_klass(Register dst, Register src) {
@ -8398,7 +8414,7 @@ void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ar
if (is_array_equ) {
// Check the input args
cmpptr(ary1, ary2);
cmpoop(ary1, ary2);
jcc(Assembler::equal, TRUE_LABEL);
// Need additional checks for arrays_equals.

View File

@ -750,9 +750,12 @@ class MacroAssembler: public Assembler {
void cmpklass(Address dst, Metadata* obj);
void cmpklass(Register dst, Metadata* obj);
void cmpoop(Address dst, jobject obj);
void cmpoop(Register dst, jobject obj);
#endif // _LP64
void cmpoop(Register src1, Register src2);
void cmpoop(Register src1, Address src2);
void cmpoop(Register dst, jobject obj);
// NOTE: src2 must be the lval. This is NOT a mem-mem compare.
void cmpptr(Address src1, AddressLiteral src2);

View File

@ -182,7 +182,7 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
sizeof(u2), /*is_signed*/ false);
// assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
Label L;
__ cmpptr(recv, __ argument_address(temp2, -1));
__ cmpoop(recv, __ argument_address(temp2, -1));
__ jcc(Assembler::equal, L);
__ movptr(rax, __ argument_address(temp2, -1));
__ STOP("receiver not on stack");

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -566,7 +566,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Support for intptr_t atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
// Support for intptr_t atomic::xchg_long(jlong exchange_value, volatile jlong* dest)
//
// Arguments :
// c_rarg0: exchange_value
@ -574,8 +574,8 @@ class StubGenerator: public StubCodeGenerator {
//
// Result:
// *dest <- ex, return (orig *dest)
address generate_atomic_xchg_ptr() {
StubCodeMark mark(this, "StubRoutines", "atomic_xchg_ptr");
address generate_atomic_xchg_long() {
StubCodeMark mark(this, "StubRoutines", "atomic_xchg_long");
address start = __ pc();
__ movptr(rax, c_rarg0); // Copy to eax we need a return value anyhow
@ -4998,7 +4998,7 @@ class StubGenerator: public StubCodeGenerator {
// atomic calls
StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr();
StubRoutines::_atomic_xchg_long_entry = generate_atomic_xchg_long();
StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
StubRoutines::_atomic_cmpxchg_byte_entry = generate_atomic_cmpxchg_byte();
StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();

View File

@ -2315,7 +2315,7 @@ void TemplateTable::if_acmp(Condition cc) {
// assume branch is more often taken than not (loops use backward branches)
Label not_taken;
__ pop_ptr(rdx);
__ cmpptr(rdx, rax);
__ cmpoop(rdx, rax);
__ jcc(j_not(cc), not_taken);
branch(false, false);
__ bind(not_taken);
@ -2563,6 +2563,13 @@ void TemplateTable::_return(TosState state) {
__ bind(skip_register_finalizer);
}
// Explicitly reset last_sp, for handling special case in TemplateInterpreter::deopt_reexecute_entry
#ifdef ASSERT
if (state == vtos) {
__ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
}
#endif
// Narrow result if state is itos but result type is smaller.
// Need to narrow in the return bytecode rather than in generate_return_entry
// since compiled code callers expect the result to already be narrowed.
@ -2665,6 +2672,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
ConstantPoolCacheEntry::f1_offset())));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movptr(obj, Address(obj, mirror_offset));
__ resolve_oop_handle(obj);
}
}

View File

@ -46,7 +46,7 @@ address VM_Version::_cpuinfo_segv_addr = 0;
address VM_Version::_cpuinfo_cont_addr = 0;
static BufferBlob* stub_blob;
static const int stub_size = 1000;
static const int stub_size = 1100;
extern "C" {
typedef void (*get_cpu_info_stub_t)(void*);
@ -70,7 +70,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@ -267,14 +267,30 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
__ jcc(Assembler::belowEqual, done);
__ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
__ jccb(Assembler::belowEqual, ext_cpuid1);
__ jcc(Assembler::belowEqual, ext_cpuid1);
__ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
__ jccb(Assembler::belowEqual, ext_cpuid5);
__ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
__ jccb(Assembler::belowEqual, ext_cpuid7);
__ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
__ jccb(Assembler::belowEqual, ext_cpuid8);
__ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
__ jccb(Assembler::below, ext_cpuid8);
//
// Extended cpuid(0x8000001E)
//
__ movl(rax, 0x8000001E);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
//
// Extended cpuid(0x80000008)
//
__ bind(ext_cpuid8);
__ movl(rax, 0x80000008);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@ -1109,11 +1125,27 @@ void VM_Version::get_processor_features() {
}
#ifdef COMPILER2
if (MaxVectorSize > 16) {
// Limit vectors size to 16 bytes on current AMD cpus.
if (cpu_family() < 0x17 && MaxVectorSize > 16) {
// Limit vector size to 16 bytes on AMD CPUs older than family 17h.
FLAG_SET_DEFAULT(MaxVectorSize, 16);
}
#endif // COMPILER2
// Some defaults for AMD family 17h
if ( cpu_family() == 0x17 ) {
// On family 17h processors use XMM and UnalignedLoadStores for Array Copy
if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
}
if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
}
#ifdef COMPILER2
if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
}
#endif
}
}
if( is_intel() ) { // Intel cpus specific settings

View File

@ -228,6 +228,15 @@ class VM_Version : public Abstract_VM_Version {
} bits;
};
// View of the EBX value captured for cpuid function 0x8000001E.
// NOTE(review): field positions presumably follow AMD's definition of
// CPUID Fn8000_001E_EBX for family 17h — confirm against the AMD manual.
union ExtCpuid1EEbx {
uint32_t value; // the raw 32-bit register value
struct {
uint32_t : 8, // 8 unnamed bits (not accessible by name)
threads_per_core : 8, // threads_per_core() adds 1 to this field, so it appears to hold (threads per core - 1)
: 16; // 16 unnamed bits (not accessible by name)
} bits;
};
union XemXcr0Eax {
uint32_t value;
struct {
@ -398,6 +407,12 @@ protected:
ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved
// cpuid function 0x8000001E // AMD 17h
uint32_t ext_cpuid1E_eax;
ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
uint32_t ext_cpuid1E_ecx;
uint32_t ext_cpuid1E_edx; // unused currently
// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
XemXcr0Eax xem_xcr0_eax;
uint32_t xem_xcr0_edx; // reserved
@ -505,6 +520,14 @@ protected:
result |= CPU_CLMUL;
if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
result |= CPU_RTM;
if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
result |= CPU_ADX;
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
result |= CPU_BMI2;
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
result |= CPU_SHA;
if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
result |= CPU_FMA;
// AMD features.
if (is_amd()) {
@ -518,16 +541,8 @@ protected:
}
// Intel features.
if(is_intel()) {
if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
result |= CPU_ADX;
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
result |= CPU_BMI2;
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
result |= CPU_SHA;
if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
result |= CPU_LZCNT;
if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
result |= CPU_FMA;
// for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
result |= CPU_3DNOW_PREFETCH;
@ -590,6 +605,7 @@ public:
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@ -673,9 +689,13 @@ public:
if (is_intel() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
if (cpu_family() >= 0x17) {
result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
} else {
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu();
}
}
return (result == 0 ? 1 : result);
}

View File

@ -50,9 +50,6 @@
#include "stack_zero.inline.hpp"
#include "utilities/debug.hpp"
#include "utilities/macros.hpp"
#ifdef SHARK
#include "shark/shark_globals.hpp"
#endif
#ifdef CC_INTERP
@ -276,7 +273,7 @@ int CppInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) {
markOop disp = lockee->mark()->set_unlocked();
monitor->lock()->set_displaced_header(disp);
if (Atomic::cmpxchg_ptr(monitor, lockee->mark_addr(), disp) != disp) {
if (Atomic::cmpxchg((markOop)monitor, lockee->mark_addr(), disp) != disp) {
if (thread->is_lock_owned((address) disp->clear_lock_bits())) {
monitor->lock()->set_displaced_header(NULL);
}
@ -420,7 +417,8 @@ int CppInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) {
monitor->set_obj(NULL);
if (header != NULL) {
if (Atomic::cmpxchg_ptr(header, rcvr->mark_addr(), lock) != lock) {
markOop old_header = markOopDesc::encode(lock);
if (rcvr->cas_set_mark(header, old_header) != old_header) {
monitor->set_obj(rcvr); {
HandleMark hm(thread);
CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(thread, monitor));

Some files were not shown because too many files have changed in this diff Show More