diff --git a/doc/building.html b/doc/building.html
index 11bac88d460..357b0f5fb4b 100644
--- a/doc/building.html
+++ b/doc/building.html
@@ -463,7 +463,7 @@ tar -xzf freetype-2.5.3.tar.gz
--with-native-debug-symbols=<method>
- Specify if and how native debug symbols should be built. Available methods are none
, internal
, external
, zipped
. Default behavior depends on platform. See Native Debug Symbols for more details.
--with-version-string=<string>
- Specify the version string this build will be identified with.
--with-version-<part>=<value>
- A group of options, where <part>
can be any of pre
, opt
, build
, major
, minor
, security
or patch
. Use these options to modify just the corresponding part of the version string from the default, or the value provided by --with-version-string
.
---with-jvm-variants=<variant>[,<variant>...]
- Build the specified variant (or variants) of Hotspot. Valid variants are: server
, client
, minimal
, core
, zero
, zeroshark
, custom
. Note that not all variants are possible to combine in a single build.
+--with-jvm-variants=<variant>[,<variant>...]
- Build the specified variant (or variants) of Hotspot. Valid variants are: server
, client
, minimal
, core
, zero
, custom
. Note that not all variants are possible to combine in a single build.
--with-jvm-features=<feature>[,<feature>...]
- Use the specified JVM features when building Hotspot. The list of features will be enabled on top of the default list. For the custom
JVM variant, this default list is empty. A complete list of available JVM features can be found using bash configure --help
.
--with-target-bits=<bits>
- Create a target binary suitable for running on a <bits>
platform. Use this to create 32-bit output on a 64-bit build platform, instead of doing a full cross-compile. (This is known as a reduced build.)
diff --git a/doc/building.md b/doc/building.md
index 75827aa2b17..878bdf29771 100644
--- a/doc/building.md
+++ b/doc/building.md
@@ -668,7 +668,7 @@ features, use `bash configure --help=short` instead.)
from the default, or the value provided by `--with-version-string`.
* `--with-jvm-variants=[,...]` - Build the specified variant
(or variants) of Hotspot. Valid variants are: `server`, `client`,
- `minimal`, `core`, `zero`, `zeroshark`, `custom`. Note that not all
+ `minimal`, `core`, `zero`, `custom`. Note that not all
variants are possible to combine in a single build.
* `--with-jvm-features=[,...]` - Use the specified JVM
features when building Hotspot. The list of features will be enabled on top
diff --git a/make/autoconf/flags.m4 b/make/autoconf/flags.m4
index 3e3770208e1..89bb736f2bf 100644
--- a/make/autoconf/flags.m4
+++ b/make/autoconf/flags.m4
@@ -1097,7 +1097,7 @@ AC_DEFUN([FLAGS_SETUP_COMPILER_FLAGS_FOR_JDK_HELPER],
]
)
fi
- if ! HOTSPOT_CHECK_JVM_VARIANT(zero) && ! HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
+ if ! HOTSPOT_CHECK_JVM_VARIANT(zero); then
# Non-zero builds have stricter warnings
$2JVM_CFLAGS="[$]$2JVM_CFLAGS -Wreturn-type -Wundef -Wformat=2"
else
diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4
index 487dbde68ac..3de1175c961 100644
--- a/make/autoconf/hotspot.m4
+++ b/make/autoconf/hotspot.m4
@@ -24,12 +24,12 @@
#
# All valid JVM features, regardless of platform
-VALID_JVM_FEATURES="compiler1 compiler2 zero shark minimal dtrace jvmti jvmci \
+VALID_JVM_FEATURES="compiler1 compiler2 zero minimal dtrace jvmti jvmci \
graal vm-structs jni-check services management all-gcs nmt cds \
static-build link-time-opt aot"
# All valid JVM variants
-VALID_JVM_VARIANTS="server client minimal core zero zeroshark custom"
+VALID_JVM_VARIANTS="server client minimal core zero custom"
###############################################################################
# Check if the specified JVM variant should be built. To be used in shell if
@@ -62,13 +62,12 @@ AC_DEFUN([HOTSPOT_CHECK_JVM_FEATURE],
# minimal: reduced form of client with optional features stripped out
# core: normal interpreter only, no compiler
# zero: C++ based interpreter only, no compiler
-# zeroshark: C++ based interpreter, and a llvm-based compiler
# custom: baseline JVM with no default features
#
AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_VARIANTS],
[
AC_ARG_WITH([jvm-variants], [AS_HELP_STRING([--with-jvm-variants],
- [JVM variants (separated by commas) to build (server,client,minimal,core,zero,zeroshark,custom) @<:@server@:>@])])
+ [JVM variants (separated by commas) to build (server,client,minimal,core,zero,custom) @<:@server@:>@])])
SETUP_HOTSPOT_TARGET_CPU_PORT
@@ -132,7 +131,7 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_VARIANTS],
AC_SUBST(VALID_JVM_VARIANTS)
AC_SUBST(JVM_VARIANT_MAIN)
- if HOTSPOT_CHECK_JVM_VARIANT(zero) || HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
+ if HOTSPOT_CHECK_JVM_VARIANT(zero); then
# zero behaves as a platform and rewrites these values. This is really weird. :(
# We are guaranteed that we do not build any other variants when building zero.
HOTSPOT_TARGET_CPU=zero
@@ -325,15 +324,9 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
fi
fi
- if ! HOTSPOT_CHECK_JVM_VARIANT(zero) && ! HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
+ if ! HOTSPOT_CHECK_JVM_VARIANT(zero); then
if HOTSPOT_CHECK_JVM_FEATURE(zero); then
- AC_MSG_ERROR([To enable zero/zeroshark, you must use --with-jvm-variants=zero/zeroshark])
- fi
- fi
-
- if ! HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
- if HOTSPOT_CHECK_JVM_FEATURE(shark); then
- AC_MSG_ERROR([To enable shark, you must use --with-jvm-variants=zeroshark])
+ AC_MSG_ERROR([To enable zero, you must use --with-jvm-variants=zero])
fi
fi
@@ -408,7 +401,6 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
JVM_FEATURES_core="$NON_MINIMAL_FEATURES $JVM_FEATURES"
JVM_FEATURES_minimal="compiler1 minimal $JVM_FEATURES $JVM_FEATURES_link_time_opt"
JVM_FEATURES_zero="zero $NON_MINIMAL_FEATURES $JVM_FEATURES"
- JVM_FEATURES_zeroshark="zero shark $NON_MINIMAL_FEATURES $JVM_FEATURES"
JVM_FEATURES_custom="$JVM_FEATURES"
AC_SUBST(JVM_FEATURES_server)
@@ -416,7 +408,6 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES],
AC_SUBST(JVM_FEATURES_core)
AC_SUBST(JVM_FEATURES_minimal)
AC_SUBST(JVM_FEATURES_zero)
- AC_SUBST(JVM_FEATURES_zeroshark)
AC_SUBST(JVM_FEATURES_custom)
# Used for verification of Makefiles by check-jvm-feature
@@ -437,7 +428,6 @@ AC_DEFUN_ONCE([HOTSPOT_VALIDATE_JVM_FEATURES],
JVM_FEATURES_core="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_core | $SORT -u))"
JVM_FEATURES_minimal="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_minimal | $SORT -u))"
JVM_FEATURES_zero="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_zero | $SORT -u))"
- JVM_FEATURES_zeroshark="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_zeroshark | $SORT -u))"
JVM_FEATURES_custom="$($ECHO $($PRINTF '%s\n' $JVM_FEATURES_custom | $SORT -u))"
# Validate features
diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4
index fb4849e70b0..30215ee372a 100644
--- a/make/autoconf/jdk-options.m4
+++ b/make/autoconf/jdk-options.m4
@@ -232,7 +232,7 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_JDK_OPTIONS],
# Should we build the serviceability agent (SA)?
INCLUDE_SA=true
- if HOTSPOT_CHECK_JVM_VARIANT(zero) || HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
+ if HOTSPOT_CHECK_JVM_VARIANT(zero); then
INCLUDE_SA=false
fi
if test "x$OPENJDK_TARGET_OS" = xaix ; then
diff --git a/make/autoconf/lib-std.m4 b/make/autoconf/lib-std.m4
index ceb8a45ca89..0b875bc7034 100644
--- a/make/autoconf/lib-std.m4
+++ b/make/autoconf/lib-std.m4
@@ -65,8 +65,7 @@ AC_DEFUN_ONCE([LIB_SETUP_STD_LIBS],
# If dynamic was requested, it's available since it would fail above otherwise.
# If dynamic wasn't requested, go with static unless it isn't available.
AC_MSG_CHECKING([how to link with libstdc++])
- if test "x$with_stdc__lib" = xdynamic || test "x$has_static_libstdcxx" = xno \
- || HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
+ if test "x$with_stdc__lib" = xdynamic || test "x$has_static_libstdcxx" = xno ; then
AC_MSG_RESULT([dynamic])
else
LIBCXX="$LIBCXX $STATIC_STDCXX_FLAGS"
diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4
index 98d202c589c..7d10ea4dd7f 100644
--- a/make/autoconf/libraries.m4
+++ b/make/autoconf/libraries.m4
@@ -79,7 +79,7 @@ AC_DEFUN_ONCE([LIB_DETERMINE_DEPENDENCIES],
fi
# Check if ffi is needed
- if HOTSPOT_CHECK_JVM_VARIANT(zero) || HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
+ if HOTSPOT_CHECK_JVM_VARIANT(zero); then
NEEDS_LIB_FFI=true
else
NEEDS_LIB_FFI=false
@@ -98,69 +98,11 @@ AC_DEFUN_ONCE([LIB_SETUP_LIBRARIES],
LIB_SETUP_FREETYPE
LIB_SETUP_ALSA
LIB_SETUP_LIBFFI
- LIB_SETUP_LLVM
LIB_SETUP_BUNDLED_LIBS
LIB_SETUP_MISC_LIBS
LIB_SETUP_SOLARIS_STLPORT
])
-################################################################################
-# Setup llvm (Low-Level VM)
-################################################################################
-AC_DEFUN_ONCE([LIB_SETUP_LLVM],
-[
- if HOTSPOT_CHECK_JVM_VARIANT(zeroshark); then
- AC_CHECK_PROG([LLVM_CONFIG], [llvm-config], [llvm-config])
-
- if test "x$LLVM_CONFIG" != xllvm-config; then
- AC_MSG_ERROR([llvm-config not found in $PATH.])
- fi
-
- llvm_components="jit mcjit engine nativecodegen native"
- unset LLVM_CFLAGS
- for flag in $("$LLVM_CONFIG" --cxxflags); do
- if echo "${flag}" | grep -q '^-@<:@ID@:>@'; then
- if test "${flag}" != "-D_DEBUG" ; then
- if test "${LLVM_CFLAGS}" != "" ; then
- LLVM_CFLAGS="${LLVM_CFLAGS} "
- fi
- LLVM_CFLAGS="${LLVM_CFLAGS}${flag}"
- fi
- fi
- done
- llvm_version=$("${LLVM_CONFIG}" --version | $SED 's/\.//; s/svn.*//')
- LLVM_CFLAGS="${LLVM_CFLAGS} -DSHARK_LLVM_VERSION=${llvm_version}"
-
- unset LLVM_LDFLAGS
- for flag in $("${LLVM_CONFIG}" --ldflags); do
- if echo "${flag}" | grep -q '^-L'; then
- if test "${LLVM_LDFLAGS}" != ""; then
- LLVM_LDFLAGS="${LLVM_LDFLAGS} "
- fi
- LLVM_LDFLAGS="${LLVM_LDFLAGS}${flag}"
- fi
- done
-
- unset LLVM_LIBS
- for flag in $("${LLVM_CONFIG}" --libs ${llvm_components}); do
- if echo "${flag}" | grep -q '^-l'; then
- if test "${LLVM_LIBS}" != ""; then
- LLVM_LIBS="${LLVM_LIBS} "
- fi
- LLVM_LIBS="${LLVM_LIBS}${flag}"
- fi
- done
-
- # Due to https://llvm.org/bugs/show_bug.cgi?id=16902, llvm does not
- # always properly detect -ltinfo
- LLVM_LIBS="${LLVM_LIBS} -ltinfo"
-
- AC_SUBST(LLVM_CFLAGS)
- AC_SUBST(LLVM_LDFLAGS)
- AC_SUBST(LLVM_LIBS)
- fi
-])
-
################################################################################
# Setup various libraries, typically small system libraries
################################################################################
diff --git a/make/autoconf/spec.gmk.in b/make/autoconf/spec.gmk.in
index 06deccd8e31..ac590a32bce 100644
--- a/make/autoconf/spec.gmk.in
+++ b/make/autoconf/spec.gmk.in
@@ -219,7 +219,6 @@ JVM_FEATURES_client := @JVM_FEATURES_client@
JVM_FEATURES_core := @JVM_FEATURES_core@
JVM_FEATURES_minimal := @JVM_FEATURES_minimal@
JVM_FEATURES_zero := @JVM_FEATURES_zero@
-JVM_FEATURES_zeroshark := @JVM_FEATURES_zeroshark@
JVM_FEATURES_custom := @JVM_FEATURES_custom@
# Used for make-time verifications
@@ -403,11 +402,6 @@ JVM_ASFLAGS := @JVM_ASFLAGS@
JVM_LIBS := @JVM_LIBS@
JVM_RCFLAGS := @JVM_RCFLAGS@
-# Flags for zeroshark
-LLVM_CFLAGS := @LLVM_CFLAGS@
-LLVM_LIBS := @LLVM_LIBS@
-LLVM_LDFLAGS := @LLVM_LDFLAGS@
-
# These flags might contain variables set by a custom extension that is included later.
EXTRA_CFLAGS = @EXTRA_CFLAGS@
EXTRA_CXXFLAGS = @EXTRA_CXXFLAGS@
diff --git a/make/common/Modules.gmk b/make/common/Modules.gmk
index 04cafcd115c..cdbb01b0761 100644
--- a/make/common/Modules.gmk
+++ b/make/common/Modules.gmk
@@ -113,6 +113,7 @@ PLATFORM_MODULES += \
jdk.dynalink \
jdk.httpserver \
jdk.incubator.httpclient \
+ jdk.internal.vm.compiler.management \
jdk.jsobject \
jdk.localedata \
jdk.naming.dns \
@@ -215,6 +216,7 @@ endif
ifeq ($(INCLUDE_GRAAL), false)
MODULES_FILTER += jdk.internal.vm.compiler
+ MODULES_FILTER += jdk.internal.vm.compiler.management
endif
################################################################################
diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js
index 1ef8a3beb3d..e5dd458f806 100644
--- a/make/conf/jib-profiles.js
+++ b/make/conf/jib-profiles.js
@@ -1060,7 +1060,7 @@ var getJibProfilesDependencies = function (input, common) {
jtreg: {
server: "javare",
revision: "4.2",
- build_number: "b08",
+ build_number: "b09",
checksum_file: "MD5_VALUES",
file: "jtreg_bin-4.2.zip",
environment_name: "JT_HOME",
diff --git a/make/copy/Copy-java.base.gmk b/make/copy/Copy-java.base.gmk
index 7cbe140c25b..89d7d791c06 100644
--- a/make/copy/Copy-java.base.gmk
+++ b/make/copy/Copy-java.base.gmk
@@ -87,7 +87,7 @@ endif
#
# How to install jvm.cfg.
#
-ifeq ($(call check-jvm-variant, zero zeroshark), true)
+ifeq ($(call check-jvm-variant, zero), true)
JVMCFG_ARCH := zero
else
JVMCFG_ARCH := $(OPENJDK_TARGET_CPU_LEGACY)
@@ -102,8 +102,6 @@ else
endif
JVMCFG := $(LIB_DST_DIR)/jvm.cfg
-# To do: should this also support -zeroshark?
-
ifeq ($(OPENJDK_TARGET_CPU_BITS), 64)
COPY_JVM_CFG_FILE := true
else
@@ -120,7 +118,7 @@ else
COPY_JVM_CFG_FILE := true
else
# For zero, the default jvm.cfg file is sufficient
- ifeq ($(call check-jvm-variant, zero zeroshark), true)
+ ifeq ($(call check-jvm-variant, zero), true)
COPY_JVM_CFG_FILE := true
endif
endif
diff --git a/make/gensrc/GensrcModuleLoaderMap.gmk b/make/gensrc/GensrcModuleLoaderMap.gmk
index 5d4adeeccba..86d4446496a 100644
--- a/make/gensrc/GensrcModuleLoaderMap.gmk
+++ b/make/gensrc/GensrcModuleLoaderMap.gmk
@@ -54,15 +54,4 @@ $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java:
GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java
-$(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat: \
- $(TOPDIR)/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat \
- $(VARDEPS_FILE) $(BUILD_TOOLS_JDK)
- $(MKDIR) -p $(@D)
- $(RM) $@ $@.tmp
- $(TOOL_GENCLASSLOADERMAP) -boot $(BOOT_MODULES_LIST) \
- -platform $(PLATFORM_MODULES_LIST) -o $@.tmp $<
- $(MV) $@.tmp $@
-
-GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat
-
################################################################################
diff --git a/make/hotspot/ide/CreateVSProject.gmk b/make/hotspot/ide/CreateVSProject.gmk
index 1ba83b21a28..2040e3a1d37 100644
--- a/make/hotspot/ide/CreateVSProject.gmk
+++ b/make/hotspot/ide/CreateVSProject.gmk
@@ -75,7 +75,6 @@ ifeq ($(OPENJDK_TARGET_OS), windows)
-ignorePath linux \
-ignorePath posix \
-ignorePath ppc \
- -ignorePath shark \
-ignorePath solaris \
-ignorePath sparc \
-ignorePath x86_32 \
diff --git a/make/hotspot/lib/CompileJvm.gmk b/make/hotspot/lib/CompileJvm.gmk
index bf4280b8dea..5c7d7177420 100644
--- a/make/hotspot/lib/CompileJvm.gmk
+++ b/make/hotspot/lib/CompileJvm.gmk
@@ -58,6 +58,7 @@ JVM_CFLAGS_INCLUDES += \
-I$(JVM_VARIANT_OUTPUTDIR)/gensrc \
-I$(TOPDIR)/src/hotspot/share/precompiled \
-I$(TOPDIR)/src/hotspot/share/prims \
+ -I$(TOPDIR)/src/java.base/share/native/include \
#
# INCLUDE_SUFFIX_* is only meant for including the proper
diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk
index 9a69c5330bf..348e645d966 100644
--- a/make/hotspot/lib/JvmFeatures.gmk
+++ b/make/hotspot/lib/JvmFeatures.gmk
@@ -47,14 +47,9 @@ endif
ifeq ($(call check-jvm-feature, zero), true)
JVM_CFLAGS_FEATURES += -DZERO -DCC_INTERP -DZERO_LIBARCH='"$(OPENJDK_TARGET_CPU_LEGACY_LIB)"' $(LIBFFI_CFLAGS)
JVM_LIBS_FEATURES += $(LIBFFI_LIBS)
-endif
-
-ifeq ($(call check-jvm-feature, shark), true)
- JVM_CFLAGS_FEATURES += -DSHARK $(LLVM_CFLAGS)
- JVM_LDFLAGS_FEATURES += $(LLVM_LDFLAGS)
- JVM_LIBS_FEATURES += $(LLVM_LIBS)
-else
- JVM_EXCLUDES += shark
+ ifeq ($(OPENJDK_TARGET_CPU), sparcv9)
+ BUILD_LIBJVM_EXTRA_FILES := $(TOPDIR)/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp
+ endif
endif
ifeq ($(call check-jvm-feature, minimal), true)
@@ -129,6 +124,7 @@ ifneq ($(call check-jvm-feature, all-gcs), true)
cms/ g1/ parallel/
JVM_EXCLUDE_FILES += \
concurrentGCThread.cpp \
+ suspendibleThreadSet.cpp \
plab.cpp
JVM_EXCLUDE_FILES += \
g1MemoryPool.cpp \
diff --git a/make/jdk/src/classes/build/tools/module/GenModuleLoaderMap.java b/make/jdk/src/classes/build/tools/module/GenModuleLoaderMap.java
index 6719277fac1..c6c90e0fb59 100644
--- a/make/jdk/src/classes/build/tools/module/GenModuleLoaderMap.java
+++ b/make/jdk/src/classes/build/tools/module/GenModuleLoaderMap.java
@@ -77,30 +77,22 @@ public class GenModuleLoaderMap {
throw new IllegalArgumentException(source + " not exist");
}
- boolean needsQuotes = outfile.toString().contains(".java.tmp");
-
try (BufferedWriter bw = Files.newBufferedWriter(outfile, StandardCharsets.UTF_8);
PrintWriter writer = new PrintWriter(bw)) {
for (String line : Files.readAllLines(source)) {
if (line.contains("@@BOOT_MODULE_NAMES@@")) {
- line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules, needsQuotes);
+ line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules);
} else if (line.contains("@@PLATFORM_MODULE_NAMES@@")) {
- line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules, needsQuotes);
+ line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules);
}
writer.println(line);
}
}
}
- private static String patch(String s, String tag, Stream stream, boolean needsQuotes) {
- String mns = null;
- if (needsQuotes) {
- mns = stream.sorted()
- .collect(Collectors.joining("\",\n \""));
- } else {
- mns = stream.sorted()
- .collect(Collectors.joining("\n"));
- }
+ private static String patch(String s, String tag, Stream stream) { // replaces tag in s with the sorted, joined stream elements
+ String mns = stream.sorted()
+ .collect(Collectors.joining("\",\n \"")); // separator ends one quoted name and starts the next on a new line; the outer quotes presumably come from the template line around the tag - TODO confirm
return s.replace(tag, mns); // every occurrence of tag in the line is substituted
}
diff --git a/make/lib/CoreLibraries.gmk b/make/lib/CoreLibraries.gmk
index f8e0983a00d..090e425d703 100644
--- a/make/lib/CoreLibraries.gmk
+++ b/make/lib/CoreLibraries.gmk
@@ -300,7 +300,7 @@ LIBJLI_SRC_DIRS := $(call FindSrcDirsForLib, java.base, jli)
LIBJLI_CFLAGS := $(CFLAGS_JDKLIB)
-ifeq ($(call check-jvm-variant, zero zeroshark), true)
+ifeq ($(call check-jvm-variant, zero), true)
ERGO_FAMILY := zero
else
ifeq ($(OPENJDK_TARGET_CPU_ARCH), x86)
diff --git a/make/test/JtregNativeHotspot.gmk b/make/test/JtregNativeHotspot.gmk
index dfc45a7a86c..504c3b2d9b1 100644
--- a/make/test/JtregNativeHotspot.gmk
+++ b/make/test/JtregNativeHotspot.gmk
@@ -50,6 +50,7 @@ BUILD_HOTSPOT_JTREG_NATIVE_SRC += \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/8025979 \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/8033445 \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/checked \
+ $(TOPDIR)/test/hotspot/jtreg/runtime/jni/FindClass \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/PrivateInterfaceMethods \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/ToStringInInterfaceTest \
$(TOPDIR)/test/hotspot/jtreg/runtime/jni/CalleeSavedRegisters \
@@ -59,6 +60,7 @@ BUILD_HOTSPOT_JTREG_NATIVE_SRC += \
$(TOPDIR)/test/hotspot/jtreg/runtime/SameObject \
$(TOPDIR)/test/hotspot/jtreg/runtime/BoolReturn \
$(TOPDIR)/test/hotspot/jtreg/runtime/noClassDefFoundMsg \
+ $(TOPDIR)/test/hotspot/jtreg/runtime/RedefineTests \
$(TOPDIR)/test/hotspot/jtreg/compiler/floatingpoint/ \
$(TOPDIR)/test/hotspot/jtreg/compiler/calls \
$(TOPDIR)/test/hotspot/jtreg/serviceability/jvmti/GetOwnedMonitorInfo \
@@ -103,6 +105,7 @@ ifeq ($(TOOLCHAIN_TYPE), solstudio)
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAClassLoadPrepare := -lc
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libMAAThreadStart := -lc
BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libAllowedFunctions := -lc
+ BUILD_HOTSPOT_JTREG_LIBRARIES_LIBS_libRedefineDoubleDelete := -lc
endif
ifeq ($(OPENJDK_TARGET_OS), linux)
diff --git a/src/hotspot/.mx.jvmci/hotspot/templates/eclipse/cproject b/src/hotspot/.mx.jvmci/hotspot/templates/eclipse/cproject
index b156340e98b..6f2353059e7 100644
--- a/src/hotspot/.mx.jvmci/hotspot/templates/eclipse/cproject
+++ b/src/hotspot/.mx.jvmci/hotspot/templates/eclipse/cproject
@@ -70,7 +70,7 @@
-
+
diff --git a/src/hotspot/.mx.jvmci/mx_jvmci.py b/src/hotspot/.mx.jvmci/mx_jvmci.py
index 37a9baff97e..5b582b3d45d 100644
--- a/src/hotspot/.mx.jvmci/mx_jvmci.py
+++ b/src/hotspot/.mx.jvmci/mx_jvmci.py
@@ -256,14 +256,10 @@ class HotSpotProject(mx.NativeProject):
"""
roots = [
- 'ASSEMBLY_EXCEPTION',
- 'LICENSE',
- 'README',
- 'THIRD_PARTY_README',
- 'agent',
- 'make',
- 'src',
- 'test'
+ 'cpu',
+ 'os',
+ 'os_cpu',
+ 'share'
]
for jvmVariant in _jdkJvmVariants:
@@ -605,6 +601,16 @@ def _get_openjdk_cpu():
def _get_openjdk_os_cpu():
return _get_openjdk_os() + '-' + _get_openjdk_cpu()
+def _get_jdk_dir():  # returns the suite's parent path cut off at the first "open" or "src" component
+ suiteParentDir = dirname(_suite.dir)
+ # suiteParentDir is now something like: /some_prefix/jdk10-hs/open/src
+ pathComponents = suiteParentDir.split(os.sep)
+ for i in range(0, len(pathComponents)):
+ if pathComponents[i] in ["open", "src"]:  # first match wins, so "open" is cut before a later "src"
+ del pathComponents[i:]  # drop the matching component and everything after it
+ break
+ return os.path.join(os.sep, *pathComponents)  # re-root at os.sep; if nothing matched, the path is returned untruncated
+
def _get_jdk_build_dir(debugLevel=None):
"""
Gets the directory into which the JDK is built. This directory contains
@@ -613,7 +619,7 @@ def _get_jdk_build_dir(debugLevel=None):
if debugLevel is None:
debugLevel = _vm.debugLevel
name = '{}-{}-{}-{}'.format(_get_openjdk_os_cpu(), 'normal', _vm.jvmVariant, debugLevel)
- return join(dirname(_suite.dir), 'build', name)
+ return join(_get_jdk_dir(), 'build', name)
_jvmci_bootclasspath_prepends = []
diff --git a/src/hotspot/.mx.jvmci/suite.py b/src/hotspot/.mx.jvmci/suite.py
index 9415623d92b..edea9f2d82e 100644
--- a/src/hotspot/.mx.jvmci/suite.py
+++ b/src/hotspot/.mx.jvmci/suite.py
@@ -24,9 +24,7 @@ suite = {
"defaultLicense" : "GPLv2-CPE",
- # This puts mx/ as a sibling of the JDK build configuration directories
- # (e.g., macosx-x86_64-normal-server-release).
- "outputRoot" : "../build/mx/hotspot",
+ "outputRoot" : "../../build/mx/hotspot",
# ------------- Libraries -------------
@@ -43,7 +41,7 @@ suite = {
# ------------- JVMCI:Service -------------
"jdk.vm.ci.services" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"javaCompliance" : "9",
"workingSets" : "API,JVMCI",
@@ -52,7 +50,7 @@ suite = {
# ------------- JVMCI:API -------------
"jdk.vm.ci.common" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"checkstyle" : "jdk.vm.ci.services",
"javaCompliance" : "9",
@@ -60,7 +58,7 @@ suite = {
},
"jdk.vm.ci.meta" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"checkstyle" : "jdk.vm.ci.services",
"javaCompliance" : "9",
@@ -68,7 +66,7 @@ suite = {
},
"jdk.vm.ci.code" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.meta"],
"checkstyle" : "jdk.vm.ci.services",
@@ -77,7 +75,7 @@ suite = {
},
"jdk.vm.ci.code.test" : {
- "subDir" : "test/compiler/jvmci",
+ "subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:JUNIT",
@@ -92,7 +90,7 @@ suite = {
},
"jdk.vm.ci.runtime" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.code",
@@ -104,7 +102,7 @@ suite = {
},
"jdk.vm.ci.runtime.test" : {
- "subDir" : "test/compiler/jvmci",
+ "subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:JUNIT",
@@ -119,7 +117,7 @@ suite = {
# ------------- JVMCI:HotSpot -------------
"jdk.vm.ci.aarch64" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.code"],
"checkstyle" : "jdk.vm.ci.services",
@@ -128,7 +126,7 @@ suite = {
},
"jdk.vm.ci.amd64" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.code"],
"checkstyle" : "jdk.vm.ci.services",
@@ -137,7 +135,7 @@ suite = {
},
"jdk.vm.ci.sparc" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : ["jdk.vm.ci.code"],
"checkstyle" : "jdk.vm.ci.services",
@@ -146,7 +144,7 @@ suite = {
},
"jdk.vm.ci.hotspot" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.common",
@@ -163,7 +161,7 @@ suite = {
},
"jdk.vm.ci.hotspot.test" : {
- "subDir" : "test/compiler/jvmci",
+ "subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"TESTNG",
@@ -175,7 +173,7 @@ suite = {
},
"jdk.vm.ci.hotspot.aarch64" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.aarch64",
@@ -187,7 +185,7 @@ suite = {
},
"jdk.vm.ci.hotspot.amd64" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.amd64",
@@ -199,7 +197,7 @@ suite = {
},
"jdk.vm.ci.hotspot.sparc" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"sourceDirs" : ["src"],
"dependencies" : [
"jdk.vm.ci.sparc",
@@ -221,12 +219,12 @@ suite = {
# ------------- Distributions -------------
"JVMCI_SERVICES" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"dependencies" : ["jdk.vm.ci.services"],
},
"JVMCI_API" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"dependencies" : [
"jdk.vm.ci.runtime",
"jdk.vm.ci.common",
@@ -240,7 +238,7 @@ suite = {
},
"JVMCI_HOTSPOT" : {
- "subDir" : "src/jdk.internal.vm.ci/share/classes",
+ "subDir" : "../jdk.internal.vm.ci/share/classes",
"dependencies" : [
"jdk.vm.ci.hotspot.aarch64",
"jdk.vm.ci.hotspot.amd64",
@@ -253,7 +251,7 @@ suite = {
},
"JVMCI_TEST" : {
- "subDir" : "test/compiler/jvmci",
+ "subDir" : "../../test/hotspot/jtreg/compiler/jvmci",
"dependencies" : [
"jdk.vm.ci.runtime.test",
],
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index e6d9de99685..b7ff274e212 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -2575,13 +2575,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
Register mdo = op->mdo()->as_register();
__ mov_metadata(mdo, md->constant_encoding());
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
- Bytecodes::Code bc = method->java_code_at_bci(bci);
- const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes
- if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
- !callee_is_static && // required for optimized MH invokes
- C1ProfileVirtualCalls) {
+ if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, recv);
diff --git a/src/hotspot/cpu/aarch64/jniTypes_aarch64.hpp b/src/hotspot/cpu/aarch64/jniTypes_aarch64.hpp
index 857a45a6071..bdf4d55bfbc 100644
--- a/src/hotspot/cpu/aarch64/jniTypes_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/jniTypes_aarch64.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -26,9 +26,9 @@
#ifndef CPU_AARCH64_VM_JNITYPES_AARCH64_HPP
#define CPU_AARCH64_VM_JNITYPES_AARCH64_HPP
+#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
-#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive jni
// types to the array of arguments passed into JavaCalls::call
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index 78637f21dc1..a7fb3df4ee7 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -2840,6 +2840,44 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
bind(L_done);
}
+// Code for BigInteger::mulAdd intrinsic
+// out = r0
+// in = r1
+// offset = r2 (already out.length-offset)
+// len = r3
+// k = r4
+//
+// pseudo code from java implementation:
+// carry = 0;
+// offset = out.length-offset - 1;
+// for (int j=len-1; j >= 0; j--) {
+// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
+// out[offset--] = (int)product;
+// carry = product >>> 32;
+// }
+// return (int)carry;
+void MacroAssembler::mul_add(Register out, Register in, Register offset,
+ Register len, Register k) {
+ Label LOOP, END;
+ // pre-loop
+ cmp(len, zr); // cmp, not cbz/cbnz: the flags are used twice below => fewer branches
+ csel(out, zr, out, Assembler::EQ); // len == 0: out becomes 0 (the carry result), otherwise unchanged
+ br(Assembler::EQ, END); // len == 0: skip the loop entirely
+ add(in, in, len, LSL, 2); // in[j+1] address
+ add(offset, out, offset, LSL, 2); // out[offset + 1] address
+ mov(out, zr); // used to keep carry now
+ BIND(LOOP);
+ ldrw(rscratch1, Address(pre(in, -4))); // step in back by 4 bytes, then load in[j]
+ madd(rscratch1, rscratch1, k, out); // in[j] * k + carry
+ ldrw(rscratch2, Address(pre(offset, -4))); // step offset back by 4 bytes, then load out[offset]
+ add(rscratch1, rscratch1, rscratch2); // product = in[j] * k + carry + out[offset]
+ strw(rscratch1, Address(offset)); // out[offset] = (int)product
+ lsr(out, rscratch1, 32); // carry = product >>> 32
+ subs(len, len, 1); // one element done; sets the flags tested by the branch below
+ br(Assembler::NE, LOOP); // repeat until len reaches 0
+ BIND(END); // out holds the final carry here (0 when len was 0)
+}
+
/**
* Emits code to update CRC-32 with a byte value according to constants in table
*
@@ -3291,6 +3329,7 @@ void MacroAssembler::load_mirror(Register dst, Register method) {
ldr(dst, Address(dst, ConstMethod::constants_offset()));
ldr(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
ldr(dst, Address(dst, mirror_offset));
+ resolve_oop_handle(dst);
}
void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index a3a3c74c626..d0ca968bd44 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -1265,6 +1265,7 @@ public:
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
Register zlen, Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6, Register tmp7);
+ void mul_add(Register out, Register in, Register offs, Register len, Register k);
// ISB may be needed because of a safepoint
void maybe_isb() { isb(); }
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index e6e79a468d2..d9c2cdc4a52 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -3607,6 +3607,63 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ address generate_squareToLen() {
+ // Emits the "squareToLen" stub. The squareToLen algorithm described in the
+ // Java code (sizes 1..127) is faster than multiply_to_len on some CPUs and
+ // slower on others, but multiply_to_len is slightly better overall, so it is used.
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "squareToLen");
+ address start = __ pc(); // returned to the caller as the stub's address
+
+ const Register x = r0;
+ const Register xlen = r1;
+ const Register z = r2;
+ const Register zlen = r3;
+ const Register y = r4; // == x (second operand, set from x below)
+ const Register ylen = r5; // == xlen (set from xlen below)
+
+ const Register tmp1 = r10;
+ const Register tmp2 = r11;
+ const Register tmp3 = r12;
+ const Register tmp4 = r13;
+ const Register tmp5 = r14;
+ const Register tmp6 = r15;
+ const Register tmp7 = r16;
+
+ RegSet spilled_regs = RegSet::of(y, ylen); // r4/r5 are overwritten below, so save them
+ BLOCK_COMMENT("Entry:");
+ __ enter();
+ __ push(spilled_regs, sp);
+ __ mov(y, x); // squaring: both multiply operands are the same array
+ __ mov(ylen, xlen);
+ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+ __ pop(spilled_regs, sp); // restore r4/r5
+ __ leave();
+ __ ret(lr);
+ return start;
+ }
+
+ address generate_mulAdd() { // emits the "mulAdd" stub and returns its start address
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "mulAdd");
+
+ address start = __ pc(); // returned to the caller as the stub's address
+
+ const Register out = r0; // r0..r4 carry the five operands, in mul_add's parameter order
+ const Register in = r1;
+ const Register offset = r2;
+ const Register len = r3;
+ const Register k = r4;
+
+ BLOCK_COMMENT("Entry:");
+ __ enter();
+ __ mul_add(out, in, offset, len, k); // the whole stub body is the mul_add macro
+ __ leave();
+ __ ret(lr);
+
+ return start;
+ }
+
void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
@@ -4913,6 +4970,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
+ if (UseSquareToLenIntrinsic) {
+ StubRoutines::_squareToLen = generate_squareToLen();
+ }
+
+ if (UseMulAddIntrinsic) {
+ StubRoutines::_mulAdd = generate_mulAdd();
+ }
+
if (UseMontgomeryMultiplyIntrinsic) {
StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
index ae182677be5..11b3f49292c 100644
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
@@ -2195,6 +2195,13 @@ void TemplateTable::_return(TosState state)
__ bind(skip_register_finalizer);
}
+ // Explicitly reset last_sp, for handling special case in TemplateInterpreter::deopt_reexecute_entry
+#ifdef ASSERT
+ if (state == vtos) {
+ __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+ }
+#endif
+
// Issue a StoreStore barrier after all stores but before return
// from any constructor for any class with a final field. We don't
// know if this is a finalizer, so we always do so.
@@ -2297,6 +2304,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
ConstantPoolCacheEntry::f1_offset())));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ldr(obj, Address(obj, mirror_offset));
+ __ resolve_oop_handle(obj);
}
}
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
index 722ebbba322..8480e4e8e71 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
@@ -340,6 +340,14 @@ void VM_Version::get_processor_features() {
UseMultiplyToLenIntrinsic = true;
}
+ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
+ UseSquareToLenIntrinsic = true;
+ }
+
+ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
+ UseMulAddIntrinsic = true;
+ }
+
if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) {
UseBarriersForVolatile = (_features & CPU_DMB_ATOMICS) != 0;
}
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
index caf00718656..3f1692f7544 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
@@ -3168,14 +3168,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
}
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
- Bytecodes::Code bc = method->java_code_at_bci(bci);
- const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes
- if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
- !callee_is_static && // required for optimized MH invokes
- C1ProfileVirtualCalls) {
-
+ if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, tmp1, recv);
diff --git a/src/hotspot/cpu/arm/jniTypes_arm.hpp b/src/hotspot/cpu/arm/jniTypes_arm.hpp
index 7b23ccfe262..636f8e81a2c 100644
--- a/src/hotspot/cpu/arm/jniTypes_arm.hpp
+++ b/src/hotspot/cpu/arm/jniTypes_arm.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,9 +25,9 @@
#ifndef CPU_ARM_VM_JNITYPES_ARM_HPP
#define CPU_ARM_VM_JNITYPES_ARM_HPP
+#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
-#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive jni
// types to the array of arguments passed into JavaCalls::call
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.cpp b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
index 53eb53f2c7f..ea4d7782983 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
@@ -2899,6 +2899,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp)
ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
ldr(mirror, Address(tmp, mirror_offset));
+ resolve_oop_handle(mirror);
}
diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
index 59bc2e5b5d8..666cbd8860e 100644
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
@@ -42,10 +42,6 @@
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
-#ifdef SHARK
-#include "compiler/compileBroker.hpp"
-#include "shark/sharkCompiler.hpp"
-#endif
#define __ masm->
diff --git a/src/hotspot/cpu/arm/stubGenerator_arm.cpp b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
index d5ca6401fa4..97ef93c2e4f 100644
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -2867,46 +2867,51 @@ class StubGenerator: public StubCodeGenerator {
// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR) except for callee_saved_regs.
void gen_write_ref_array_pre_barrier(Register addr, Register count, int callee_saved_regs) {
BarrierSet* bs = Universe::heap()->barrier_set();
- if (bs->has_write_ref_pre_barrier()) {
- assert(bs->has_write_ref_array_pre_opt(),
- "Else unsupported barrier set.");
+ switch (bs->kind()) {
+ case BarrierSet::G1SATBCTLogging:
+ {
+ assert( addr->encoding() < callee_saved_regs, "addr must be saved");
+ assert(count->encoding() < callee_saved_regs, "count must be saved");
- assert( addr->encoding() < callee_saved_regs, "addr must be saved");
- assert(count->encoding() < callee_saved_regs, "count must be saved");
-
- BLOCK_COMMENT("PreBarrier");
+ BLOCK_COMMENT("PreBarrier");
#ifdef AARCH64
- callee_saved_regs = align_up(callee_saved_regs, 2);
- for (int i = 0; i < callee_saved_regs; i += 2) {
- __ raw_push(as_Register(i), as_Register(i+1));
- }
+ callee_saved_regs = align_up(callee_saved_regs, 2);
+ for (int i = 0; i < callee_saved_regs; i += 2) {
+ __ raw_push(as_Register(i), as_Register(i+1));
+ }
#else
- RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1));
- __ push(saved_regs | R9ifScratched);
+ RegisterSet saved_regs = RegisterSet(R0, as_Register(callee_saved_regs-1));
+ __ push(saved_regs | R9ifScratched);
#endif // AARCH64
- if (addr != R0) {
- assert_different_registers(count, R0);
- __ mov(R0, addr);
- }
+ if (addr != R0) {
+ assert_different_registers(count, R0);
+ __ mov(R0, addr);
+ }
#ifdef AARCH64
- __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_pre takes size_t
+ __ zero_extend(R1, count, 32); // BarrierSet::static_write_ref_array_pre takes size_t
#else
- if (count != R1) {
- __ mov(R1, count);
- }
+ if (count != R1) {
+ __ mov(R1, count);
+ }
#endif // AARCH64
- __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
+ __ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
#ifdef AARCH64
- for (int i = callee_saved_regs - 2; i >= 0; i -= 2) {
- __ raw_pop(as_Register(i), as_Register(i+1));
- }
+ for (int i = callee_saved_regs - 2; i >= 0; i -= 2) {
+ __ raw_pop(as_Register(i), as_Register(i+1));
+ }
#else
- __ pop(saved_regs | R9ifScratched);
+ __ pop(saved_regs | R9ifScratched);
#endif // AARCH64
+ }
+ case BarrierSet::CardTableForRS:
+ case BarrierSet::CardTableExtension:
+ break;
+ default:
+ ShouldNotReachHere();
}
}
#endif // INCLUDE_ALL_GCS
diff --git a/src/hotspot/cpu/arm/templateTable_arm.cpp b/src/hotspot/cpu/arm/templateTable_arm.cpp
index 7fd60ce8d2d..93018d26820 100644
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp
@@ -2844,6 +2844,19 @@ void TemplateTable::_return(TosState state) {
__ bind(skip_register_finalizer);
}
+ // Explicitly reset last_sp, for handling special case in TemplateInterpreter::deopt_reexecute_entry
+#ifdef ASSERT
+ if (state == vtos) {
+#ifndef AARCH64
+ __ mov(Rtemp, 0);
+ __ str(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
+#else
+ __ restore_sp_after_call(Rtemp);
+ __ restore_stack_top();
+#endif
+ }
+#endif
+
// Narrow result if state is itos but result type is smaller.
// Need to narrow in the return bytecode rather than in generate_return_entry
// since compiled code callers expect the result to already be narrowed.
@@ -2963,6 +2976,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Rcache,
cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ldr(Robj, Address(Robj, mirror_offset));
+ __ resolve_oop_handle(Robj);
}
}
diff --git a/src/hotspot/cpu/ppc/assembler_ppc.hpp b/src/hotspot/cpu/ppc/assembler_ppc.hpp
index 26b77b8cbc3..03be7f4bdcb 100644
--- a/src/hotspot/cpu/ppc/assembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/assembler_ppc.hpp
@@ -517,6 +517,9 @@ class Assembler : public AbstractAssembler {
XXPERMDI_OPCODE= (60u << OPCODE_SHIFT | 10u << 3),
XXMRGHW_OPCODE = (60u << OPCODE_SHIFT | 18u << 3),
XXMRGLW_OPCODE = (60u << OPCODE_SHIFT | 50u << 3),
+ XXSPLTW_OPCODE = (60u << OPCODE_SHIFT | 164u << 2),
+ XXLXOR_OPCODE = (60u << OPCODE_SHIFT | 154u << 3),
+ XXLEQV_OPCODE = (60u << OPCODE_SHIFT | 186u << 3),
// Vector Permute and Formatting
VPKPX_OPCODE = (4u << OPCODE_SHIFT | 782u ),
@@ -1125,6 +1128,7 @@ class Assembler : public AbstractAssembler {
static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions
static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction
static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions
+ static int xxsplt_uim(int x) { return opp_u_field(x, 15, 14); } // for xxsplt* instructions
//static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes
//static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes
@@ -1308,6 +1312,7 @@ class Assembler : public AbstractAssembler {
inline void li( Register d, int si16);
inline void lis( Register d, int si16);
inline void addir(Register d, int si16, Register a);
+ inline void subi( Register d, Register a, int si16);
static bool is_addi(int x) {
return ADDI_OPCODE == (x & ADDI_OPCODE_MASK);
@@ -2154,6 +2159,11 @@ class Assembler : public AbstractAssembler {
inline void xxpermdi( VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm);
inline void xxmrghw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxmrglw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
+ inline void mtvsrd( VectorSRegister d, Register a);
+ inline void mtvsrwz( VectorSRegister d, Register a);
+ inline void xxspltw( VectorSRegister d, VectorSRegister b, int ui2);
+ inline void xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b);
+ inline void xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b);
// VSX Extended Mnemonics
inline void xxspltd( VectorSRegister d, VectorSRegister a, int x);
@@ -2174,7 +2184,8 @@ class Assembler : public AbstractAssembler {
inline void vsbox( VectorRegister d, VectorRegister a);
// SHA (introduced with Power 8)
- // Not yet implemented.
+ inline void vshasigmad(VectorRegister d, VectorRegister a, bool st, int six);
+ inline void vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);
// Vector Binary Polynomial Multiplication (introduced with Power 8)
inline void vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b);
@@ -2285,6 +2296,11 @@ class Assembler : public AbstractAssembler {
inline void lvsl( VectorRegister d, Register s2);
inline void lvsr( VectorRegister d, Register s2);
+ // Endianness-specific concatenation of 2 loaded vectors.
+ inline void load_perm(VectorRegister perm, Register addr);
+ inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
+ inline void vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm);
+
// RegisterOrConstant versions.
// These emitters choose between the versions using two registers and
// those with register and immediate, depending on the content of roc.
diff --git a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
index d21ffaf8fcb..3931b56093e 100644
--- a/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
+++ b/src/hotspot/cpu/ppc/assembler_ppc.inline.hpp
@@ -164,6 +164,7 @@ inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32
inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); }
inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); }
inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
+inline void Assembler::subi( Register d, Register a, int si16) { Assembler::addi(d, a, -si16); }
// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
inline void Assembler::cmpi( ConditionRegister f, int l, Register a, int si16) { emit_int32( CMPI_OPCODE | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
@@ -760,9 +761,14 @@ inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit
// Vector-Scalar (VSX) instructions.
inline void Assembler::lxvd2x( VectorSRegister d, Register s1) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::lxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
-inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
-inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
-inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
+inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra(0) | rb(s1)); }
+inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::mtvsrd( VectorSRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d) | ra(a)); }
+inline void Assembler::mtvsrwz( VectorSRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d) | ra(a)); }
+inline void Assembler::xxspltw( VectorSRegister d, VectorSRegister b, int ui2) { emit_int32( XXSPLTW_OPCODE | vsrt(d) | vsrb(b) | xxsplt_uim(uimm(ui2,2))); }
+inline void Assembler::xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLXOR_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLEQV_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
+inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mtvrwz( VectorRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrwz( Register a, VectorRegister d) { emit_int32( MFVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
@@ -925,7 +931,8 @@ inline void Assembler::vncipherlast(VectorRegister d, VectorRegister a, VectorRe
inline void Assembler::vsbox( VectorRegister d, VectorRegister a) { emit_int32( VSBOX_OPCODE | vrt(d) | vra(a) ); }
// SHA (introduced with Power 8)
-// Not yet implemented.
+inline void Assembler::vshasigmad(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAD_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
+inline void Assembler::vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAW_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
// Vector Binary Polynomial Multiplication (introduced with Power 8)
inline void Assembler::vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPMSUMB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@@ -1034,6 +1041,30 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }
+inline void Assembler::load_perm(VectorRegister perm, Register addr) { // fills perm from addr; instruction depends on endianness
+#if defined(VM_LITTLE_ENDIAN)
+ lvsr(perm, addr); // little-endian build: lvsr
+#else
+ lvsl(perm, addr); // big-endian build: lvsl
+#endif
+}
+
+inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm) { // in-place form: first_dest is both the first input and the result
+#if defined(VM_LITTLE_ENDIAN)
+ vperm(first_dest, second, first_dest, perm); // little-endian build: the two source operands are swapped
+#else
+ vperm(first_dest, first_dest, second, perm); // big-endian build: sources in (first, second) order
+#endif
+}
+
+inline void Assembler::vec_perm(VectorRegister dest, VectorRegister first, VectorRegister second, VectorRegister perm) { // as the 3-register form, but with a separate destination
+#if defined(VM_LITTLE_ENDIAN)
+ vperm(dest, second, first, perm); // little-endian build: the two source operands are swapped
+#else
+ vperm(dest, first, second, perm); // big-endian build: sources in (first, second) order
+#endif
+}
+
inline void Assembler::load_const(Register d, void* x, Register tmp) {
load_const(d, (long)x, tmp);
}
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
index 62be4f4820b..58a5df65c09 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
@@ -2774,13 +2774,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
__ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
}
- Bytecodes::Code bc = method->java_code_at_bci(bci);
- const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
- // invokeinterface bytecodes.
- if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
- !callee_is_static && // Required for optimized MH invokes.
- C1ProfileVirtualCalls) {
+ // invokeinterface bytecodes
+ if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, tmp1, recv);
diff --git a/src/hotspot/cpu/ppc/globals_ppc.hpp b/src/hotspot/cpu/ppc/globals_ppc.hpp
index 0bc041e9188..b6d04455c84 100644
--- a/src/hotspot/cpu/ppc/globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/globals_ppc.hpp
@@ -32,7 +32,7 @@
// Sets the default values for platform dependent flags used by the runtime system.
// (see globals.hpp)
-define_pd_global(bool, ShareVtableStubs, false); // Improves performance markedly for mtrt and compress.
+define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.
@@ -103,6 +103,9 @@ define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
"CPU Version: x for PowerX. Currently recognizes Power5 to " \
"Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
\
+ product(bool, SuperwordUseVSX, false, \
+ "Use Power8 VSX instructions for superword optimization.") \
+ \
/* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \
/* indirect call by a direct call. */ \
product(bool, ReoptimizeCallSequences, true, \
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index 0db86269875..668fe93a7fa 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -863,7 +863,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
//
// markOop displaced_header = obj->mark().set_unlocked();
// monitor->lock()->set_displaced_header(displaced_header);
- // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+ // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// // We stored the monitor address into the object's mark word.
// } else if (THREAD->is_lock_owned((address)displaced_header))
// // Simple recursive case.
@@ -901,7 +901,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
- // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+ // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// Store stack address of the BasicObjectLock (this is monitor) into object.
addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
@@ -977,7 +977,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_e
// if ((displaced_header = monitor->displaced_header()) == NULL) {
// // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
// monitor->set_obj(NULL);
- // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+ // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
// monitor->set_obj(NULL);
// } else {
@@ -1010,7 +1010,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_e
cmpdi(CCR0, displaced_header, 0);
beq(CCR0, free_slot); // recursive unlock
- // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+ // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
// monitor->set_obj(NULL);
diff --git a/src/hotspot/cpu/ppc/jniTypes_ppc.hpp b/src/hotspot/cpu/ppc/jniTypes_ppc.hpp
index 7179e998931..6a6ecd24390 100644
--- a/src/hotspot/cpu/ppc/jniTypes_ppc.hpp
+++ b/src/hotspot/cpu/ppc/jniTypes_ppc.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2013 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -26,9 +26,9 @@
#ifndef CPU_PPC_VM_JNITYPES_PPC_HPP
#define CPU_PPC_VM_JNITYPES_PPC_HPP
+#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
-#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive
// jni types to the array of arguments passed into JavaCalls::call.
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index 9ce7be54a84..bce57c7e137 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -129,7 +129,7 @@ void MacroAssembler::calculate_address_from_global_toc(Register dst, address add
}
}
-int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
+address MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
const int offset = MacroAssembler::offset_to_global_toc(addr);
const address inst2_addr = a;
@@ -155,7 +155,7 @@ int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, addres
assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
- return (int)((intptr_t)addr - (intptr_t)inst1_addr);
+ return inst1_addr;
}
address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
@@ -201,7 +201,7 @@ address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr
// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
// ori rx = rx | const.lo
// Clrldi will be passed by.
-int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
+address MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
assert(UseCompressedOops, "Should only patch compressed oops");
const address inst2_addr = a;
@@ -227,7 +227,7 @@ int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop dat
set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
set_imm((int *)inst2_addr, (xd)); // unsigned int
- return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
+ return inst1_addr;
}
// Get compressed oop or klass constant.
@@ -3382,6 +3382,7 @@ void MacroAssembler::load_mirror_from_const_method(Register mirror, Register con
ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);
+ resolve_oop_handle(mirror);
}
// Clear Array
@@ -5234,6 +5235,40 @@ void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
bind(L_post_third_loop_done);
} // multiply_128_x_128_loop
+void MacroAssembler::muladd(Register out, Register in,
+ Register offset, Register len, Register k,
+ Register tmp1, Register tmp2, Register carry) {
+
+ // Emits a loop that, per 32-bit word, computes sum = in_word * k + out_word + carry,
+ Label LOOP, SKIP; // stores the low word of sum back to out and keeps sum >> 32 in carry.
+
+ // Compare len with 0; the ble below skips the loop when len <= 0.
+ cmpdi (CCR0, len, 0);
+
+ // Prepare variables (offset and len are clobbered; carry holds the final carry on exit)
+ subi (offset, offset, 4); // offset is used as a byte offset into out: step back one word
+ li (carry, 0); // carry = 0
+ ble (CCR0, SKIP); // uses the cmpdi result above
+
+ mtctr (len); // loop count = len
+ subi (len, len, 1 );
+ sldi (len, len, 2 ); // len = (len - 1) * 4: byte offset of the last word of in
+
+ // Main loop: walks both arrays from higher to lower addresses
+ bind(LOOP);
+ lwzx (tmp1, len, in ); // tmp1 = word at in + len
+ lwzx (tmp2, offset, out ); // tmp2 = word at out + offset
+ mulld (tmp1, tmp1, k ); // tmp1 *= k (NOTE(review): presumably k is zero-extended by the caller - confirm)
+ add (tmp2, carry, tmp2 );
+ add (tmp2, tmp1, tmp2 ); // tmp2 = in_word * k + out_word + carry
+ stwx (tmp2, offset, out ); // store the low 32 bits back to out + offset
+ srdi (carry, tmp2, 32 ); // carry = upper 32 bits of the sum
+ subi (offset, offset, 4 ); // move both cursors down by one word
+ subi (len, len, 4 );
+ bdnz (LOOP); // decrement the counter set by mtctr; loop while non-zero
+ bind(SKIP);
+}
+
void MacroAssembler::multiply_to_len(Register x, Register xlen,
Register y, Register ylen,
Register z, Register zlen,
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index db04a3700e7..f1fe2385907 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -105,13 +105,15 @@ class MacroAssembler: public Assembler {
};
inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
- static int patch_calculate_address_from_global_toc_at(address a, address addr, address bound);
+ // Returns address of first instruction in sequence.
+ static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr);
static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);
#ifdef _LP64
// Patch narrow oop constant.
inline static bool is_set_narrow_oop(address a, address bound);
- static int patch_set_narrow_oop(address a, address bound, narrowOop data);
+ // Returns address of first instruction in sequence.
+ static address patch_set_narrow_oop(address a, address bound, narrowOop data);
static narrowOop get_narrow_oop(address a, address bound);
#endif
@@ -813,6 +815,8 @@ class MacroAssembler: public Assembler {
Register yz_idx, Register idx, Register carry,
Register product_high, Register product,
Register carry2, Register tmp);
+ void muladd(Register out, Register in, Register offset, Register len, Register k,
+ Register tmp1, Register tmp2, Register carry);
void multiply_to_len(Register x, Register xlen,
Register y, Register ylen,
Register z, Register zlen,
@@ -862,6 +866,40 @@ class MacroAssembler: public Assembler {
void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
bool invertCRC);
+ // SHA-2 auxiliary functions and public interfaces
+ private:
+ void sha256_deque(const VectorRegister src,
+ const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
+ void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
+ void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
+ void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
+ const int total_ws, const Register k, const VectorRegister* kpws,
+ const int total_kpws);
+ void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
+ const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
+ const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
+ const Register j, const Register k);
+ void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
+ const VectorRegister c, const VectorRegister d, const VectorRegister e,
+ const VectorRegister f, const VectorRegister g, const VectorRegister h,
+ const Register hptr);
+
+ void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
+ void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
+ void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
+ void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
+ void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
+ const VectorRegister w2, const VectorRegister w3,
+ const VectorRegister w4, const VectorRegister w5,
+ const VectorRegister w6, const VectorRegister w7,
+ const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
+ const VectorRegister vRb, const Register k);
+
+ public:
+ void sha256(bool multi_block);
+ void sha512(bool multi_block);
+
+
//
// Debugging
//
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp
new file mode 100644
index 00000000000..7a82ed3f99d
--- /dev/null
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc_sha.cpp
@@ -0,0 +1,1136 @@
+// Copyright (c) 2017 Instituto de Pesquisas Eldorado. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+
+// Implemented according to "Descriptions of SHA-256, SHA-384, and SHA-512"
+// (http://www.iwar.org.uk/comsec/resources/cipher/sha256-384-512.pdf).
+
+#include "asm/macroAssembler.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+
+/**********************************************************************
+ * SHA 256
+ *********************************************************************/
+
+void MacroAssembler::sha256_deque(const VectorRegister src, // Emits three vsldoi ops that spread src into dst1..dst3
+ const VectorRegister dst1, // receives src:src shifted left by 12 bytes
+ const VectorRegister dst2, // receives src:src shifted left by 8 bytes
+ const VectorRegister dst3) { // receives src:src shifted left by 4 bytes
+ vsldoi (dst1, src, src, 12); // same register as both inputs, so each shift acts as a rotation of src
+ vsldoi (dst2, src, src, 8);
+ vsldoi (dst3, src, src, 4);
+}
+
+void MacroAssembler::sha256_round(const VectorRegister* hs, // Emits one SHA-256 round on the working variables held in hs
+ const int total_hs, // number of entries in hs
+ int& h_cnt, // rotation counter: selects which hs entry plays a..h in this round
+ const VectorRegister kpw) { // K + W term consumed by this round
+ // convenience registers: the a..h roles move down one hs slot (mod total_hs) per h_cnt increment
+ const VectorRegister a = hs[(total_hs + 0 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister b = hs[(total_hs + 1 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister c = hs[(total_hs + 2 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister d = hs[(total_hs + 3 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister e = hs[(total_hs + 4 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister f = hs[(total_hs + 5 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister g = hs[(total_hs + 6 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister h = hs[(total_hs + 7 - (h_cnt % total_hs)) % total_hs];
+ // temporaries (fixed registers VR0-VR7; callers must not keep live values there)
+ VectorRegister ch = VR0;
+ VectorRegister maj = VR1;
+ VectorRegister bsa = VR2;
+ VectorRegister bse = VR3;
+ VectorRegister vt0 = VR4;
+ VectorRegister vt1 = VR5;
+ VectorRegister vt2 = VR6;
+ VectorRegister vt3 = VR7;
+
+ vsel (ch, g, f, e); // ch = Ch(e, f, g): f where e has a 1 bit, g elsewhere
+ vxor (maj, a, b); // a ^ b, used as the selector for Maj below
+ vshasigmaw (bse, e, 1, 0xf); // bse = big Sigma1(e)
+ vadduwm (vt2, ch, kpw); // vt2 = Ch + K + W
+ vadduwm (vt1, h, bse); // vt1 = h + Sigma1(e)
+ vsel (maj, b, c, maj); // maj = Maj(a, b, c): c where a and b differ, b elsewhere
+ vadduwm (vt3, vt1, vt2); // vt3 = T1 = h + Sigma1(e) + Ch + K + W
+ vshasigmaw (bsa, a, 1, 0); // bsa = big Sigma0(a)
+ vadduwm (vt0, bsa, maj); // vt0 = T2 = Sigma0(a) + Maj
+
+ vadduwm (d, d, vt3); // d += T1; this register plays e in the next round
+ vadduwm (h, vt3, vt0); // h = T1 + T2; this register plays a in the next round
+
+ // advance the rotation counter for the next round (generation-time only, emits no instruction)
+ h_cnt++;
+}
+
+void MacroAssembler::sha256_load_h_vec(const VectorRegister a, // Emits the load of the 32 bytes at hptr: first 16 into a ...
+ const VectorRegister e, // ... and the following 16 into e
+ const Register hptr) { // address of the state; any alignment is handled
+ // temporaries (R8, VR0 and VR6 are clobbered)
+ Register tmp = R8;
+ VectorRegister vt0 = VR0;
+ VectorRegister vRb = VR6;
+ // labels
+ Label sha256_aligned;
+
+ andi_ (tmp, hptr, 0xf); // sets CR0 from hptr & 0xf; consumed by the beq below
+ lvx (a, hptr);
+ addi (tmp, hptr, 16); // tmp is reused as an address; CR0 still holds the alignment test
+ lvx (e, tmp);
+ beq (CCR0, sha256_aligned); // 16-byte aligned: the two loads above are already final
+
+ // handle unaligned accesses: realign a and e using a permute and a third quadword
+ load_perm(vRb, hptr);
+ addi (tmp, hptr, 32);
+ vec_perm(a, e, vRb);
+
+ lvx (vt0, tmp); // quadword at hptr + 32, needed to complete e
+ vec_perm(e, vt0, vRb);
+
+ // aligned accesses
+ bind(sha256_aligned);
+}
+
+void MacroAssembler::sha256_load_w_plus_k_vec(const Register buf_in, // Emits: load total_ws quadwords of input into ws, ...
+ const VectorRegister* ws, // ... load total_kpws quadwords from k into kpws, ...
+ const int total_ws, // ... then kpws[n] += ws[n] (word-wise)
+ const Register k, // address of the round constants
+ const VectorRegister* kpws, // receives K + W
+ const int total_kpws) { // must equal total_ws (asserted below)
+ Label w_aligned, after_w_load;
+
+ Register tmp = R8; // R8, VR0, VR1 and VR6 are clobbered
+ VectorRegister vt0 = VR0;
+ VectorRegister vt1 = VR1;
+ VectorRegister vRb = VR6;
+
+ andi_ (tmp, buf_in, 0xF);
+ beq (CCR0, w_aligned); // address ends with 0x0, not 0x8
+
+ // deal with unaligned addresses: each quadword is realigned with its successor
+ lvx (ws[0], buf_in);
+ load_perm(vRb, buf_in);
+
+ for (int n = 1; n < total_ws; n++) {
+ VectorRegister w_cur = ws[n];
+ VectorRegister w_prev = ws[n-1];
+
+ addi (tmp, buf_in, n * 16);
+ lvx (w_cur, tmp);
+ vec_perm(w_prev, w_cur, vRb); // w_prev becomes final once its successor is loaded
+ }
+ addi (tmp, buf_in, total_ws * 16); // one extra quadword is read to complete the last w
+ lvx (vt0, tmp);
+ vec_perm(ws[total_ws-1], vt0, vRb);
+ b (after_w_load);
+
+ bind(w_aligned);
+
+ // deal with aligned addresses
+ lvx(ws[0], buf_in);
+ for (int n = 1; n < total_ws; n++) {
+ VectorRegister w = ws[n];
+ addi (tmp, buf_in, n * 16);
+ lvx (w, tmp);
+ }
+
+ bind(after_w_load);
+
+#if defined(VM_LITTLE_ENDIAN)
+ // Byte swapping within int values: build the permute control vector in vt1, then apply it to every w
+ li (tmp, 8);
+ lvsl (vt0, tmp);
+ vspltisb (vt1, 0xb);
+ vxor (vt1, vt0, vt1);
+ for (int n = 0; n < total_ws; n++) {
+ VectorRegister w = ws[n];
+ vec_perm(w, w, vt1);
+ }
+#endif
+
+ // Loading k, which is always aligned to 16-bytes
+ lvx (kpws[0], k);
+ for (int n = 1; n < total_kpws; n++) {
+ VectorRegister kpw = kpws[n];
+ addi (tmp, k, 16 * n);
+ lvx (kpw, tmp);
+ }
+
+ // Add w to K
+ assert(total_ws == total_kpws, "Redesign the loop below");
+ for (int n = 0; n < total_kpws; n++) {
+ VectorRegister kpw = kpws[n];
+ VectorRegister w = ws[n];
+
+ vadduwm (kpw, kpw, w);
+ }
+}
+
+void MacroAssembler::sha256_calc_4w(const VectorRegister w0, // Emits the computation of the next four message-schedule words.
+ const VectorRegister w1, // w0..w3 hold the previous 16 words and are shifted down by one
+ const VectorRegister w2, // register on exit, with the new words placed in w3.
+ const VectorRegister w3,
+ const VectorRegister kpw0, // kpw0..kpw3 receive the new words plus k[j..j+3]
+ const VectorRegister kpw1,
+ const VectorRegister kpw2,
+ const VectorRegister kpw3,
+ const Register j, // byte offset into k; advanced by 16 here
+ const Register k) { // address of the round constants
+ // Temporaries (VR0-VR4 are clobbered)
+ const VectorRegister vt0 = VR0;
+ const VectorRegister vt1 = VR1;
+ const VectorSRegister vsrt1 = vt1->to_vsr();
+ const VectorRegister vt2 = VR2;
+ const VectorRegister vt3 = VR3;
+ const VectorSRegister vst3 = vt3->to_vsr();
+ const VectorRegister vt4 = VR4;
+
+ // load k[j] (one quadword, i.e. four consecutive constants)
+ lvx (vt0, j, k);
+
+ // advance j
+ addi (j, j, 16); // 16 bytes were read
+
+#if defined(VM_LITTLE_ENDIAN)
+ // b = w[j-15], w[j-14], w[j-13], w[j-12]
+ vsldoi (vt1, w1, w0, 12);
+
+ // c = w[j-7], w[j-6], w[j-5], w[j-4]
+ vsldoi (vt2, w3, w2, 12);
+
+#else
+ // b = w[j-15], w[j-14], w[j-13], w[j-12]
+ vsldoi (vt1, w0, w1, 4);
+
+ // c = w[j-7], w[j-6], w[j-5], w[j-4]
+ vsldoi (vt2, w2, w3, 4);
+#endif
+
+ // d = w[j-2], w[j-1], w[j-4], w[j-3]
+ vsldoi (vt3, w3, w3, 8);
+
+ // b = s0(w[j-15]) , s0(w[j-14]) , s0(w[j-13]) , s0(w[j-12])
+ vshasigmaw (vt1, vt1, 0, 0);
+
+ // d = s1(w[j-2]) , s1(w[j-1]) , s1(w[j-4]) , s1(w[j-3])
+ vshasigmaw (vt3, vt3, 0, 0xf);
+
+ // c = s0(w[j-15]) + w[j-7],
+ // s0(w[j-14]) + w[j-6],
+ // s0(w[j-13]) + w[j-5],
+ // s0(w[j-12]) + w[j-4]
+ vadduwm (vt2, vt1, vt2);
+
+ // c = s0(w[j-15]) + w[j-7] + w[j-16],
+ // s0(w[j-14]) + w[j-6] + w[j-15],
+ // s0(w[j-13]) + w[j-5] + w[j-14],
+ // s0(w[j-12]) + w[j-4] + w[j-13]
+ vadduwm (vt2, vt2, w0);
+
+ // e = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
+ // s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
+ // s0(w[j-13]) + w[j-5] + w[j-14] + s1(w[j-4]), // UNDEFINED
+ // s0(w[j-12]) + w[j-4] + w[j-13] + s1(w[j-3]) // UNDEFINED
+ vadduwm (vt4, vt2, vt3);
+
+ // At this point, e[0] and e[1] are the correct values to be stored at w[j]
+ // and w[j+1].
+ // e[2] and e[3] are not considered.
+ // b = s1(w[j]) , s1(w[j+1]) , UNDEFINED , UNDEFINED
+ vshasigmaw (vt1, vt4, 0, 0xf);
+
+ // d = s1(w[j-2]) , s1(w[j-1]) , s1(w[j]) , s1(w[j+1])
+#if defined(VM_LITTLE_ENDIAN)
+ xxmrgld (vst3, vsrt1, vst3);
+#else
+ xxmrghd (vst3, vst3, vsrt1);
+#endif
+
+ // c = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
+ // s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
+ // s0(w[j-13]) + w[j-5] + w[j-14] + s1(w[j]), // w[j+2]
+ // s0(w[j-12]) + w[j-4] + w[j-13] + s1(w[j+1]) // w[j+3]
+ vadduwm (vt2, vt2, vt3);
+
+ // Updating w0 to w3 to hold the new previous 16 values from w.
+ vmr (w0, w1);
+ vmr (w1, w2);
+ vmr (w2, w3);
+ vmr (w3, vt2);
+
+ // store k + w (4 values at once) to kpw0 on little endian / kpw3 on big endian, then rotate into the others
+#if defined(VM_LITTLE_ENDIAN)
+ vadduwm (kpw0, vt2, vt0);
+
+ vsldoi (kpw1, kpw0, kpw0, 12);
+ vsldoi (kpw2, kpw0, kpw0, 8);
+ vsldoi (kpw3, kpw0, kpw0, 4);
+#else
+ vadduwm (kpw3, vt2, vt0);
+
+ vsldoi (kpw2, kpw3, kpw3, 12);
+ vsldoi (kpw1, kpw3, kpw3, 8);
+ vsldoi (kpw0, kpw3, kpw3, 4);
+#endif
+}
+
+void MacroAssembler::sha256_update_sha_state(const VectorRegister a, // Emits: pack the working variables a..h into two vectors,
+ const VectorRegister b_, // add the 32 bytes of state at hptr to them,
+ const VectorRegister c, // and store the sums back to hptr.
+ const VectorRegister d, // On exit the sums are left in a/e (little endian)
+ const VectorRegister e, // or d/h (big endian), the same registers that
+ const VectorRegister f, // sha256() loads the state into.
+ const VectorRegister g,
+ const VectorRegister h,
+ const Register hptr) { // address of the state; any alignment is handled
+ // temporaries (VR0-VR6, R8 and R9 are clobbered)
+ VectorRegister vt0 = VR0;
+ VectorRegister vt1 = VR1;
+ VectorRegister vt2 = VR2;
+ VectorRegister vt3 = VR3;
+ VectorRegister vt4 = VR4;
+ VectorRegister vt5 = VR5;
+ VectorRegister vaux = VR6; // not referenced below; same register as vRb
+ VectorRegister vRb = VR6;
+ Register tmp = R8;
+ Register of16 = R8; // same register as tmp; tmp is only needed for its CR0 side effect
+ Register of32 = R9;
+ Label state_load_aligned;
+
+ // Load the current state from hptr into vt0 (first 16 bytes) and vt5 (next 16 bytes)
+ andi_ (tmp, hptr, 0xf); // sets CR0 from hptr & 0xf; consumed by the beq below
+ li (of16, 16);
+ lvx (vt0, hptr);
+ lvx (vt5, of16, hptr);
+ beq (CCR0, state_load_aligned);
+
+ // handle unaligned accesses
+ li (of32, 32);
+ load_perm(vRb, hptr);
+
+ vec_perm(vt0, vt5, vRb); // vt0 = hptr[0]..hptr[3]
+
+ lvx (vt1, hptr, of32);
+ vec_perm(vt5, vt1, vRb); // vt5 = hptr[4]..hptr[7]
+
+ // aligned accesses
+ bind(state_load_aligned);
+
+#if defined(VM_LITTLE_ENDIAN)
+ vmrglw (vt1, b_, a); // vt1 = {a, b, ?, ?}
+ vmrglw (vt2, d, c); // vt2 = {c, d, ?, ?}
+ vmrglw (vt3, f, e); // vt3 = {e, f, ?, ?}
+ vmrglw (vt4, h, g); // vt4 = {g, h, ?, ?}
+ xxmrgld (vt1->to_vsr(), vt2->to_vsr(), vt1->to_vsr()); // vt1 = {a, b, c, d}
+ xxmrgld (vt3->to_vsr(), vt4->to_vsr(), vt3->to_vsr()); // vt3 = {e, f, g, h}
+ vadduwm (a, vt0, vt1); // a = {a+hptr[0], b+hptr[1], c+hptr[2], d+hptr[3]}
+ vadduwm (e, vt5, vt3); // e = {e+hptr[4], f+hptr[5], g+hptr[6], h+hptr[7]}
+
+ // Store the updated state back to hptr, works for any alignment
+ xxswapd (vt0->to_vsr(), a->to_vsr());
+ stxvd2x (vt0->to_vsr(), hptr);
+ xxswapd (vt5->to_vsr(), e->to_vsr());
+ stxvd2x (vt5->to_vsr(), of16, hptr);
+#else
+ vmrglw (vt1, a, b_); // vt1 = {a, b, ?, ?}
+ vmrglw (vt2, c, d); // vt2 = {c, d, ?, ?}
+ vmrglw (vt3, e, f); // vt3 = {e, f, ?, ?}
+ vmrglw (vt4, g, h); // vt4 = {g, h, ?, ?}
+ xxmrgld (vt1->to_vsr(), vt1->to_vsr(), vt2->to_vsr()); // vt1 = {a, b, c, d}
+ xxmrgld (vt3->to_vsr(), vt3->to_vsr(), vt4->to_vsr()); // vt3 = {e, f, g, h}
+ vadduwm (d, vt0, vt1); // d = {a+hptr[0], b+hptr[1], c+hptr[2], d+hptr[3]}
+ vadduwm (h, vt5, vt3); // h = {e+hptr[4], f+hptr[5], g+hptr[6], h+hptr[7]}
+
+ // Store the updated state back to hptr, works for any alignment
+ stxvd2x (d->to_vsr(), hptr);
+ stxvd2x (h->to_vsr(), of16, hptr);
+#endif
+}
+
+static const uint32_t sha256_round_table[64] __attribute((aligned(16))) = { // the 64 SHA-256 round constants; 16-byte alignment is requested because they are loaded with lvx
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+static const uint32_t *sha256_round_consts = sha256_round_table; // address baked into generated code; sha256() may repoint it to an aligned malloc'ed copy on AIX
+
+// R3_ARG1 - byte[] Input string with padding but in Big Endian
+// R4_ARG2 - int[] SHA.state (at first, the root of primes)
+// R5_ARG3 - int offset
+// R6_ARG4 - int limit
+//
+// Internal Register usage:
+// R7 - k
+// R8 - tmp | j | of16
+// R9 - of32
+// VR0-VR7 - ch, maj, bsa, bse, vt0-vt3 | vt0-vt5, vaux/vRb (VR6)
+// VR9-VR16 - a-h
+// VR17-VR20 - w0-w3
+// VR21-VR23 - vaux0-vaux2
+// VR24-VR27 - kpw0-kpw3
+void MacroAssembler::sha256(bool multi_block) { // Emits the SHA-256 compression of one 64-byte block, or a loop over blocks if multi_block
+ static const ssize_t buf_size = 64; // bytes consumed from buf_in per block
+ static const uint8_t w_size = sizeof(sha256_round_table)/sizeof(uint32_t); // number of rounds (64)
+#ifdef AIX
+ // malloc provides 16 byte alignment; used as a fallback when the static table is not 16-byte aligned
+ if (((uintptr_t)sha256_round_consts & 0xF) != 0) {
+ uint32_t *new_round_consts = (uint32_t*)malloc(sizeof(sha256_round_table));
+ guarantee(new_round_consts, "oom");
+ memcpy(new_round_consts, sha256_round_consts, sizeof(sha256_round_table));
+ sha256_round_consts = (const uint32_t*)new_round_consts; // never freed: the generated code keeps referring to it
+ }
+#endif
+
+ Register buf_in = R3_ARG1;
+ Register state = R4_ARG2;
+ Register ofs = R5_ARG3;
+ Register limit = R6_ARG4;
+
+ Label sha_loop, core_loop;
+
+ // Save non-volatile vector registers in the red zone (negative offsets from R1)
+ static const VectorRegister nv[] = {
+ VR20, VR21, VR22, VR23, VR24, VR25, VR26, VR27/*, VR28, VR29, VR30, VR31*/
+ };
+ static const uint8_t nv_size = sizeof(nv) / sizeof (VectorRegister);
+
+ for (int c = 0; c < nv_size; c++) {
+ Register tmp = R8;
+ li (tmp, (c - (nv_size)) * 16); // offsets -nv_size*16 .. -16
+ stvx(nv[c], tmp, R1);
+ }
+
+ // Load hash state to registers
+ VectorRegister a = VR9;
+ VectorRegister b = VR10;
+ VectorRegister c = VR11;
+ VectorRegister d = VR12;
+ VectorRegister e = VR13;
+ VectorRegister f = VR14;
+ VectorRegister g = VR15;
+ VectorRegister h = VR16;
+ static const VectorRegister hs[] = {a, b, c, d, e, f, g, h};
+ static const int total_hs = sizeof(hs)/sizeof(VectorRegister);
+ // counter for cycling through hs vector to avoid register moves between iterations
+ int h_cnt = 0;
+
+ // Load a-h registers from the memory pointed by state (packed four words per vector)
+#if defined(VM_LITTLE_ENDIAN)
+ sha256_load_h_vec(a, e, state);
+#else
+ sha256_load_h_vec(d, h, state);
+#endif
+
+ // keep k loaded also during MultiBlock loops
+ Register k = R7;
+ assert(((uintptr_t)sha256_round_consts & 0xF) == 0, "k alignment");
+ load_const_optimized(k, (address)sha256_round_consts, R0);
+
+ // Avoiding redundant loads: everything above is emitted once, outside the per-block loop
+ if (multi_block) {
+ align(OptoLoopAlignment);
+ }
+ bind(sha_loop);
+ // spread the two packed state vectors over the eight working registers a-h
+#if defined(VM_LITTLE_ENDIAN)
+ sha256_deque(a, b, c, d);
+ sha256_deque(e, f, g, h);
+#else
+ sha256_deque(d, c, b, a);
+ sha256_deque(h, g, f, e);
+#endif
+
+ // Load 16 elements from w out of the loop.
+ // Order of the int values is Endianness specific.
+ VectorRegister w0 = VR17;
+ VectorRegister w1 = VR18;
+ VectorRegister w2 = VR19;
+ VectorRegister w3 = VR20;
+ static const VectorRegister ws[] = {w0, w1, w2, w3};
+ static const int total_ws = sizeof(ws)/sizeof(VectorRegister);
+
+ VectorRegister kpw0 = VR24;
+ VectorRegister kpw1 = VR25;
+ VectorRegister kpw2 = VR26;
+ VectorRegister kpw3 = VR27;
+ static const VectorRegister kpws[] = {kpw0, kpw1, kpw2, kpw3};
+ static const int total_kpws = sizeof(kpws)/sizeof(VectorRegister);
+
+ sha256_load_w_plus_k_vec(buf_in, ws, total_ws, k, kpws, total_kpws);
+
+ // Cycle through the first 16 elements (four rounds per kpws vector)
+ assert(total_ws == total_kpws, "Redesign the loop below");
+ for (int n = 0; n < total_ws; n++) {
+ VectorRegister vaux0 = VR21;
+ VectorRegister vaux1 = VR22;
+ VectorRegister vaux2 = VR23;
+
+ sha256_deque(kpws[n], vaux0, vaux1, vaux2);
+
+#if defined(VM_LITTLE_ENDIAN)
+ sha256_round(hs, total_hs, h_cnt, kpws[n]);
+ sha256_round(hs, total_hs, h_cnt, vaux0);
+ sha256_round(hs, total_hs, h_cnt, vaux1);
+ sha256_round(hs, total_hs, h_cnt, vaux2);
+#else
+ sha256_round(hs, total_hs, h_cnt, vaux2);
+ sha256_round(hs, total_hs, h_cnt, vaux1);
+ sha256_round(hs, total_hs, h_cnt, vaux0);
+ sha256_round(hs, total_hs, h_cnt, kpws[n]);
+#endif
+ }
+
+ Register tmp = R8;
+ // remaining rounds 16..63: (w_size - 16) / total_hs loop iterations of total_hs rounds each
+ li (tmp, (w_size - 16) / total_hs);
+ mtctr(tmp);
+
+ // j is the byte offset into k of the next constants to load; it starts past the
+ // 16 constants (16 * 4 bytes) consumed above and is advanced by sha256_calc_4w.
+ Register j = R8; // same register as tmp, which is dead once it has been moved to CTR
+ li (j, 16*4);
+
+ align(OptoLoopAlignment);
+ bind(core_loop);
+
+ // due to VectorRegister rotate, always iterate in multiples of total_hs
+ for (int n = 0; n < total_hs/4; n++) {
+ sha256_calc_4w(w0, w1, w2, w3, kpw0, kpw1, kpw2, kpw3, j, k);
+ sha256_round(hs, total_hs, h_cnt, kpw0);
+ sha256_round(hs, total_hs, h_cnt, kpw1);
+ sha256_round(hs, total_hs, h_cnt, kpw2);
+ sha256_round(hs, total_hs, h_cnt, kpw3);
+ }
+
+ bdnz (core_loop);
+
+ // Update hash state
+ sha256_update_sha_state(a, b, c, d, e, f, g, h, state);
+
+ if (multi_block) {
+ addi(buf_in, buf_in, buf_size);
+ addi(ofs, ofs, buf_size);
+ cmplw(CCR0, ofs, limit); // unsigned 32-bit compare
+ ble(CCR0, sha_loop); // process another block while ofs <= limit
+
+ // return ofs
+ mr(R3_RET, ofs);
+ }
+
+ // Restore non-volatile registers
+ for (int c = 0; c < nv_size; c++) {
+ Register tmp = R8;
+ li (tmp, (c - (nv_size)) * 16);
+ lvx(nv[c], tmp, R1);
+ }
+}
+
+
+/**********************************************************************
+ * SHA 512
+ *********************************************************************/
+
+void MacroAssembler::sha512_load_w_vec(const Register buf_in, // Emits the load of total_ws quadwords starting at buf_in ...
+ const VectorRegister* ws, // ... into ws[0..total_ws-1]; any alignment is handled
+ const int total_ws) {
+ Register tmp = R8; // R8, VR8 and VR9 are clobbered
+ VectorRegister vRb = VR8;
+ VectorRegister aux = VR9;
+ Label is_aligned, after_alignment;
+
+ andi_ (tmp, buf_in, 0xF);
+ beq (CCR0, is_aligned); // address ends with 0x0, not 0x8
+
+ // deal with unaligned addresses: each quadword is realigned with its successor
+ lvx (ws[0], buf_in);
+ load_perm(vRb, buf_in);
+
+ for (int n = 1; n < total_ws; n++) {
+ VectorRegister w_cur = ws[n];
+ VectorRegister w_prev = ws[n-1];
+ addi (tmp, buf_in, n * 16);
+ lvx (w_cur, tmp);
+ vec_perm(w_prev, w_cur, vRb); // w_prev becomes final once its successor is loaded
+ }
+ addi (tmp, buf_in, total_ws * 16); // one extra quadword is read to complete the last w
+ lvx (aux, tmp);
+ vec_perm(ws[total_ws-1], aux, vRb);
+ b (after_alignment);
+
+ bind(is_aligned); // aligned case: plain quadword loads
+ lvx (ws[0], buf_in);
+ for (int n = 1; n < total_ws; n++) {
+ VectorRegister w = ws[n];
+ addi (tmp, buf_in, n * 16);
+ lvx (w, tmp);
+ }
+
+ bind(after_alignment);
+}
+
+// Update hash state: emits code that adds the 64 bytes stored at state to the working variables in hs and stores the sums back
+void MacroAssembler::sha512_update_sha_state(const Register state,
+ const VectorRegister* hs,
+ const int total_hs) {
+
+#if defined(VM_LITTLE_ENDIAN)
+ int start_idx = 0; // the even hs entries receive the merged pairs
+#else
+ int start_idx = 1; // the odd hs entries receive the merged pairs
+#endif
+
+ // load initial hash from the memory pointed by state
+ VectorRegister ini_a = VR10;
+ VectorRegister ini_c = VR12;
+ VectorRegister ini_e = VR14;
+ VectorRegister ini_g = VR16;
+ static const VectorRegister inis[] = {ini_a, ini_c, ini_e, ini_g};
+ static const int total_inis = sizeof(inis)/sizeof(VectorRegister); // not referenced below
+
+ Label state_save_aligned, after_state_save_aligned;
+
+ Register addr = R7; // R7, R8, VR8 and VR9 are clobbered
+ Register tmp = R8;
+ VectorRegister vRb = VR8;
+ VectorRegister aux = VR9;
+
+ andi_(tmp, state, 0xf);
+ beq(CCR0, state_save_aligned);
+ // deal with unaligned addresses
+
+ {
+ VectorRegister a = hs[0];
+ VectorRegister b_ = hs[1];
+ VectorRegister c = hs[2];
+ VectorRegister d = hs[3];
+ VectorRegister e = hs[4];
+ VectorRegister f = hs[5];
+ VectorRegister g = hs[6];
+ VectorRegister h = hs[7];
+ load_perm(vRb, state);
+ lvx (ini_a, state);
+ addi (addr, state, 16);
+
+ lvx (ini_c, addr);
+ addi (addr, state, 32);
+ vec_perm(ini_a, ini_c, vRb); // each ini_* is realigned with the quadword that follows it
+
+ lvx (ini_e, addr);
+ addi (addr, state, 48);
+ vec_perm(ini_c, ini_e, vRb);
+
+ lvx (ini_g, addr);
+ addi (addr, state, 64);
+ vec_perm(ini_e, ini_g, vRb);
+
+ lvx (aux, addr); // fifth quadword, needed to complete ini_g
+ vec_perm(ini_g, aux, vRb);
+
+ // merge each pair of working variables into one vector
+#if defined(VM_LITTLE_ENDIAN)
+ xxmrgld(a->to_vsr(), b_->to_vsr(), a->to_vsr());
+ xxmrgld(c->to_vsr(), d->to_vsr(), c->to_vsr());
+ xxmrgld(e->to_vsr(), f->to_vsr(), e->to_vsr());
+ xxmrgld(g->to_vsr(), h->to_vsr(), g->to_vsr());
+#else
+ xxmrgld(b_->to_vsr(), a->to_vsr(), b_->to_vsr());
+ xxmrgld(d->to_vsr(), c->to_vsr(), d->to_vsr());
+ xxmrgld(f->to_vsr(), e->to_vsr(), f->to_vsr());
+ xxmrgld(h->to_vsr(), g->to_vsr(), h->to_vsr());
+#endif
+
+ for (int n = start_idx; n < total_hs; n += 2) { // add the loaded state to the merged pairs
+ VectorRegister h_cur = hs[n];
+ VectorRegister ini_cur = inis[n/2];
+
+ vaddudm(h_cur, ini_cur, h_cur);
+ }
+
+ for (int n = start_idx; n < total_hs; n += 2) { // store the sums one doubleword at a time via a GPR
+ VectorRegister h_cur = hs[n];
+
+ mfvrd (tmp, h_cur);
+#if defined(VM_LITTLE_ENDIAN)
+ std (tmp, 8*n + 8, state);
+#else
+ std (tmp, 8*n - 8, state);
+#endif
+ vsldoi (aux, h_cur, h_cur, 8); // swap the doublewords so mfvrd can read the other one
+ mfvrd (tmp, aux);
+ std (tmp, 8*n + 0, state);
+ }
+
+ b (after_state_save_aligned);
+ }
+
+ bind(state_save_aligned);
+ {
+ for (int n = 0; n < total_hs; n += 2) { // load the state and merge each pair of working variables
+#if defined(VM_LITTLE_ENDIAN)
+ VectorRegister h_cur = hs[n];
+ VectorRegister h_next = hs[n+1];
+#else
+ VectorRegister h_cur = hs[n+1];
+ VectorRegister h_next = hs[n];
+#endif
+ VectorRegister ini_cur = inis[n/2];
+
+ if (n/2 == 0) {
+ lvx(ini_cur, state);
+ } else {
+ addi(addr, state, (n/2) * 16);
+ lvx(ini_cur, addr);
+ }
+ xxmrgld(h_cur->to_vsr(), h_next->to_vsr(), h_cur->to_vsr());
+ }
+
+ for (int n = start_idx; n < total_hs; n += 2) { // add the loaded state to the merged pairs
+ VectorRegister h_cur = hs[n];
+ VectorRegister ini_cur = inis[n/2];
+
+ vaddudm(h_cur, ini_cur, h_cur);
+ }
+
+ for (int n = start_idx; n < total_hs; n += 2) { // store the sums with whole-quadword stores
+ VectorRegister h_cur = hs[n];
+
+ if (n/2 == 0) {
+ stvx(h_cur, state);
+ } else {
+ addi(addr, state, (n/2) * 16);
+ stvx(h_cur, addr);
+ }
+ }
+ }
+
+ bind(after_state_save_aligned);
+}
+
+// Emits one SHA-512 round. Use h_cnt to cycle through hs elements but also increment it at the end
+void MacroAssembler::sha512_round(const VectorRegister* hs,
+ const int total_hs, int& h_cnt,
+ const VectorRegister kpw) { // K + W term consumed by this round
+
+ // convenience registers: the a..h roles move down one hs slot (mod total_hs) per h_cnt increment
+ const VectorRegister a = hs[(total_hs + 0 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister b = hs[(total_hs + 1 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister c = hs[(total_hs + 2 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister d = hs[(total_hs + 3 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister e = hs[(total_hs + 4 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister f = hs[(total_hs + 5 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister g = hs[(total_hs + 6 - (h_cnt % total_hs)) % total_hs];
+ const VectorRegister h = hs[(total_hs + 7 - (h_cnt % total_hs)) % total_hs];
+ // temporaries (fixed registers VR20-VR25, shared with sha512_calc_2w)
+ const VectorRegister Ch = VR20;
+ const VectorRegister Maj = VR21;
+ const VectorRegister bsa = VR22;
+ const VectorRegister bse = VR23;
+ const VectorRegister tmp1 = VR24;
+ const VectorRegister tmp2 = VR25;
+
+ vsel (Ch, g, f, e); // Ch = Ch(e, f, g): f where e has a 1 bit, g elsewhere
+ vxor (Maj, a, b); // a ^ b, used as the selector for Maj below
+ vshasigmad(bse, e, 1, 0xf); // bse = big Sigma1(e)
+ vaddudm (tmp2, Ch, kpw); // tmp2 = Ch + K + W
+ vaddudm (tmp1, h, bse); // tmp1 = h + Sigma1(e)
+ vsel (Maj, b, c, Maj); // Maj = Maj(a, b, c): c where a and b differ, b elsewhere
+ vaddudm (tmp1, tmp1, tmp2); // tmp1 = T1 = h + Sigma1(e) + Ch + K + W
+ vshasigmad(bsa, a, 1, 0); // bsa = big Sigma0(a)
+ vaddudm (tmp2, bsa, Maj); // tmp2 = T2 = Sigma0(a) + Maj
+ vaddudm (d, d, tmp1); // d += T1; this register plays e in the next round
+ vaddudm (h, tmp1, tmp2); // h = T1 + T2; this register plays a in the next round
+
+ // advance the rotation counter for the next round (generation-time only, emits no instruction)
+ h_cnt++;
+}
+
+void MacroAssembler::sha512_calc_2w(const VectorRegister w0, // Emits the computation of the next two message-schedule words.
+ const VectorRegister w1, // w0..w7 hold the previous 16 words and are shifted down by one
+ const VectorRegister w2, // register on exit, with the new words placed in w7.
+ const VectorRegister w3,
+ const VectorRegister w4,
+ const VectorRegister w5,
+ const VectorRegister w6,
+ const VectorRegister w7,
+ const VectorRegister kpw0, // kpw0/kpw1 receive the new words plus the constants at k + j
+ const VectorRegister kpw1,
+ const Register j, // byte offset into k; advanced by 16 here
+ const VectorRegister vRb, // permute control used to combine neighbouring w registers
+ const Register k) { // address of the round constants
+ // Temporaries (VR20-VR23, shared with sha512_round)
+ const VectorRegister VR_a = VR20;
+ const VectorRegister VR_b = VR21;
+ const VectorRegister VR_c = VR22;
+ const VectorRegister VR_d = VR23;
+
+ // load k[j] (one quadword, i.e. two consecutive constants)
+ lvx (VR_a, j, k);
+ // advance j
+ addi (j, j, 16); // 16 bytes were read
+
+#if defined(VM_LITTLE_ENDIAN)
+ // VR_b = w[j-15], w[j-14]
+ vperm (VR_b, w1, w0, vRb);
+ // VR_c = w[j-7], w[j-6]
+ vperm (VR_c, w5, w4, vRb);
+#else
+ // VR_b = w[j-15], w[j-14]
+ vperm (VR_b, w0, w1, vRb);
+ // VR_c = w[j-7], w[j-6]
+ vperm (VR_c, w4, w5, vRb);
+#endif
+
+ // VR_b = s0(w[j-15]) , s0(w[j-14])
+ vshasigmad (VR_b, VR_b, 0, 0);
+ // VR_d = s1(w[j-2]) , s1(w[j-1])
+ vshasigmad (VR_d, w7, 0, 0xf);
+ // VR_b = s0(w[j-15]) + w[j-7] , s0(w[j-14]) + w[j-6]
+ vaddudm (VR_b, VR_b, VR_c);
+ // VR_d = s1(w[j-2]) + w[j-16] , s1(w[j-1]) + w[j-15]
+ vaddudm (VR_d, VR_d, w0);
+ // VR_c = s0(w[j-15]) + w[j-7] + w[j-16] + s1(w[j-2]), // w[j]
+ // s0(w[j-14]) + w[j-6] + w[j-15] + s1(w[j-1]), // w[j+1]
+ vaddudm (VR_c, VR_d, VR_b);
+ // Updating w0 to w7 to hold the new previous 16 values from w.
+ vmr (w0, w1);
+ vmr (w1, w2);
+ vmr (w2, w3);
+ vmr (w3, w4);
+ vmr (w4, w5);
+ vmr (w5, w6);
+ vmr (w6, w7);
+ vmr (w7, VR_c);
+
+#if defined(VM_LITTLE_ENDIAN)
+ // store k + w to kpw0 (2 values at once)
+ vaddudm (kpw0, VR_c, VR_a);
+ // kpw1 is kpw0 with its two doublewords swapped, i.e. it holds (k + w)[1]
+ vsldoi (kpw1, kpw0, kpw0, 8);
+#else
+ // store k + w to kpw1 (2 values at once)
+ vaddudm (kpw1, VR_c, VR_a);
+ // kpw0 is kpw1 with its two doublewords swapped
+ vsldoi (kpw0, kpw1, kpw1, 8);
+#endif
+}
+
+void MacroAssembler::sha512_load_h_vec(const Register state, // Emits the load of the 64 bytes at state into every second hs entry
+ const VectorRegister* hs, // (even entries on little endian, odd entries on big endian)
+ const int total_hs) {
+#if defined(VM_LITTLE_ENDIAN)
+ VectorRegister a = hs[0]; // not referenced below
+ VectorRegister g = hs[6]; // not referenced below
+ int start_idx = 0;
+#else
+ VectorRegister a = hs[1]; // not referenced below
+ VectorRegister g = hs[7]; // not referenced below
+ int start_idx = 1;
+#endif
+
+ Register addr = R7; // R7, R8, VR8 and (unaligned case) VR9 are clobbered
+ VectorRegister vRb = VR8;
+ Register tmp = R8;
+ Label state_aligned, after_state_aligned;
+
+ andi_(tmp, state, 0xf);
+ beq(CCR0, state_aligned);
+
+ // deal with unaligned addresses: each quadword is realigned with its successor
+ VectorRegister aux = VR9;
+
+ lvx(hs[start_idx], state);
+ load_perm(vRb, state);
+
+ for (int n = start_idx + 2; n < total_hs; n += 2) {
+ VectorRegister h_cur = hs[n];
+ VectorRegister h_prev2 = hs[n - 2];
+ addi(addr, state, (n/2) * 16);
+ lvx(h_cur, addr);
+ vec_perm(h_prev2, h_cur, vRb);
+ }
+ addi(addr, state, (total_hs/2) * 16); // one extra quadword is read to complete the last entry
+ lvx (aux, addr);
+ vec_perm(hs[total_hs - 2 + start_idx], aux, vRb);
+ b (after_state_aligned);
+
+ bind(state_aligned);
+
+ // deal with aligned addresses
+ lvx(hs[start_idx], state);
+
+ for (int n = start_idx + 2; n < total_hs; n += 2) {
+ VectorRegister h_cur = hs[n];
+ addi(addr, state, (n/2) * 16);
+ lvx(h_cur, addr);
+ }
+
+ bind(after_state_aligned);
+}
+
+static const uint64_t sha512_round_table[80] __attribute((aligned(16))) = { // the 80 SHA-512 round constants; 16-byte alignment is requested because they are loaded with lvx
+ 0x428a2f98d728ae22, 0x7137449123ef65cd,
+ 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
+ 0x3956c25bf348b538, 0x59f111f1b605d019,
+ 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
+ 0xd807aa98a3030242, 0x12835b0145706fbe,
+ 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
+ 0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
+ 0x9bdc06a725c71235, 0xc19bf174cf692694,
+ 0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
+ 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
+ 0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
+ 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
+ 0x983e5152ee66dfab, 0xa831c66d2db43210,
+ 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
+ 0xc6e00bf33da88fc2, 0xd5a79147930aa725,
+ 0x06ca6351e003826f, 0x142929670a0e6e70,
+ 0x27b70a8546d22ffc, 0x2e1b21385c26c926,
+ 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
+ 0x650a73548baf63de, 0x766a0abb3c77b2a8,
+ 0x81c2c92e47edaee6, 0x92722c851482353b,
+ 0xa2bfe8a14cf10364, 0xa81a664bbc423001,
+ 0xc24b8b70d0f89791, 0xc76c51a30654be30,
+ 0xd192e819d6ef5218, 0xd69906245565a910,
+ 0xf40e35855771202a, 0x106aa07032bbd1b8,
+ 0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
+ 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
+ 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
+ 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
+ 0x748f82ee5defb2fc, 0x78a5636f43172f60,
+ 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
+ 0x90befffa23631e28, 0xa4506cebde82bde9,
+ 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
+ 0xca273eceea26619c, 0xd186b8c721c0c207,
+ 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
+ 0x06f067aa72176fba, 0x0a637dc5a2c898a6,
+ 0x113f9804bef90dae, 0x1b710b35131c471b,
+ 0x28db77f523047d84, 0x32caab7b40c72493,
+ 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
+ 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
+ 0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
+};
+static const uint64_t *sha512_round_consts = sha512_round_table; // address baked into generated code; sha512() may repoint it to an aligned malloc'ed copy on AIX
+
+// R3_ARG1 - byte[] Input string with padding but in Big Endian
+// R4_ARG2 - int[] SHA.state (at first, the root of primes) - NOTE(review): accessed here as eight 64-bit words, presumably long[]; confirm
+// R5_ARG3 - int offset
+// R6_ARG4 - int limit
+//
+// Internal Register usage:
+// R7 R8 R9 R10 - volatile temporaries (addr/idx, tmp/j, k, Rb)
+// VR0-VR7 - a-h
+// VR8 - vRb
+// VR9 - aux | shiftarg | vsp8 (highly volatile, use with care)
+// VR10-VR17 - w0-w7 | ini_a, ini_c, ini_e, ini_g (VR10, VR12, VR14, VR16)
+// VR18 - vsp16 | kplusw0
+// VR19 - vsp32 | kplusw1
+// VR20-VR25 - sha512_calc_2w and sha512_round temporaries
+void MacroAssembler::sha512(bool multi_block) { // Emits the SHA-512 compression of one 128-byte block, or a loop over blocks if multi_block
+ static const ssize_t buf_size = 128; // bytes consumed from buf_in per block
+ static const uint8_t w_size = sizeof(sha512_round_table)/sizeof(uint64_t); // number of rounds (80)
+#ifdef AIX
+ // malloc provides 16 byte alignment; used as a fallback when the static table is not 16-byte aligned
+ if (((uintptr_t)sha512_round_consts & 0xF) != 0) {
+ uint64_t *new_round_consts = (uint64_t*)malloc(sizeof(sha512_round_table));
+ guarantee(new_round_consts, "oom");
+ memcpy(new_round_consts, sha512_round_consts, sizeof(sha512_round_table));
+ sha512_round_consts = (const uint64_t*)new_round_consts; // never freed: the generated code keeps referring to it
+ }
+#endif
+
+ Register buf_in = R3_ARG1;
+ Register state = R4_ARG2;
+ Register ofs = R5_ARG3;
+ Register limit = R6_ARG4;
+
+ Label sha_loop, core_loop;
+
+ // Save non-volatile vector registers in the red zone (negative offsets from R1)
+ static const VectorRegister nv[] = {
+ VR20, VR21, VR22, VR23, VR24, VR25/*, VR26, VR27, VR28, VR29, VR30, VR31*/
+ };
+ static const uint8_t nv_size = sizeof(nv) / sizeof (VectorRegister);
+
+ for (int c = 0; c < nv_size; c++) {
+ Register idx = R7;
+ li (idx, (c - (nv_size)) * 16); // offsets -nv_size*16 .. -16
+ stvx(nv[c], idx, R1);
+ }
+
+ // Load hash state to registers
+ VectorRegister a = VR0;
+ VectorRegister b = VR1;
+ VectorRegister c = VR2;
+ VectorRegister d = VR3;
+ VectorRegister e = VR4;
+ VectorRegister f = VR5;
+ VectorRegister g = VR6;
+ VectorRegister h = VR7;
+ static const VectorRegister hs[] = {a, b, c, d, e, f, g, h};
+ static const int total_hs = sizeof(hs)/sizeof(VectorRegister);
+ // counter for cycling through hs vector to avoid register moves between iterations
+ int h_cnt = 0;
+
+ // Load a-h registers from the memory pointed by state (packed two doublewords per vector)
+ sha512_load_h_vec(state, hs, total_hs);
+
+ Register k = R9; // kept loaded across all blocks
+ assert(((uintptr_t)sha512_round_consts & 0xF) == 0, "k alignment");
+ load_const_optimized(k, (address)sha512_round_consts, R0);
+
+ if (multi_block) {
+ align(OptoLoopAlignment);
+ }
+ bind(sha_loop);
+
+ for (int n = 0; n < total_hs; n += 2) { // spread each packed pair over two working registers
+#if defined(VM_LITTLE_ENDIAN)
+ VectorRegister h_cur = hs[n];
+ VectorRegister h_next = hs[n + 1];
+#else
+ VectorRegister h_cur = hs[n + 1];
+ VectorRegister h_next = hs[n];
+#endif
+ vsldoi (h_next, h_cur, h_cur, 8); // h_next = h_cur with its two doublewords swapped
+ }
+
+ // Load 16 elements from w out of the loop.
+ // Order of the long values is Endianness specific.
+ VectorRegister w0 = VR10;
+ VectorRegister w1 = VR11;
+ VectorRegister w2 = VR12;
+ VectorRegister w3 = VR13;
+ VectorRegister w4 = VR14;
+ VectorRegister w5 = VR15;
+ VectorRegister w6 = VR16;
+ VectorRegister w7 = VR17;
+ static const VectorRegister ws[] = {w0, w1, w2, w3, w4, w5, w6, w7};
+ static const int total_ws = sizeof(ws)/sizeof(VectorRegister);
+
+ // Load 16 w into vectors (two per register); the callee handles unaligned buf_in
+ sha512_load_w_vec(buf_in, ws, total_ws);
+
+#if defined(VM_LITTLE_ENDIAN)
+ VectorRegister vsp16 = VR18;
+ VectorRegister vsp32 = VR19;
+ VectorRegister shiftarg = VR9;
+
+ vspltisw(vsp16, 8); // build the rotate amounts: start from 8 ...
+ vspltisw(shiftarg, 1);
+ vsl (vsp16, vsp16, shiftarg); // ... shifted left by one bit for vsp16 ...
+ vsl (vsp32, vsp16, shiftarg); // ... and by one more bit for vsp32
+
+ VectorRegister vsp8 = VR9; // reuses VR9; shiftarg is dead from here on
+ vspltish(vsp8, 8);
+
+ // Convert input from Big Endian to Little Endian: rotate halfwords, then words, then doublewords
+ for (int c = 0; c < total_ws; c++) {
+ VectorRegister w = ws[c];
+ vrlh (w, w, vsp8);
+ }
+ for (int c = 0; c < total_ws; c++) {
+ VectorRegister w = ws[c];
+ vrlw (w, w, vsp16);
+ }
+ for (int c = 0; c < total_ws; c++) {
+ VectorRegister w = ws[c];
+ vrld (w, w, vsp32);
+ }
+#endif
+
+ Register Rb = R10;
+ VectorRegister vRb = VR8;
+ li (Rb, 8);
+ load_perm(vRb, Rb); // permute control for an offset of 8 bytes, consumed by sha512_calc_2w
+
+ VectorRegister kplusw0 = VR18; // VR18/VR19 are reused; vsp16/vsp32 are dead from here on
+ VectorRegister kplusw1 = VR19;
+
+ Register addr = R7;
+
+ for (int n = 0; n < total_ws; n++) { // first 16 rounds: two rounds per w register
+ VectorRegister w = ws[n];
+
+ if (n == 0) {
+ lvx (kplusw0, k);
+ } else {
+ addi (addr, k, n * 16);
+ lvx (kplusw0, addr);
+ }
+#if defined(VM_LITTLE_ENDIAN)
+ vaddudm(kplusw0, kplusw0, w);
+ vsldoi (kplusw1, kplusw0, kplusw0, 8);
+#else
+ vaddudm(kplusw1, kplusw0, w);
+ vsldoi (kplusw0, kplusw1, kplusw1, 8);
+#endif
+
+ sha512_round(hs, total_hs, h_cnt, kplusw0);
+ sha512_round(hs, total_hs, h_cnt, kplusw1);
+ }
+
+ Register tmp = R8;
+ li (tmp, (w_size-16)/total_hs); // remaining rounds 16..79: this many loop iterations of total_hs rounds each
+ mtctr (tmp);
+ // j is the byte offset into k of the next constants to load; it starts past the
+ // 16 constants (16 * 8 bytes) consumed above and is advanced by sha512_calc_2w.
+ Register j = tmp; // tmp is dead once it has been moved to CTR
+ li (j, 8*16);
+
+ align(OptoLoopAlignment);
+ bind(core_loop);
+
+ // due to VectorRegister rotate, always iterate in multiples of total_hs
+ for (int n = 0; n < total_hs/2; n++) {
+ sha512_calc_2w(w0, w1, w2, w3, w4, w5, w6, w7, kplusw0, kplusw1, j, vRb, k);
+ sha512_round(hs, total_hs, h_cnt, kplusw0);
+ sha512_round(hs, total_hs, h_cnt, kplusw1);
+ }
+
+ bdnz (core_loop);
+
+ sha512_update_sha_state(state, hs, total_hs);
+
+ if (multi_block) {
+ addi(buf_in, buf_in, buf_size);
+ addi(ofs, ofs, buf_size);
+ cmplw(CCR0, ofs, limit); // unsigned 32-bit compare
+ ble(CCR0, sha_loop); // process another block while ofs <= limit
+
+ // return ofs
+ mr(R3_RET, ofs);
+ }
+
+ // Restore non-volatile registers
+ for (int c = 0; c < nv_size; c++) {
+ Register idx = R7;
+ li (idx, (c - (nv_size)) * 16);
+ lvx(nv[c], idx, R1);
+ }
+}
diff --git a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp
index 70cd4ebbd91..6d6128d416e 100644
--- a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp
+++ b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -221,13 +221,13 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
// A calculation relative to the global TOC.
if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
(address)data) {
- const int invalidated_range =
- MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
+ const address inst2_addr = addr;
+ const address inst1_addr =
+ MacroAssembler::patch_calculate_address_from_global_toc_at(inst2_addr, cb->content_begin(),
(address)data);
- const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
- // FIXME:
- const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
- ICache::ppc64_flush_icache_bytes(start, range);
+ assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
+ const int range = inst2_addr - inst1_addr + BytesPerInstWord;
+ ICache::ppc64_flush_icache_bytes(inst1_addr, range);
}
next_address = addr + 1 * BytesPerInstWord;
} else if (MacroAssembler::is_load_const_at(addr)) {
@@ -288,15 +288,15 @@ void NativeMovConstReg::set_data(intptr_t data) {
}
void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
- address addr = addr_at(0);
+ address inst2_addr = addr_at(0); // handled as the later instruction of the patched pair (see assert below)
CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
- if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
- const int invalidated_range =
- MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
- const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
- // FIXME:
- const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
- ICache::ppc64_flush_icache_bytes(start, range);
+ if (MacroAssembler::get_narrow_oop(inst2_addr, cb->content_begin()) == (long)data)
+ return; // value already encoded: nothing to patch, nothing to flush
+ const address inst1_addr =
+ MacroAssembler::patch_set_narrow_oop(inst2_addr, cb->content_begin(), (long)data);
+ assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
+ const int range = inst2_addr - inst1_addr + BytesPerInstWord; // from inst1 through the end of inst2
+ ICache::ppc64_flush_icache_bytes(inst1_addr, range); // flush the span computed above
}
// Do not use an assertion here. Let clients decide whether they only
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index de0d6088460..789c91f2005 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -254,6 +254,73 @@ register %{
reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
reg_def SR_PPR( SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg()); // v
+// ----------------------------
+// Vector-Scalar Registers (VSR0..VSR63, ideal type Op_VecX, both save types SOC, VMReg argument NULL)
+// ----------------------------
+ reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
+ reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
+ reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
+ reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
+ reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
+ reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
+ reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
+ reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
+ reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
+ reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
+ reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
+ reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
+ reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
+ reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
+ reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
+ reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
+ reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
+ reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
+ reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
+ reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
+ reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
+ reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
+ reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
+ reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
+ reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
+ reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
+ reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
+ reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
+ reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
+ reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
+ reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
+ reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
+ reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
+ reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
+ reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
+ reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
+ reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
+ reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
+ reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
+ reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
+ reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
+ reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
+ reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
+ reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
+ reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
+ reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
+ reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
+ reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
+ reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
+ reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
+ reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
+ reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
+ reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
+ reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
+ reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
+ reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
+ reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
+ reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
+ reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
+ reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
+ reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
+ reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
+ reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
+ reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);
// ----------------------------
// Specify priority of register selection within phases of register
@@ -385,6 +452,73 @@ alloc_class chunk2 (
);
alloc_class chunk3 (
+ VSR0, // chunk3 now holds all 64 vector-scalar registers, listed in numeric order
+ VSR1,
+ VSR2,
+ VSR3,
+ VSR4,
+ VSR5,
+ VSR6,
+ VSR7,
+ VSR8,
+ VSR9,
+ VSR10,
+ VSR11,
+ VSR12,
+ VSR13,
+ VSR14,
+ VSR15,
+ VSR16,
+ VSR17,
+ VSR18,
+ VSR19,
+ VSR20,
+ VSR21,
+ VSR22,
+ VSR23,
+ VSR24,
+ VSR25,
+ VSR26,
+ VSR27,
+ VSR28,
+ VSR29,
+ VSR30,
+ VSR31,
+ VSR32,
+ VSR33,
+ VSR34,
+ VSR35,
+ VSR36,
+ VSR37,
+ VSR38,
+ VSR39,
+ VSR40,
+ VSR41,
+ VSR42,
+ VSR43,
+ VSR44,
+ VSR45,
+ VSR46,
+ VSR47,
+ VSR48,
+ VSR49,
+ VSR50,
+ VSR51,
+ VSR52,
+ VSR53,
+ VSR54,
+ VSR55,
+ VSR56,
+ VSR57,
+ VSR58,
+ VSR59,
+ VSR60,
+ VSR61,
+ VSR62,
+ VSR63
+);
+
+alloc_class chunk4 (
// special registers
// These registers are not allocated, but used for nodes generated by postalloc expand.
SR_XER,
@@ -769,6 +903,45 @@ reg_class dbl_reg(
F31, F31_H // nv!
);
+// ----------------------------
+// Vector-Scalar Register Class
+// ----------------------------
+// Only VSR32..VSR51 are members; VSR52..VSR63 are commented out below and tagged "nv!".
+reg_class vs_reg(
+ VSR32,
+ VSR33,
+ VSR34,
+ VSR35,
+ VSR36,
+ VSR37,
+ VSR38,
+ VSR39,
+ VSR40,
+ VSR41,
+ VSR42,
+ VSR43,
+ VSR44,
+ VSR45,
+ VSR46,
+ VSR47,
+ VSR48,
+ VSR49,
+ VSR50,
+ VSR51
+// VSR52, // nv!
+// VSR53, // nv!
+// VSR54, // nv!
+// VSR55, // nv!
+// VSR56, // nv!
+// VSR57, // nv!
+// VSR58, // nv!
+// VSR59, // nv!
+// VSR60, // nv!
+// VSR61, // nv!
+// VSR62, // nv!
+// VSR63 // nv!
+);
+
%}
//----------DEFINITION BLOCK---------------------------------------------------
@@ -1502,7 +1675,7 @@ static enum RC rc_class(OptoReg::Name reg) {
if (reg < 64+64) return rc_float;
// Between float regs & stack are the flags regs.
- assert(OptoReg::is_stack(reg), "blow up if spilling flags");
+ assert(OptoReg::is_stack(reg) || reg < 64+64+64, "blow up if spilling flags");
return rc_stack;
}
@@ -2048,14 +2221,24 @@ const bool Matcher::convL2FSupported(void) {
// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
- assert(MaxVectorSize == 8, "");
- return 8;
+ if (SuperwordUseVSX) { // VSX superword enabled: vectors are 16 bytes wide
+ assert(MaxVectorSize == 16, "");
+ return 16; // bt is not consulted: same width for every element type
+ } else { // otherwise vectors are 8 bytes wide
+ assert(MaxVectorSize == 8, "");
+ return 8;
+ }
}
// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int size) {
- assert(MaxVectorSize == 8 && size == 8, "");
- return Op_RegL;
+ if (SuperwordUseVSX) { // VSX superword enabled: only 16-byte vectors are expected
+ assert(MaxVectorSize == 16 && size == 16, "");
+ return Op_VecX; // 16-byte vectors use the VecX ideal register type
+ } else { // otherwise only 8-byte vectors are expected
+ assert(MaxVectorSize == 8 && size == 8, "");
+ return Op_RegL; // 8-byte vectors use the long ideal register type
+ }
}
const uint Matcher::vector_shift_count_ideal_reg(int size) {
@@ -2075,7 +2258,7 @@ const int Matcher::min_vector_size(const BasicType bt) {
// PPC doesn't support misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
- return false;
+ return !AlignVector; // misaligned vector access is reported as OK exactly when the AlignVector flag is off
}
// PPC AES support not yet implemented
@@ -2217,10 +2400,31 @@ const MachRegisterNumbers farg_reg[13] = {
F13_num
};
+const MachRegisterNumbers vsarg_reg[64] = { // entry i is VSRi_num, for VSR0..VSR63 in numeric order
+ VSR0_num, VSR1_num, VSR2_num, VSR3_num,
+ VSR4_num, VSR5_num, VSR6_num, VSR7_num,
+ VSR8_num, VSR9_num, VSR10_num, VSR11_num,
+ VSR12_num, VSR13_num, VSR14_num, VSR15_num,
+ VSR16_num, VSR17_num, VSR18_num, VSR19_num,
+ VSR20_num, VSR21_num, VSR22_num, VSR23_num,
+ VSR24_num, VSR25_num, VSR26_num, VSR27_num,
+ VSR28_num, VSR29_num, VSR30_num, VSR31_num,
+ VSR32_num, VSR33_num, VSR34_num, VSR35_num,
+ VSR36_num, VSR37_num, VSR38_num, VSR39_num,
+ VSR40_num, VSR41_num, VSR42_num, VSR43_num,
+ VSR44_num, VSR45_num, VSR46_num, VSR47_num,
+ VSR48_num, VSR49_num, VSR50_num, VSR51_num,
+ VSR52_num, VSR53_num, VSR54_num, VSR55_num,
+ VSR56_num, VSR57_num, VSR58_num, VSR59_num,
+ VSR60_num, VSR61_num, VSR62_num, VSR63_num
+};
+
const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
+const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]); // element count of vsarg_reg
+
// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
@@ -2552,6 +2756,115 @@ loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immL
return nodes;
}
+typedef struct { // nodes produced by loadConLReplicatedNodesTuple_create; members unused by a path are NULL
+ loadConL_hiNode *_large_hi; // large-constant-pool path: first load node (input: toc)
+ loadConL_loNode *_large_lo; // large-constant-pool path: second load node (input: _large_hi)
+ mtvsrdNode *_moved; // mtvsrd node following the constant load
+ xxspltdNode *_replicated; // xxspltd node following _moved
+ loadConLNode *_small; // small-constant-pool path: single load node (input: toc)
+ MachNode *_last; // last node of the chain; set to _replicated on both paths
+} loadConLReplicatedNodesTuple;
+// Creates the post-allocation node chain that loads the 64-bit constant immSrc via the TOC
+// and replicates it into a vector-scalar register:
+//   large constant pool: loadConL_hi -> loadConL_lo -> mtvsrd -> xxspltd
+//   small constant pool: loadConL -> mtvsrd -> xxspltd
+// reg_second/reg_first are assigned to the GPR load nodes, reg_vec_second/reg_vec_first to the
+// final xxspltd. Tuple members not used by the chosen path are NULL; _last is the xxspltd node.
+// C and dst are not referenced by the body.
+loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
+ vecXOper *dst, immI_0Oper *zero,
+ OptoReg::Name reg_second, OptoReg::Name reg_first,
+ OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
+ loadConLReplicatedNodesTuple nodes;
+ const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
+ if (large_constant_pool) {
+ // Create new nodes.
+ loadConL_hiNode *m1 = new loadConL_hiNode();
+ loadConL_loNode *m2 = new loadConL_loNode();
+ mtvsrdNode *m3 = new mtvsrdNode();
+ xxspltdNode *m4 = new xxspltdNode();
+
+ // inputs for new nodes
+ m1->add_req(NULL, toc);
+ m2->add_req(NULL, m1);
+ m3->add_req(NULL, m2);
+ m4->add_req(NULL, m3);
+
+ // operands for new nodes
+ m1->_opnds[0] = new iRegLdstOper(); // dst
+ m1->_opnds[1] = immSrc; // src
+ m1->_opnds[2] = new iRegPdstOper(); // toc
+ m2->_opnds[0] = new iRegLdstOper(); // dst
+ m2->_opnds[1] = immSrc; // src
+ m2->_opnds[2] = new iRegLdstOper(); // base
+ m3->_opnds[0] = new vecXOper(); // dst
+ m3->_opnds[1] = new iRegLdstOper(); // src
+ m4->_opnds[0] = new vecXOper(); // dst
+ m4->_opnds[1] = new vecXOper(); // src
+ m4->_opnds[2] = zero;
+
+ // Initialize ins_attrib TOC fields.
+ m1->_const_toc_offset = -1;
+ m2->_const_toc_offset_hi_node = m1;
+ // Initialize ins_attrib instruction offset.
+ m1->_cbuf_insts_offset = -1;
+
+ // register allocation for new nodes
+ ra_->set_pair(m1->_idx, reg_second, reg_first);
+ ra_->set_pair(m2->_idx, reg_second, reg_first);
+ ra_->set1(m3->_idx, reg_second);
+ ra_->set2(m3->_idx, reg_vec_first);
+ ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
+
+ // Create result.
+ nodes._large_hi = m1;
+ nodes._large_lo = m2;
+ nodes._moved = m3;
+ nodes._replicated = m4;
+ nodes._small = NULL;
+ nodes._last = nodes._replicated;
+ assert(m2->bottom_type()->isa_long(), "must be long");
+ } else {
+ loadConLNode *m2 = new loadConLNode();
+ mtvsrdNode *m3 = new mtvsrdNode();
+ xxspltdNode *m4 = new xxspltdNode();
+
+ // inputs for new nodes
+ m2->add_req(NULL, toc);
+ m3->add_req(NULL, m2); // mtvsrd consumes the loaded constant, as in the large-pool branch
+ m4->add_req(NULL, m3); // xxspltd consumes the mtvsrd result, as in the large-pool branch
+
+ // operands for new nodes
+ m2->_opnds[0] = new iRegLdstOper(); // dst
+ m2->_opnds[1] = immSrc; // src
+ m2->_opnds[2] = new iRegPdstOper(); // toc
+ m3->_opnds[0] = new vecXOper(); // dst
+ m3->_opnds[1] = new iRegLdstOper(); // src
+ m4->_opnds[0] = new vecXOper(); // dst
+ m4->_opnds[1] = new vecXOper(); // src
+ m4->_opnds[2] = zero;
+
+ // Initialize ins_attrib instruction offset.
+ m2->_cbuf_insts_offset = -1;
+ // register allocation for new nodes
+ ra_->set_pair(m2->_idx, reg_second, reg_first);
+ ra_->set1(m3->_idx, reg_second);
+ ra_->set2(m3->_idx, reg_vec_first);
+ ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);
+
+ // Create result.
+ nodes._large_hi = NULL;
+ nodes._large_lo = NULL;
+ nodes._small = m2;
+ nodes._moved = m3;
+ nodes._replicated = m4;
+ nodes._last = nodes._replicated;
+ assert(m2->bottom_type()->isa_long(), "must be long");
+ }
+
+ return nodes;
+}
+
%} // source
encode %{
@@ -3212,6 +3525,27 @@ encode %{
assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
%}
+ enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc) %{
+ // Expands into the node chain that loads the replicated float bit pattern and splats it into dst.
+ // NOTE(review): the GPR temp is hard-coded to R20 and is not an operand of this enc_class - confirm it is free here.
+ // Make an operand with the bit pattern to load as float.
+ immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
+ immI_0Oper *op_zero = new immI_0Oper(0);
+
+ loadConLReplicatedNodesTuple loadConLNodes =
+ loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
+ OptoReg::Name(R20_H_num), OptoReg::Name(R20_num),
+ ra_->get_reg_second(this), ra_->get_reg_first(this)); // result must land in the register pair allocated to this node
+
+ // Push new nodes in dependency order; members that are NULL for the chosen path are skipped.
+ if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
+ if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
+ if (loadConLNodes._small) { nodes->push(loadConLNodes._small); }
+ if (loadConLNodes._moved) { nodes->push(loadConLNodes._moved); }
+ if (loadConLNodes._last) { nodes->push(loadConLNodes._last); }
+ assert(nodes->length() >= 1, "must have created at least 1 node");
+ %}
+
// This enc_class is needed so that scheduler gets proper
// input mapping for latency computation.
enc_class enc_poll(immI dst, iRegLdst poll) %{
@@ -3840,6 +4174,14 @@ ins_attrib ins_field_load_ic_node(0);
//
// Formats are generated automatically for constants and base registers.
+operand vecX() %{
+ constraint(ALLOC_IN_RC(vs_reg));
+ match(VecX);
+ // Register operand for ideal VecX values, allocated from reg_class vs_reg; the format is intentionally empty.
+ format %{ %}
+ interface(REG_INTER);
+%}
+
//----------Simple Operands----------------------------------------------------
// Immediate Operands
@@ -5372,6 +5714,20 @@ instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
ins_pipe(pipe_class_memory);
%}
+// Load 16-byte vector (VecX) from an indirect address
+instruct loadV16(vecX dst, indirect mem) %{
+ predicate(n->as_LoadVector()->memory_size() == 16);
+ match(Set dst (LoadVector mem));
+ ins_cost(MEMORY_REF_COST);
+ // Selected only for LoadVector nodes whose memory size is 16; emits one LXVD2X using mem's register.
+ format %{ "LXVD2X $dst, $mem \t// load 16-byte Vector" %}
+ size(4);
+ ins_encode %{
+ __ lxvd2x($dst$$VectorSRegister, $mem$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
match(Set dst (LoadRange mem));
@@ -6368,6 +6724,20 @@ instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
ins_pipe(pipe_class_memory);
%}
+// Store 16-byte vector register (VecX) to an indirect address
+instruct storeV16(indirect mem, vecX src) %{
+ predicate(n->as_StoreVector()->memory_size() == 16);
+ match(Set mem (StoreVector mem src));
+ ins_cost(MEMORY_REF_COST);
+ // Selected only for StoreVector nodes whose memory size is 16; emits one STXVD2X using mem's register.
+ format %{ "STXVD2X $mem, $src \t// store 16-byte Vector" %}
+ size(4);
+ ins_encode %{
+ __ stxvd2x($src$$VectorSRegister, $mem$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
@@ -13239,6 +13609,26 @@ instruct storeS_reversed(iRegIsrc src, indirect mem) %{
ins_pipe(pipe_class_default);
%}
+instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
+ effect(DEF temp1, USE src);
+ // No match rule: only reachable from expand rules. Emits mtvsrwz from GPR src into VSX register temp1.
+ size(4);
+ ins_encode %{
+ __ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
+ effect(DEF dst, USE src, USE imm1);
+ // No match rule: only reachable from expand rules. Emits xxspltw dst, src with imm1 as the immediate.
+ size(4);
+ ins_encode %{
+ __ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
//---------- Replicate Vector Instructions ------------------------------------
// Insrdi does replicate if src == dst.
@@ -13318,6 +13708,46 @@ instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
+instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
+ match(Set dst (ReplicateB src));
+ predicate(n->as_Vector()->length() == 16);
+ // 16 x byte: build the pattern in a GPR temp (moveReg, repl56, repl48), move it to a VSX temp, then XXSPLTW with index 1.
+ expand %{
+ iRegLdst tmpL;
+ vecX tmpV;
+ immI8 imm1 %{ (int) 1 %}
+ moveReg(tmpL, src);
+ repl56(tmpL);
+ repl48(tmpL);
+ mtvsrwz(tmpV, tmpL);
+ xxspltw(dst, tmpV, imm1);
+ %}
+%}
+
+instruct repl16B_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateB zero));
+ predicate(n->as_Vector()->length() == 16);
+ // 16 x byte of zero: XOR dst with itself, so no constant load is needed.
+ format %{ "XXLXOR $dst, $zero \t// replicate16B" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateB src));
+ predicate(n->as_Vector()->length() == 16);
+ // 16 x byte of -1: XXLEQV (bitwise equivalence) of dst with itself sets every bit.
+ format %{ "XXLEQV $dst, $src \t// replicate16B" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 4);
@@ -13352,6 +13782,46 @@ instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
+instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
+ match(Set dst (ReplicateS src));
+ predicate(n->as_Vector()->length() == 8);
+ // 8 x short: build the pattern in a GPR temp (moveReg, repl48, repl32), move it to a VSX temp, then XXPERMDI with immediate 0.
+ expand %{
+ iRegLdst tmpL;
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ moveReg(tmpL, src);
+ repl48(tmpL);
+ repl32(tmpL);
+ mtvsrd(tmpV, tmpL);
+ xxpermdi(dst, tmpV, tmpV, zero);
+ %}
+%}
+
+instruct repl8S_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateS zero));
+ predicate(n->as_Vector()->length() == 8);
+ // 8 x short of zero: XOR dst with itself, so no constant load is needed.
+ format %{ "XXLXOR $dst, $zero \t// replicate8S" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateS src));
+ predicate(n->as_Vector()->length() == 8);
+ // 8 x short of -1: XXLEQV (bitwise equivalence) of dst with itself sets every bit.
+ format %{ "XXLEQV $dst, $src \t// replicate8S" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 2);
@@ -13386,6 +13856,46 @@ instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
+instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
+ match(Set dst (ReplicateI src));
+ predicate(n->as_Vector()->length() == 4);
+ ins_cost(2 * DEFAULT_COST);
+ // 4 x int: build the pattern in a GPR temp (moveReg, repl32), move it to a VSX temp, then XXPERMDI with immediate 0.
+ expand %{
+ iRegLdst tmpL;
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ moveReg(tmpL, src);
+ repl32(tmpL);
+ mtvsrd(tmpV, tmpL);
+ xxpermdi(dst, tmpV, tmpV, zero);
+ %}
+%}
+
+instruct repl4I_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateI zero));
+ predicate(n->as_Vector()->length() == 4);
+ // 4 x int of zero: XOR dst with itself, so no constant load is needed.
+ format %{ "XXLXOR $dst, $zero \t// replicate4I" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateI src));
+ predicate(n->as_Vector()->length() == 4);
+ // 4 x int of -1: XXLEQV (bitwise equivalence) of dst with itself sets every bit.
+ format %{ "XXLEQV $dst, $dst, $dst \t// replicate4I" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// Move float to int register via stack, replicate.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
match(Set dst (ReplicateF src));
@@ -13484,6 +13994,154 @@ instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
%}
+instruct repl4F_reg_Ex(vecX dst, regF src) %{
+ match(Set dst (ReplicateF src));
+ predicate(n->as_Vector()->length() == 4);
+ ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
+ expand %{
+ stackSlotL tmpS;
+ iRegIdst tmpI;
+ iRegLdst tmpL;
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ // 4 x float: float register -> stack slot -> GPR, replicate in the GPR, then move to VSX and permute.
+ moveF2I_reg_stack(tmpS, src); // Move float to stack.
+ moveF2I_stack_reg(tmpI, tmpS); // Move stack to int reg.
+ moveReg(tmpL, tmpI); // Move int to long reg.
+ repl32(tmpL); // Replicate bitpattern.
+ mtvsrd(tmpV, tmpL); // Move long reg to VSX temp.
+ xxpermdi(dst, tmpV, tmpV, zero); // Permute VSX temp with itself (immediate 0) into dst.
+ %}
+%}
+
+instruct repl4F_immF_Ex(vecX dst, immF src) %{
+ match(Set dst (ReplicateF src));
+ predicate(n->as_Vector()->length() == 4);
+ ins_cost(10 * DEFAULT_COST);
+ // 4 x float constant: expanded after register allocation; constanttablebase is passed as the toc argument.
+ postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase) );
+%}
+
+instruct repl4F_immF0(vecX dst, immF_0 zero) %{
+ match(Set dst (ReplicateF zero));
+ predicate(n->as_Vector()->length() == 4);
+ // 4 x float of 0.0: XOR dst with itself. NOTE(review): the sibling zero rules declare size(4); this one does not.
+ format %{ "XXLXOR $dst, $zero \t// replicate4F" %}
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl2D_reg_Ex(vecX dst, regD src) %{
+ match(Set dst (ReplicateD src));
+ predicate(n->as_Vector()->length() == 2);
+ expand %{
+ stackSlotL tmpS;
+ iRegLdst tmpL;
+ iRegLdst tmp; // NOTE(review): declared but not referenced by any step below
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ moveD2L_reg_stack(tmpS, src); // Move double to stack.
+ moveD2L_stack_reg(tmpL, tmpS); // Move stack to long reg.
+ mtvsrd(tmpV, tmpL); // Move long reg to VSX temp.
+ xxpermdi(dst, tmpV, tmpV, zero); // Permute VSX temp with itself (immediate 0) into dst.
+ %}
+%}
+
+instruct repl2D_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateD zero));
+ predicate(n->as_Vector()->length() == 2);
+ // 2 x double of zero via XOR of dst with itself. NOTE(review): operand is an int immediate (immI_0) under ReplicateD - confirm this rule can match.
+ format %{ "XXLXOR $dst, $zero \t// replicate2D" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl2D_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateD src));
+ predicate(n->as_Vector()->length() == 2);
+ // All-ones 16-byte pattern via XXLEQV of dst with itself. NOTE(review): operand is an int immediate (immI_minus1) under ReplicateD - confirm this rule can match.
+ format %{ "XXLEQV $dst, $src \t// replicate2D" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct mtvsrd(vecX dst, iRegLsrc src) %{
+ predicate(false);
+ effect(DEF dst, USE src);
+ // No match rule and predicate(false): only reachable from expand rules. Emits mtvsrd from GPR src into VSX register dst.
+ format %{ "MTVSRD $dst, $src \t// Move to 16-byte register"%}
+ size(4);
+ ins_encode %{
+ __ mtvsrd($dst$$VectorSRegister, $src$$Register);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
+ effect(DEF dst, USE src, USE zero);
+ // No match rule: used as a helper node. Encoded as xxpermdi with src as both inputs and $zero as the immediate.
+ format %{ "XXSPLTD $dst, $src, $zero \t// Permute 16-byte register"%}
+ size(4);
+ ins_encode %{
+ __ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
+ effect(DEF dst, USE src1, USE src2, USE zero);
+ // No match rule: only reachable from expand rules. Emits xxpermdi dst, src1, src2 with $zero as the immediate.
+ format %{ "XXPERMDI $dst, $src1, $src2, $zero \t// Permute 16-byte register"%}
+ size(4);
+ ins_encode %{
+ __ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
+ match(Set dst (ReplicateL src));
+ predicate(n->as_Vector()->length() == 2);
+ expand %{
+ vecX tmpV;
+ immI8 zero %{ (int) 0 %}
+ mtvsrd(tmpV, src); // Move long reg to VSX temp.
+ xxpermdi(dst, tmpV, tmpV, zero); // Permute VSX temp with itself (immediate 0) into dst.
+ %}
+%}
+
+instruct repl2L_immI0(vecX dst, immI_0 zero) %{
+ match(Set dst (ReplicateL zero));
+ predicate(n->as_Vector()->length() == 2);
+ // 2 x long of zero via XOR of dst with itself. NOTE(review): operand is an int immediate (immI_0) under ReplicateL - confirm this rule can match.
+ format %{ "XXLXOR $dst, $zero \t// replicate2L" %}
+ size(4);
+ ins_encode %{
+ __ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
+ match(Set dst (ReplicateL src));
+ predicate(n->as_Vector()->length() == 2);
+ // 2 x long of -1 via XXLEQV of dst with itself. NOTE(review): operand is an int immediate (immI_minus1) under ReplicateL - confirm this rule can match.
+ format %{ "XXLEQV $dst, $src \t// replicate2L" %}
+ size(4);
+ ins_encode %{
+ __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
// ============================================================================
// Safepoint Instruction
diff --git a/src/hotspot/cpu/ppc/register_definitions_ppc.cpp b/src/hotspot/cpu/ppc/register_definitions_ppc.cpp
index 2a2d968e44d..d5a064b169f 100644
--- a/src/hotspot/cpu/ppc/register_definitions_ppc.cpp
+++ b/src/hotspot/cpu/ppc/register_definitions_ppc.cpp
@@ -31,3 +31,5 @@
REGISTER_DEFINITION(Register, noreg);
REGISTER_DEFINITION(FloatRegister, fnoreg);
+
+REGISTER_DEFINITION(VectorSRegister, vsnoreg); // same pattern as noreg/fnoreg above, for the VectorSRegister type
diff --git a/src/hotspot/cpu/ppc/register_ppc.hpp b/src/hotspot/cpu/ppc/register_ppc.hpp
index c554f88619d..af516ce2d8f 100644
--- a/src/hotspot/cpu/ppc/register_ppc.hpp
+++ b/src/hotspot/cpu/ppc/register_ppc.hpp
@@ -677,7 +677,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
* 2 // register halves
+ ConditionRegisterImpl::number_of_registers // condition code registers
+ SpecialRegisterImpl::number_of_registers // special registers
- + VectorRegisterImpl::number_of_registers // VSX registers
+ + VectorSRegisterImpl::number_of_registers // VSX registers
};
static const int max_gpr;
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index 20a1a963abc..4b879905afa 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -479,8 +479,8 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_siz
// Is vector's size (in bytes) bigger than a size saved by default?
bool SharedRuntime::is_wide_vector(int size) {
- // Note, MaxVectorSize == 8 on PPC64.
- assert(size <= 8, "%d bytes vectors are not supported", size);
+ // Note, the largest accepted vector size on PPC64 is 16 bytes when SuperwordUseVSX is set, and 8 bytes otherwise.
+ assert(size <= (SuperwordUseVSX ? 16 : 8), "%d bytes vectors are not supported", size);
return size > 8;
}
@@ -2234,9 +2234,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ release();
// TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
- if (UseMembar) {
- __ fence();
- }
// The JNI call
@@ -2393,9 +2390,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ release();
// TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
- if (UseMembar) {
- __ fence();
- }
__ bind(after_transition);
// Reguard any pages if necessary.
diff --git a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
index 2eb4937a6fa..16cb6149f77 100644
--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp
@@ -2667,7 +2667,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
- // Arguments for generated stub (little endian only):
+ // Arguments for generated stub:
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - round key array
@@ -2686,7 +2686,6 @@ class StubGenerator: public StubCodeGenerator {
Register keylen = R8;
Register temp = R9;
Register keypos = R10;
- Register hex = R11;
Register fifteen = R12;
VectorRegister vRet = VR0;
@@ -2706,164 +2705,170 @@ class StubGenerator: public StubCodeGenerator {
VectorRegister vTmp3 = VR11;
VectorRegister vTmp4 = VR12;
- VectorRegister vLow = VR13;
- VectorRegister vHigh = VR14;
-
- __ li (hex, 16);
__ li (fifteen, 15);
- __ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
+#ifdef VM_LITTLE_ENDIAN
+ __ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt);
+#endif
__ vperm (vRet, vRet, vTmp1, fromPerm);
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
- __ lvsr (keyPerm, key);
- __ vxor (vTmp2, vTmp2, vTmp2);
+ __ load_perm (keyPerm, key);
+#ifdef VM_LITTLE_ENDIAN
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vsldoi (keyPerm, keyPerm, keyPerm, 8);
+#endif
- // load the 1st round key to vKey1
- __ li (keypos, 0);
+ // load the 1st round key to vTmp1
+ __ lvx (vTmp1, key);
+ __ li (keypos, 16);
__ lvx (vKey1, keypos, key);
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey1, vTmp1, vKey1, keyPerm);
+ __ vec_perm (vTmp1, vKey1, keyPerm);
// 1st round
- __ vxor (vRet, vRet, vKey1);
+ __ vxor (vRet, vRet, vTmp1);
// load the 2nd round key to vKey1
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
+ __ li (keypos, 32);
+ __ lvx (vKey2, keypos, key);
+ __ vec_perm (vKey1, vKey2, keyPerm);
// load the 3rd round key to vKey2
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
+ __ li (keypos, 48);
+ __ lvx (vKey3, keypos, key);
+ __ vec_perm (vKey2, vKey3, keyPerm);
// load the 4th round key to vKey3
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey3, vTmp2, vTmp1, keyPerm);
+ __ li (keypos, 64);
+ __ lvx (vKey4, keypos, key);
+ __ vec_perm (vKey3, vKey4, keyPerm);
// load the 5th round key to vKey4
- __ addi (keypos, keypos, 16);
+ __ li (keypos, 80);
__ lvx (vTmp1, keypos, key);
- __ vperm (vKey4, vTmp1, vTmp2, keyPerm);
+ __ vec_perm (vKey4, vTmp1, keyPerm);
// 2nd - 5th rounds
- __ vcipher (vRet, vRet, vKey1);
- __ vcipher (vRet, vRet, vKey2);
- __ vcipher (vRet, vRet, vKey3);
- __ vcipher (vRet, vRet, vKey4);
+ __ vcipher (vRet, vRet, vKey1);
+ __ vcipher (vRet, vRet, vKey2);
+ __ vcipher (vRet, vRet, vKey3);
+ __ vcipher (vRet, vRet, vKey4);
// load the 6th round key to vKey1
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
+ __ li (keypos, 96);
+ __ lvx (vKey2, keypos, key);
+ __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 7th round key to vKey2
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
+ __ li (keypos, 112);
+ __ lvx (vKey3, keypos, key);
+ __ vec_perm (vKey2, vKey3, keyPerm);
// load the 8th round key to vKey3
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey3, vTmp2, vTmp1, keyPerm);
+ __ li (keypos, 128);
+ __ lvx (vKey4, keypos, key);
+ __ vec_perm (vKey3, vKey4, keyPerm);
// load the 9th round key to vKey4
- __ addi (keypos, keypos, 16);
+ __ li (keypos, 144);
__ lvx (vTmp1, keypos, key);
- __ vperm (vKey4, vTmp1, vTmp2, keyPerm);
+ __ vec_perm (vKey4, vTmp1, keyPerm);
// 6th - 9th rounds
- __ vcipher (vRet, vRet, vKey1);
- __ vcipher (vRet, vRet, vKey2);
- __ vcipher (vRet, vRet, vKey3);
- __ vcipher (vRet, vRet, vKey4);
+ __ vcipher (vRet, vRet, vKey1);
+ __ vcipher (vRet, vRet, vKey2);
+ __ vcipher (vRet, vRet, vKey3);
+ __ vcipher (vRet, vRet, vKey4);
// load the 10th round key to vKey1
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
+ __ li (keypos, 160);
+ __ lvx (vKey2, keypos, key);
+ __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 11th round key to vKey2
- __ addi (keypos, keypos, 16);
+ __ li (keypos, 176);
__ lvx (vTmp1, keypos, key);
- __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
+ __ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 4 rounds
__ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_doLast);
// 10th - 11th rounds
- __ vcipher (vRet, vRet, vKey1);
- __ vcipher (vRet, vRet, vKey2);
+ __ vcipher (vRet, vRet, vKey1);
+ __ vcipher (vRet, vRet, vKey2);
// load the 12th round key to vKey1
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
+ __ li (keypos, 192);
+ __ lvx (vKey2, keypos, key);
+ __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 13th round key to vKey2
- __ addi (keypos, keypos, 16);
+ __ li (keypos, 208);
__ lvx (vTmp1, keypos, key);
- __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
+ __ vec_perm (vKey2, vTmp1, keyPerm);
// if all round keys are loaded, skip next 2 rounds
__ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_doLast);
// 12th - 13th rounds
- __ vcipher (vRet, vRet, vKey1);
- __ vcipher (vRet, vRet, vKey2);
+ __ vcipher (vRet, vRet, vKey1);
+ __ vcipher (vRet, vRet, vKey2);
// load the 14th round key to vKey1
- __ addi (keypos, keypos, 16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
+ __ li (keypos, 224);
+ __ lvx (vKey2, keypos, key);
+ __ vec_perm (vKey1, vTmp1, vKey2, keyPerm);
// load the 15th round key to vKey2
- __ addi (keypos, keypos, 16);
+ __ li (keypos, 240);
__ lvx (vTmp1, keypos, key);
- __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
+ __ vec_perm (vKey2, vTmp1, keyPerm);
__ bind(L_doLast);
// last two rounds
- __ vcipher (vRet, vRet, vKey1);
- __ vcipherlast (vRet, vRet, vKey2);
+ __ vcipher (vRet, vRet, vKey1);
+ __ vcipherlast (vRet, vRet, vKey2);
- __ neg (temp, to);
- __ lvsr (toPerm, temp);
- __ vspltisb (vTmp2, -1);
- __ vxor (vTmp1, vTmp1, vTmp1);
- __ vperm (vTmp2, vTmp2, vTmp1, toPerm);
- __ vxor (toPerm, toPerm, fSplt);
+ // store result (unaligned)
+#ifdef VM_LITTLE_ENDIAN
+ __ lvsl (toPerm, to);
+#else
+ __ lvsr (toPerm, to);
+#endif
+ __ vspltisb (vTmp3, -1);
+ __ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to);
- __ vperm (vRet, vRet, vRet, toPerm);
- __ vsel (vTmp1, vTmp1, vRet, vTmp2);
- __ lvx (vTmp4, fifteen, to);
+ __ lvx (vTmp2, fifteen, to);
+#ifdef VM_LITTLE_ENDIAN
+ __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
+ __ vxor (toPerm, toPerm, fSplt); // swap bytes
+#else
+ __ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
+#endif
+ __ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
+ __ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
+ __ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
+ __ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to);
- __ vsel (vRet, vRet, vTmp4, vTmp2);
- __ stvx (vRet, fifteen, to);
__ blr();
return start;
}
- // Arguments for generated stub (little endian only):
+ // Arguments for generated stub:
// R3_ARG1 - source byte array address
// R4_ARG2 - destination byte array address
// R5_ARG3 - K (key) in little endian int array
@@ -2885,7 +2890,6 @@ class StubGenerator: public StubCodeGenerator {
Register keylen = R8;
Register temp = R9;
Register keypos = R10;
- Register hex = R11;
Register fifteen = R12;
VectorRegister vRet = VR0;
@@ -2906,30 +2910,30 @@ class StubGenerator: public StubCodeGenerator {
VectorRegister vTmp3 = VR12;
VectorRegister vTmp4 = VR13;
- VectorRegister vLow = VR14;
- VectorRegister vHigh = VR15;
-
- __ li (hex, 16);
__ li (fifteen, 15);
- __ vspltisb (fSplt, 0x0f);
// load unaligned from[0-15] to vsRet
__ lvx (vRet, from);
__ lvx (vTmp1, fifteen, from);
__ lvsl (fromPerm, from);
+#ifdef VM_LITTLE_ENDIAN
+ __ vspltisb (fSplt, 0x0f);
__ vxor (fromPerm, fromPerm, fSplt);
+#endif
__ vperm (vRet, vRet, vTmp1, fromPerm); // align [and byte swap in LE]
// load keylen (44 or 52 or 60)
__ lwz (keylen, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT), key);
// to load keys
- __ lvsr (keyPerm, key);
+ __ load_perm (keyPerm, key);
+#ifdef VM_LITTLE_ENDIAN
__ vxor (vTmp2, vTmp2, vTmp2);
__ vspltisb (vTmp2, -16);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vrld (keyPerm, keyPerm, vTmp2);
__ vsldoi (keyPerm, keyPerm, keyPerm, 8);
+#endif
__ cmpwi (CCR0, keylen, 44);
__ beq (CCR0, L_do44);
@@ -2937,32 +2941,32 @@ class StubGenerator: public StubCodeGenerator {
__ cmpwi (CCR0, keylen, 52);
__ beq (CCR0, L_do52);
- // load the 15th round key to vKey11
+ // load the 15th round key to vKey1
__ li (keypos, 240);
+ __ lvx (vKey1, keypos, key);
+ __ li (keypos, 224);
+ __ lvx (vKey2, keypos, key);
+ __ vec_perm (vKey1, vKey2, vKey1, keyPerm);
+
+ // load the 14th round key to vKey2
+ __ li (keypos, 208);
+ __ lvx (vKey3, keypos, key);
+ __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
+
+ // load the 13th round key to vKey3
+ __ li (keypos, 192);
+ __ lvx (vKey4, keypos, key);
+ __ vec_perm (vKey3, vKey4, vKey3, keyPerm);
+
+ // load the 12th round key to vKey4
+ __ li (keypos, 176);
+ __ lvx (vKey5, keypos, key);
+ __ vec_perm (vKey4, vKey5, vKey4, keyPerm);
+
+ // load the 11th round key to vKey5
+ __ li (keypos, 160);
__ lvx (vTmp1, keypos, key);
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
-
- // load the 14th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey2, vTmp2, vTmp1, keyPerm);
-
- // load the 13th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey3, vTmp1, vTmp2, keyPerm);
-
- // load the 12th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey4, vTmp2, vTmp1, keyPerm);
-
- // load the 11th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey5, vTmp1, vTmp2, keyPerm);
+ __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// 1st - 5th rounds
__ vxor (vRet, vRet, vKey1);
@@ -2975,22 +2979,22 @@ class StubGenerator: public StubCodeGenerator {
__ bind (L_do52);
- // load the 13th round key to vKey11
+ // load the 13th round key to vKey1
__ li (keypos, 208);
- __ lvx (vTmp1, keypos, key);
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
+ __ lvx (vKey1, keypos, key);
+ __ li (keypos, 192);
+ __ lvx (vKey2, keypos, key);
+ __ vec_perm (vKey1, vKey2, vKey1, keyPerm);
- // load the 12th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey2, vTmp2, vTmp1, keyPerm);
+ // load the 12th round key to vKey2
+ __ li (keypos, 176);
+ __ lvx (vKey3, keypos, key);
+ __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
- // load the 11th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey3, vTmp1, vTmp2, keyPerm);
+ // load the 11th round key to vKey3
+ __ li (keypos, 160);
+ __ lvx (vTmp1, keypos, key);
+ __ vec_perm (vKey3, vTmp1, vKey3, keyPerm);
// 1st - 3rd rounds
__ vxor (vRet, vRet, vKey1);
@@ -3001,42 +3005,42 @@ class StubGenerator: public StubCodeGenerator {
__ bind (L_do44);
- // load the 11th round key to vKey11
+ // load the 11th round key to vKey1
__ li (keypos, 176);
+ __ lvx (vKey1, keypos, key);
+ __ li (keypos, 160);
__ lvx (vTmp1, keypos, key);
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
+ __ vec_perm (vKey1, vTmp1, vKey1, keyPerm);
// 1st round
__ vxor (vRet, vRet, vKey1);
__ bind (L_doLast);
- // load the 10th round key to vKey10
- __ addi (keypos, keypos, -16);
+ // load the 10th round key to vKey1
+ __ li (keypos, 144);
+ __ lvx (vKey2, keypos, key);
+ __ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
+
+ // load the 9th round key to vKey2
+ __ li (keypos, 128);
+ __ lvx (vKey3, keypos, key);
+ __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
+
+ // load the 8th round key to vKey3
+ __ li (keypos, 112);
+ __ lvx (vKey4, keypos, key);
+ __ vec_perm (vKey3, vKey4, vKey3, keyPerm);
+
+ // load the 7th round key to vKey4
+ __ li (keypos, 96);
+ __ lvx (vKey5, keypos, key);
+ __ vec_perm (vKey4, vKey5, vKey4, keyPerm);
+
+ // load the 6th round key to vKey5
+ __ li (keypos, 80);
__ lvx (vTmp1, keypos, key);
- __ vperm (vKey1, vTmp2, vTmp1, keyPerm);
-
- // load the 9th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey2, vTmp1, vTmp2, keyPerm);
-
- // load the 8th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey3, vTmp2, vTmp1, keyPerm);
-
- // load the 7th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey4, vTmp1, vTmp2, keyPerm);
-
- // load the 6th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey5, vTmp2, vTmp1, keyPerm);
+ __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// last 10th - 6th rounds
__ vncipher (vRet, vRet, vKey1);
@@ -3045,30 +3049,29 @@ class StubGenerator: public StubCodeGenerator {
__ vncipher (vRet, vRet, vKey4);
__ vncipher (vRet, vRet, vKey5);
- // load the 5th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey1, vTmp1, vTmp2, keyPerm);
+ // load the 5th round key to vKey1
+ __ li (keypos, 64);
+ __ lvx (vKey2, keypos, key);
+ __ vec_perm (vKey1, vKey2, vTmp1, keyPerm);
- // load the 4th round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey2, vTmp2, vTmp1, keyPerm);
+ // load the 4th round key to vKey2
+ __ li (keypos, 48);
+ __ lvx (vKey3, keypos, key);
+ __ vec_perm (vKey2, vKey3, vKey2, keyPerm);
- // load the 3rd round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey3, vTmp1, vTmp2, keyPerm);
+ // load the 3rd round key to vKey3
+ __ li (keypos, 32);
+ __ lvx (vKey4, keypos, key);
+ __ vec_perm (vKey3, vKey4, vKey3, keyPerm);
- // load the 2nd round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp1, keypos, key);
- __ vperm (vKey4, vTmp2, vTmp1, keyPerm);
+ // load the 2nd round key to vKey4
+ __ li (keypos, 16);
+ __ lvx (vKey5, keypos, key);
+ __ vec_perm (vKey4, vKey5, vKey4, keyPerm);
- // load the 1st round key to vKey10
- __ addi (keypos, keypos, -16);
- __ lvx (vTmp2, keypos, key);
- __ vperm (vKey5, vTmp1, vTmp2, keyPerm);
+ // load the 1st round key to vKey5
+ __ lvx (vTmp1, key);
+ __ vec_perm (vKey5, vTmp1, vKey5, keyPerm);
// last 5th - 1th rounds
__ vncipher (vRet, vRet, vKey1);
@@ -3077,24 +3080,54 @@ class StubGenerator: public StubCodeGenerator {
__ vncipher (vRet, vRet, vKey4);
__ vncipherlast (vRet, vRet, vKey5);
- __ neg (temp, to);
- __ lvsr (toPerm, temp);
- __ vspltisb (vTmp2, -1);
- __ vxor (vTmp1, vTmp1, vTmp1);
- __ vperm (vTmp2, vTmp2, vTmp1, toPerm);
- __ vxor (toPerm, toPerm, fSplt);
+ // store result (unaligned)
+#ifdef VM_LITTLE_ENDIAN
+ __ lvsl (toPerm, to);
+#else
+ __ lvsr (toPerm, to);
+#endif
+ __ vspltisb (vTmp3, -1);
+ __ vspltisb (vTmp4, 0);
__ lvx (vTmp1, to);
- __ vperm (vRet, vRet, vRet, toPerm);
- __ vsel (vTmp1, vTmp1, vRet, vTmp2);
- __ lvx (vTmp4, fifteen, to);
+ __ lvx (vTmp2, fifteen, to);
+#ifdef VM_LITTLE_ENDIAN
+ __ vperm (vTmp3, vTmp3, vTmp4, toPerm); // generate select mask
+ __ vxor (toPerm, toPerm, fSplt); // swap bytes
+#else
+ __ vperm (vTmp3, vTmp4, vTmp3, toPerm); // generate select mask
+#endif
+ __ vperm (vTmp4, vRet, vRet, toPerm); // rotate data
+ __ vsel (vTmp2, vTmp4, vTmp2, vTmp3);
+ __ vsel (vTmp1, vTmp1, vTmp4, vTmp3);
+ __ stvx (vTmp2, fifteen, to); // store this one first (may alias)
__ stvx (vTmp1, to);
- __ vsel (vRet, vRet, vTmp4, vTmp2);
- __ stvx (vRet, fifteen, to);
__ blr();
return start;
}
+ address generate_sha256_implCompress(bool multi_block, const char *name) { // Emits the SHA-256 compress stub; 'name' labels the stub and 'multi_block' is forwarded unchanged to the sha256 helper.
+ assert(UseSHA, "need SHA instructions"); // generating this stub is only valid when UseSHA is set
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ function_entry(); // entry address that is handed back to the caller
+
+ __ sha256 (multi_block); // the whole stub body is emitted by this macro-assembler helper
+
+ __ blr(); // return from the stub
+ return start;
+ }
+
+ address generate_sha512_implCompress(bool multi_block, const char *name) { // Emits the SHA-512 compress stub; 'name' labels the stub and 'multi_block' is forwarded unchanged to the sha512 helper.
+ assert(UseSHA, "need SHA instructions"); // generating this stub is only valid when UseSHA is set
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ function_entry(); // entry address that is handed back to the caller
+
+ __ sha512 (multi_block); // the whole stub body is emitted by this macro-assembler helper
+
+ __ blr(); // return from the stub
+ return start;
+ }
+
void generate_arraycopy_stubs() {
// Note: the disjoint stubs must be generated first, some of
// the conjoint stubs use them.
@@ -3306,6 +3339,267 @@ class StubGenerator: public StubCodeGenerator {
BLOCK_COMMENT("} Stub body");
}
+ /** Generates the "mulAdd" stub: normalizes the int arguments, then emits the macro assembler's muladd sequence.
+ * Arguments:
+ *
+ * Input:
+ * R3_ARG1 - out address
+ * R4_ARG2 - in address
+ * R5_ARG3 - offset (int; shifted left by 2 and stripped of its upper 32 bits before use)
+ * R6_ARG4 - len (int; upper 32 bits cleared before use)
+ * R7_ARG5 - k (int; upper 32 bits cleared before use)
+ * Output:
+ * R3_RET - carry (copied from R10 after muladd)
+ */
+ address generate_mulAdd() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "mulAdd");
+
+ address start = __ function_entry();
+
+ // C2 does not sign-extend signed int parameters to full 64-bit registers, so the upper halves are normalized here:
+ __ rldic (R5_ARG3, R5_ARG3, 2, 32); // offset <<= 2 with the upper 32 bits cleared (always positive); presumably turns a word index into a byte offset - TODO confirm
+ __ clrldi(R6_ARG4, R6_ARG4, 32); // force zero bits on higher word
+ __ clrldi(R7_ARG5, R7_ARG5, 32); // force zero bits on higher word
+
+ __ muladd(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4, R7_ARG5, R8, R9, R10); // R10 receives the carry; R8 and R9 are presumably scratch registers - verify against the muladd helper
+
+ // Moves output carry to return register
+ __ mr (R3_RET, R10);
+
+ __ blr(); // return from the stub
+
+ return start;
+ }
+
+ /** Generates the "squareToLen" stub in three phases: store halved squares, add the off-diagonal sums, shift back up and set the low bit.
+ * Arguments:
+ *
+ * Input:
+ * R3_ARG1 - in address
+ * R4_ARG2 - in length
+ * R5_ARG3 - out address
+ * R6_ARG4 - out length
+ * Returns (R3_RET): the out address. */
+ address generate_squareToLen() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "squareToLen");
+
+ address start = __ function_entry();
+
+ // args - the upper 32 bits of both length registers are cleared below (zero-extension of the int arguments)
+ const Register in = R3_ARG1;
+ const Register in_len = R4_ARG2;
+ __ clrldi(in_len, in_len, 32);
+ const Register out = R5_ARG3;
+ const Register out_len = R6_ARG4;
+ __ clrldi(out_len, out_len, 32);
+
+ // output
+ const Register ret = R3_RET; // only written at the very end (mr ret, out)
+
+ // temporaries
+ const Register lplw_s = R7;
+ const Register in_aux = R8;
+ const Register out_aux = R9;
+ const Register piece = R10;
+ const Register product = R14;
+ const Register lplw = R15;
+ const Register i_minus1 = R16;
+ const Register carry = R17;
+ const Register offset = R18;
+ const Register off_aux = R19;
+ const Register t = R20;
+ const Register mlen = R21;
+ const Register len = R22;
+ const Register a = R23;
+ const Register b = R24;
+ const Register i = R25;
+ const Register c = R26;
+ const Register cs = R27;
+
+ // Labels
+ Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
+ Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
+
+ // Save non-volatile regs (frameless).
+ int current_offs = -8; // slots run downward from R1_SP: R28 at -8 ... R14 at -120
+ __ std(R28, current_offs, R1_SP); current_offs -= 8; // NOTE(review): R28 is saved/restored but has no alias above - possibly unused here; confirm
+ __ std(R27, current_offs, R1_SP); current_offs -= 8;
+ __ std(R26, current_offs, R1_SP); current_offs -= 8;
+ __ std(R25, current_offs, R1_SP); current_offs -= 8;
+ __ std(R24, current_offs, R1_SP); current_offs -= 8;
+ __ std(R23, current_offs, R1_SP); current_offs -= 8;
+ __ std(R22, current_offs, R1_SP); current_offs -= 8;
+ __ std(R21, current_offs, R1_SP); current_offs -= 8;
+ __ std(R20, current_offs, R1_SP); current_offs -= 8;
+ __ std(R19, current_offs, R1_SP); current_offs -= 8;
+ __ std(R18, current_offs, R1_SP); current_offs -= 8;
+ __ std(R17, current_offs, R1_SP); current_offs -= 8;
+ __ std(R16, current_offs, R1_SP); current_offs -= 8;
+ __ std(R15, current_offs, R1_SP); current_offs -= 8;
+ __ std(R14, current_offs, R1_SP);
+
+ // Store the squares, right shifted one bit (i.e., divided by 2)
+ __ subi (out_aux, out, 8); // pre-biased so the first update-form store (stdu ..., 8, out_aux) lands at out
+ __ subi (in_aux, in, 4); // pre-biased so the first update-form load (lwzu ..., 4, in_aux) reads at in
+ __ cmpwi (CCR0, in_len, 0);
+ // Initialize lplw outside of the loop
+ __ xorr (lplw, lplw, lplw);
+ __ ble (CCR0, SKIP_LOOP_SQUARE); // in_len <= 0
+ __ mtctr (in_len); // CTR = number of input words to square
+
+ __ bind(LOOP_SQUARE);
+ __ lwzu (piece, 4, in_aux); // advance in_aux by 4 and load the next 32-bit word
+ __ mulld (product, piece, piece); // product = piece * piece
+ // shift left 63 bits and only keep the MSB
+ __ rldic (lplw_s, lplw, 63, 0);
+ __ mr (lplw, product); // remember the unshifted product for the next iteration
+ // shift right 1 bit without sign extension
+ __ srdi (product, product, 1);
+ // join them to the same register and store it
+ __ orr (product, lplw_s, product);
+#ifdef VM_LITTLE_ENDIAN
+ // Swap low and high words for little endian
+ __ rldicl (product, product, 32, 0);
+#endif
+ __ stdu (product, 8, out_aux); // advance out_aux by 8 and store the 64-bit result
+ __ bdnz (LOOP_SQUARE); // decrement CTR, loop while it is non-zero
+
+ __ bind(SKIP_LOOP_SQUARE);
+
+ // Add in off-diagonal sums
+ __ cmpwi (CCR0, in_len, 0);
+ __ ble (CCR0, SKIP_DIAGONAL_SUM);
+ // Avoid CTR usage here in order to use it at mulAdd
+ __ subi (i_minus1, in_len, 1);
+ __ li (offset, 4);
+
+ __ bind(LOOP_DIAGONAL_SUM);
+
+ __ sldi (off_aux, out_len, 2);
+ __ sub (off_aux, off_aux, offset); // off_aux = out_len * 4 - offset
+
+ __ mr (len, i_minus1);
+ __ sldi (mlen, i_minus1, 2); // mlen = i_minus1 * 4 (byte offset of word i_minus1)
+ __ lwzx (t, in, mlen); // t = 32-bit word of 'in' at byte offset mlen
+
+ __ muladd (out, in, off_aux, len, t, a, b, carry); // result is read from 'carry' below; a and b are presumably scratch - verify against the muladd helper
+
+ // begin
+ // off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
+ __ addi (mlen, mlen, 4);
+ __ sldi (a, out_len, 2);
+ __ subi (a, a, 4);
+ __ sub (a, a, mlen);
+ __ subi (off_aux, offset, 4);
+ __ sub (off_aux, a, off_aux); // off_aux = (out_len * 4 - 4 - mlen) - (offset - 4)
+
+ __ lwzx (b, off_aux, out);
+ __ add (b, b, carry);
+ __ stwx (b, off_aux, out); // stores the low 32 bits of the sum
+
+ // if (((uint64_t)s >> 32) != 0) {
+ __ srdi_ (a, b, 32); // record form: CR0 tells whether the sum overflowed 32 bits
+ __ beq (CCR0, SKIP_ADDONE);
+
+ // while (--mlen >= 0) {
+ __ bind(LOOP_ADDONE);
+ __ subi (mlen, mlen, 4);
+ __ cmpwi (CCR0, mlen, 0);
+ __ beq (CCR0, SKIP_ADDONE); // NOTE(review): leaves when the decremented mlen equals 0, while the pseudo-code above says ">= 0" - confirm intended
+
+ // if (--offset_aux < 0) { // Carry out of number
+ __ subi (off_aux, off_aux, 4);
+ __ cmpwi (CCR0, off_aux, 0);
+ __ blt (CCR0, SKIP_ADDONE);
+
+ // } else {
+ __ lwzx (b, off_aux, out);
+ __ addi (b, b, 1);
+ __ stwx (b, off_aux, out);
+ __ cmpwi (CCR0, b, 0);
+ __ bne (CCR0, SKIP_ADDONE); // stop unless the incremented word wrapped to zero
+ __ b (LOOP_ADDONE);
+
+ __ bind(SKIP_ADDONE);
+ // } } } end
+
+ __ addi (offset, offset, 8); // byte offset advances by two 32-bit words per outer iteration
+ __ subi (i_minus1, i_minus1, 1);
+ __ cmpwi (CCR0, i_minus1, 0);
+ __ bge (CCR0, LOOP_DIAGONAL_SUM); // repeat while i_minus1 >= 0
+
+ __ bind(SKIP_DIAGONAL_SUM);
+
+ // Shift back up and set low bit
+ // Shifts 1 bit left up to len positions. Assumes no leading zeros
+ // begin
+ __ cmpwi (CCR0, out_len, 0);
+ __ ble (CCR0, SKIP_LSHIFT);
+ __ li (i, 0); // i = byte index of the word being rewritten
+ __ lwz (c, 0, out); // c = first word of out
+ __ subi (b, out_len, 1);
+ __ mtctr (b); // NOTE(review): CTR = out_len - 1; out_len == 1 would load 0 ahead of a bdnz loop - presumably callers never pass that, confirm
+
+ __ bind(LOOP_LSHIFT);
+ __ mr (b, c); // b = current word
+ __ addi (cs, i, 4);
+ __ lwzx (c, out, cs); // c = following word
+
+ __ sldi (b, b, 1);
+ __ srwi (cs, c, 31); // cs = top bit of the following word
+ __ orr (b, b, cs);
+ __ stwx (b, i, out); // out word at i = (current << 1) | top bit of the following word
+
+ __ addi (i, i, 4);
+ __ bdnz (LOOP_LSHIFT);
+
+ __ sldi (c, out_len, 2);
+ __ subi (c, c, 4); // c = byte offset of the last out word
+ __ lwzx (b, out, c);
+ __ sldi (b, b, 1); // last word is shifted with no incoming bit
+ __ stwx (b, out, c);
+
+ __ bind(SKIP_LSHIFT);
+ // end
+
+ // Set low bit
+ __ sldi (i, in_len, 2); // NOTE(review): this tail is reached on every path and indexes word in_len-1 / out_len-1 - assumes both lengths are >= 1, confirm callers guarantee it
+ __ subi (i, i, 4);
+ __ lwzx (i, in, i); // i = last word of in
+ __ sldi (c, out_len, 2);
+ __ subi (c, c, 4);
+ __ lwzx (b, out, c); // b = last word of out
+
+ __ andi (i, i, 1); // keep only the lowest bit of the last input word
+ __ orr (i, b, i);
+
+ __ stwx (i, out, c);
+
+ // Restore non-volatile regs.
+ current_offs = -8; // same slot layout as the save sequence above
+ __ ld(R28, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R27, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R26, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R25, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R24, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R23, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R22, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R21, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R20, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R19, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R18, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R17, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R16, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R15, current_offs, R1_SP); current_offs -= 8;
+ __ ld(R14, current_offs, R1_SP);
+
+ __ mr(ret, out); // the stub returns the out address
+ __ blr();
+
+ return start;
+ }
/**
* Arguments:
@@ -3500,6 +3794,12 @@ class StubGenerator: public StubCodeGenerator {
}
#endif
+ if (UseSquareToLenIntrinsic) {
+ StubRoutines::_squareToLen = generate_squareToLen();
+ }
+ if (UseMulAddIntrinsic) {
+ StubRoutines::_mulAdd = generate_mulAdd();
+ }
if (UseMontgomeryMultiplyIntrinsic) {
StubRoutines::_montgomeryMultiply
= CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
@@ -3514,6 +3814,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
}
+ if (UseSHA256Intrinsics) {
+ StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
+ StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
+ }
+ if (UseSHA512Intrinsics) {
+ StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
+ StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
+ }
}
public:
diff --git a/src/hotspot/cpu/ppc/stubRoutines_ppc.hpp b/src/hotspot/cpu/ppc/stubRoutines_ppc.hpp
index dcda6be8a76..da480d73aee 100644
--- a/src/hotspot/cpu/ppc/stubRoutines_ppc.hpp
+++ b/src/hotspot/cpu/ppc/stubRoutines_ppc.hpp
@@ -34,7 +34,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
enum platform_dependent_constants {
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
- code_size2 = 20000 // simply increase if too small (assembler will crash if too small)
+ code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
};
// CRC32 Intrinsics.
diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
index 32e25e9038c..2216890dd6b 100644
--- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
@@ -1470,10 +1470,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
- if (UseMembar) {
- __ fence();
- }
-
//=============================================================================
// Call the native method. Argument registers must not have been
// overwritten since "__ call_stub(signature_handler);" (except for
@@ -1594,9 +1590,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ li(R0/*thread_state*/, _thread_in_Java);
__ release();
__ stw(R0/*thread_state*/, thread_(thread_state));
- if (UseMembar) {
- __ fence();
- }
if (CheckJNICalls) {
// clear_pending_jni_exception_check
diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
index 43f6ad1c591..282ffeb218b 100644
--- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
@@ -2224,6 +2224,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
if (is_static) {
__ ld(Robj, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f1_offset()), Rcache);
__ ld(Robj, in_bytes(Klass::java_mirror_offset()), Robj);
+ __ resolve_oop_handle(Robj);
// Acquire not needed here. Following access has an address dependency on this value.
}
}
diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.cpp b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
index 4db0a9c20cb..a145a52c48a 100644
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
@@ -107,13 +107,23 @@ void VM_Version::initialize() {
// TODO: PPC port PdScheduling::power6SectorSize = 0x20;
}
- MaxVectorSize = 8;
+ if (PowerArchitecturePPC64 >= 8) {
+ if (FLAG_IS_DEFAULT(SuperwordUseVSX)) {
+ FLAG_SET_ERGO(bool, SuperwordUseVSX, true);
+ }
+ } else {
+ if (SuperwordUseVSX) {
+ warning("SuperwordUseVSX specified, but needs at least Power8.");
+ FLAG_SET_DEFAULT(SuperwordUseVSX, false);
+ }
+ }
+ MaxVectorSize = SuperwordUseVSX ? 16 : 8;
#endif
// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
- "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
@@ -130,7 +140,8 @@ void VM_Version::initialize() {
(has_mfdscr() ? " mfdscr" : ""),
(has_vsx() ? " vsx" : ""),
(has_ldbrx() ? " ldbrx" : ""),
- (has_stdbrx() ? " stdbrx" : "")
+ (has_stdbrx() ? " stdbrx" : ""),
+ (has_vshasig() ? " sha" : "")
// Make sure number of %s matches num_features!
);
_features_string = os::strdup(buf);
@@ -138,8 +149,7 @@ void VM_Version::initialize() {
print_features();
}
- // PPC64 supports 8-byte compare-exchange operations (see
- // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
+ // PPC64 supports 8-byte compare-exchange operations (see Atomic::cmpxchg)
// and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
_supports_cx8 = true;
@@ -200,7 +210,6 @@ void VM_Version::initialize() {
}
// The AES intrinsic stubs require AES instruction support.
-#if defined(VM_LITTLE_ENDIAN)
if (has_vcipher()) {
if (FLAG_IS_DEFAULT(UseAES)) {
UseAES = true;
@@ -221,18 +230,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
}
-#else
- if (UseAES) {
- warning("AES instructions are not available on this CPU");
- FLAG_SET_DEFAULT(UseAES, false);
- }
- if (UseAESIntrinsics) {
- if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
- warning("AES intrinsics are not available on this CPU");
- FLAG_SET_DEFAULT(UseAESIntrinsics, false);
- }
-#endif
-
if (UseAESCTRIntrinsics) {
warning("AES/CTR intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
@@ -247,17 +244,49 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseFMA, true);
}
- if (UseSHA) {
- warning("SHA instructions are not available on this CPU");
+ if (has_vshasig()) {
+ if (FLAG_IS_DEFAULT(UseSHA)) {
+ UseSHA = true;
+ }
+ } else if (UseSHA) {
+ if (!FLAG_IS_DEFAULT(UseSHA))
+ warning("SHA instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseSHA, false);
}
- if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
- warning("SHA intrinsics are not available on this CPU");
+
+ if (UseSHA1Intrinsics) {
+ warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+ }
+
+ if (UseSHA && has_vshasig()) {
+ if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+ }
+ } else if (UseSHA256Intrinsics) {
+ warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+ }
+
+ if (UseSHA && has_vshasig()) {
+ if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+ }
+ } else if (UseSHA512Intrinsics) {
+ warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSHA, false);
+ }
+
+ if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
+ UseSquareToLenIntrinsic = true;
+ }
+ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
+ UseMulAddIntrinsic = true;
+ }
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true;
}
@@ -657,6 +686,7 @@ void VM_Version::determine_features() {
a->lxvd2x(VSR0, R3_ARG1); // code[14] -> vsx
a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> ldbrx
a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[16] -> stdbrx
+ a->vshasigmaw(VR0, VR1, 1, 0xF); // code[17] -> vshasig
a->blr();
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -708,6 +738,7 @@ void VM_Version::determine_features() {
if (code[feature_cntr++]) features |= vsx_m;
if (code[feature_cntr++]) features |= ldbrx_m;
if (code[feature_cntr++]) features |= stdbrx_m;
+ if (code[feature_cntr++]) features |= vshasig_m;
// Print the detection code.
if (PrintAssembly) {
diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.hpp b/src/hotspot/cpu/ppc/vm_version_ppc.hpp
index f7d5ea73aca..eec629f1d3e 100644
--- a/src/hotspot/cpu/ppc/vm_version_ppc.hpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.hpp
@@ -49,6 +49,7 @@ protected:
vsx,
ldbrx,
stdbrx,
+ vshasig,
num_features // last entry to count features
};
enum Feature_Flag_Set {
@@ -64,6 +65,7 @@ protected:
vand_m = (1 << vand ),
lqarx_m = (1 << lqarx ),
vcipher_m = (1 << vcipher),
+ vshasig_m = (1 << vshasig),
vpmsumb_m = (1 << vpmsumb),
tcheck_m = (1 << tcheck ),
mfdscr_m = (1 << mfdscr ),
@@ -106,6 +108,7 @@ public:
static bool has_vsx() { return (_features & vsx_m) != 0; }
static bool has_ldbrx() { return (_features & ldbrx_m) != 0; }
static bool has_stdbrx() { return (_features & stdbrx_m) != 0; }
+ static bool has_vshasig() { return (_features & vshasig_m) != 0; }
static bool has_mtfprd() { return has_vpmsumb(); } // alias for P8
// Assembler testing
diff --git a/src/hotspot/cpu/s390/assembler_s390.hpp b/src/hotspot/cpu/s390/assembler_s390.hpp
index 8a7ae9d0756..a839700259c 100644
--- a/src/hotspot/cpu/s390/assembler_s390.hpp
+++ b/src/hotspot/cpu/s390/assembler_s390.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -250,7 +250,6 @@ class Address VALUE_OBJ_CLASS_SPEC {
bool is_RSform() { return has_base() && !has_index() && is_disp12(); }
bool is_RSYform() { return has_base() && !has_index() && is_disp20(); }
bool is_RXform() { return has_base() && has_index() && is_disp12(); }
- bool is_RXEform() { return has_base() && has_index() && is_disp12(); }
bool is_RXYform() { return has_base() && has_index() && is_disp20(); }
bool uses(Register r) { return _base == r || _index == r; };
@@ -1093,7 +1092,201 @@ class Assembler : public AbstractAssembler {
#define TRTT_ZOPC (unsigned int)(0xb9 << 24 | 0x90 << 16)
-// Miscellaneous Operations
+//---------------------------
+//-- Vector Instructions --
+//---------------------------
+
+//---< Vector Support Instructions >---
+
+//--- Load (memory) ---
+
+#define VLM_ZOPC (unsigned long)(0xe7L << 40 | 0x36L << 0) // load full vreg range (n * 128 bit)
+#define VL_ZOPC (unsigned long)(0xe7L << 40 | 0x06L << 0) // load full vreg (128 bit)
+#define VLEB_ZOPC (unsigned long)(0xe7L << 40 | 0x00L << 0) // load vreg element (8 bit)
+#define VLEH_ZOPC (unsigned long)(0xe7L << 40 | 0x01L << 0) // load vreg element (16 bit)
+#define VLEF_ZOPC (unsigned long)(0xe7L << 40 | 0x03L << 0) // load vreg element (32 bit)
+#define VLEG_ZOPC (unsigned long)(0xe7L << 40 | 0x02L << 0) // load vreg element (64 bit)
+
+#define VLREP_ZOPC (unsigned long)(0xe7L << 40 | 0x05L << 0) // load and replicate into all vector elements
+#define VLLEZ_ZOPC (unsigned long)(0xe7L << 40 | 0x04L << 0) // load logical element and zero.
+
+// vector register gather
+#define VGEF_ZOPC (unsigned long)(0xe7L << 40 | 0x13L << 0) // gather element (32 bit), V1(M3) = [D2(V2(M3),B2)]
+#define VGEG_ZOPC (unsigned long)(0xe7L << 40 | 0x12L << 0) // gather element (64 bit), V1(M3) = [D2(V2(M3),B2)]
+// vector register scatter
+#define VSCEF_ZOPC (unsigned long)(0xe7L << 40 | 0x1bL << 0) // vector scatter element FW
+#define VSCEG_ZOPC (unsigned long)(0xe7L << 40 | 0x1aL << 0) // vector scatter element DW
+
+#define VLBB_ZOPC (unsigned long)(0xe7L << 40 | 0x07L << 0) // load vreg to block boundary (load to alignment).
+#define VLL_ZOPC (unsigned long)(0xe7L << 40 | 0x37L << 0) // load vreg with length.
+
+//--- Load (register) ---
+
+#define VLR_ZOPC (unsigned long)(0xe7L << 40 | 0x56L << 0) // copy full vreg (128 bit)
+#define VLGV_ZOPC (unsigned long)(0xe7L << 40 | 0x21L << 0) // copy vreg element -> GR
+#define VLVG_ZOPC (unsigned long)(0xe7L << 40 | 0x22L << 0) // copy GR -> vreg element
+#define VLVGP_ZOPC (unsigned long)(0xe7L << 40 | 0x62L << 0) // copy GR2, GR3 (disjoint pair) -> vreg
+
+// vector register pack: cut in half the size the source vector elements
+#define VPK_ZOPC (unsigned long)(0xe7L << 40 | 0x94L << 0) // just cut
+#define VPKS_ZOPC (unsigned long)(0xe7L << 40 | 0x97L << 0) // saturate as signed values
+#define VPKLS_ZOPC (unsigned long)(0xe7L << 40 | 0x95L << 0) // saturate as unsigned values
+
+// vector register unpack: double the size of the source vector elements
+#define VUPH_ZOPC (unsigned long)(0xe7L << 40 | 0xd7L << 0) // signed, left half of the source vector elements
+#define VUPLH_ZOPC (unsigned long)(0xe7L << 40 | 0xd5L << 0) // unsigned, left half of the source vector elements
+#define VUPL_ZOPC (unsigned long)(0xe7L << 40 | 0xd6L << 0) // signed, right half of the source vector elements
+#define VUPLL_ZOPC (unsigned long)(0xe7L << 40 | 0xd4L << 0) // unsigned, right half of the source vector elements
+
+// vector register merge
+#define VMRH_ZOPC (unsigned long)(0xe7L << 40 | 0x61L << 0) // register merge high (left half of source registers)
+#define VMRL_ZOPC (unsigned long)(0xe7L << 40 | 0x60L << 0) // register merge low (right half of source registers)
+
+// vector register permute
+#define VPERM_ZOPC (unsigned long)(0xe7L << 40 | 0x8cL << 0) // vector permute
+#define VPDI_ZOPC (unsigned long)(0xe7L << 40 | 0x84L << 0) // vector permute DW immediate
+
+// vector register replicate
+#define VREP_ZOPC (unsigned long)(0xe7L << 40 | 0x4dL << 0) // vector replicate
+#define VREPI_ZOPC (unsigned long)(0xe7L << 40 | 0x45L << 0) // vector replicate immediate
+#define VSEL_ZOPC (unsigned long)(0xe7L << 40 | 0x8dL << 0) // vector select
+
+#define VSEG_ZOPC (unsigned long)(0xe7L << 40 | 0x5fL << 0) // vector sign-extend to DW (rightmost element in each DW).
+
+//--- Load (immediate) ---
+
+#define VLEIB_ZOPC (unsigned long)(0xe7L << 40 | 0x40L << 0) // load vreg element (16 bit imm to 8 bit)
+#define VLEIH_ZOPC (unsigned long)(0xe7L << 40 | 0x41L << 0) // load vreg element (16 bit imm to 16 bit)
+#define VLEIF_ZOPC (unsigned long)(0xe7L << 40 | 0x43L << 0) // load vreg element (16 bit imm to 32 bit)
+#define VLEIG_ZOPC (unsigned long)(0xe7L << 40 | 0x42L << 0) // load vreg element (16 bit imm to 64 bit)
+
+//--- Store ---
+
+#define VSTM_ZOPC (unsigned long)(0xe7L << 40 | 0x3eL << 0) // store full vreg range (n * 128 bit)
+#define VST_ZOPC (unsigned long)(0xe7L << 40 | 0x0eL << 0) // store full vreg (128 bit)
+#define VSTEB_ZOPC (unsigned long)(0xe7L << 40 | 0x08L << 0) // store vreg element (8 bit)
+#define VSTEH_ZOPC (unsigned long)(0xe7L << 40 | 0x09L << 0) // store vreg element (16 bit)
+#define VSTEF_ZOPC (unsigned long)(0xe7L << 40 | 0x0bL << 0) // store vreg element (32 bit)
+#define VSTEG_ZOPC (unsigned long)(0xe7L << 40 | 0x0aL << 0) // store vreg element (64 bit)
+#define VSTL_ZOPC (unsigned long)(0xe7L << 40 | 0x3fL << 0) // store vreg with length.
+
+//--- Misc ---
+
+#define VGM_ZOPC (unsigned long)(0xe7L << 40 | 0x46L << 0) // generate bit mask, [start..end] = '1', else '0'
+#define VGBM_ZOPC (unsigned long)(0xe7L << 40 | 0x44L << 0) // generate byte mask, bits(imm16) -> bytes
+
+//---< Vector Arithmetic Instructions >---
+
+// Load
+#define VLC_ZOPC (unsigned long)(0xe7L << 40 | 0xdeL << 0) // V1 := -V2, element size = 2**m
+#define VLP_ZOPC (unsigned long)(0xe7L << 40 | 0xdfL << 0) // V1 := |V2|, element size = 2**m
+
+// ADD
+#define VA_ZOPC (unsigned long)(0xe7L << 40 | 0xf3L << 0) // V1 := V2 + V3, element size = 2**m
+#define VACC_ZOPC (unsigned long)(0xe7L << 40 | 0xf1L << 0) // V1 := carry(V2 + V3), element size = 2**m
+
+// SUB
+#define VS_ZOPC (unsigned long)(0xe7L << 40 | 0xf7L << 0) // V1 := V2 - V3, element size = 2**m
+#define VSCBI_ZOPC (unsigned long)(0xe7L << 40 | 0xf5L << 0) // V1 := borrow(V2 - V3), element size = 2**m
+
+// MUL (variants by mnemonic: Low/High/Even/Odd, 'L' = logical, i.e. unsigned; confirm details in Principles of Operation)
+#define VML_ZOPC (unsigned long)(0xe7L << 40 | 0xa2L << 0) // V1 := V2 * V3, element size = 2**m, low-order half of each product
+#define VMH_ZOPC (unsigned long)(0xe7L << 40 | 0xa3L << 0) // V1 := V2 * V3, element size = 2**m, high-order half of each product, signed
+#define VMLH_ZOPC (unsigned long)(0xe7L << 40 | 0xa1L << 0) // V1 := V2 * V3, element size = 2**m, high-order half of each product, unsigned
+#define VME_ZOPC (unsigned long)(0xe7L << 40 | 0xa6L << 0) // V1 := V2 * V3, element size = 2**m, even-indexed elements, double-width products, signed
+#define VMLE_ZOPC (unsigned long)(0xe7L << 40 | 0xa4L << 0) // V1 := V2 * V3, element size = 2**m, even-indexed elements, double-width products, unsigned
+#define VMO_ZOPC (unsigned long)(0xe7L << 40 | 0xa7L << 0) // V1 := V2 * V3, element size = 2**m, odd-indexed elements, double-width products, signed
+#define VMLO_ZOPC (unsigned long)(0xe7L << 40 | 0xa5L << 0) // V1 := V2 * V3, element size = 2**m, odd-indexed elements, double-width products, unsigned
+
+// MUL & ADD (variants by mnemonic: Low/High/Even/Odd, 'L' = logical, i.e. unsigned; confirm details in Principles of Operation)
+#define VMAL_ZOPC (unsigned long)(0xe7L << 40 | 0xaaL << 0) // V1 := V2 * V3 + V4, element size = 2**m, low-order half of each result
+#define VMAH_ZOPC (unsigned long)(0xe7L << 40 | 0xabL << 0) // V1 := V2 * V3 + V4, element size = 2**m, high-order half of each result, signed
+#define VMALH_ZOPC (unsigned long)(0xe7L << 40 | 0xa9L << 0) // V1 := V2 * V3 + V4, element size = 2**m, high-order half of each result, unsigned
+#define VMAE_ZOPC (unsigned long)(0xe7L << 40 | 0xaeL << 0) // V1 := V2 * V3 + V4, element size = 2**m, even-indexed elements, double-width results, signed
+#define VMALE_ZOPC (unsigned long)(0xe7L << 40 | 0xacL << 0) // V1 := V2 * V3 + V4, element size = 2**m, even-indexed elements, double-width results, unsigned
+#define VMAO_ZOPC (unsigned long)(0xe7L << 40 | 0xafL << 0) // V1 := V2 * V3 + V4, element size = 2**m, odd-indexed elements, double-width results, signed
+#define VMALO_ZOPC (unsigned long)(0xe7L << 40 | 0xadL << 0) // V1 := V2 * V3 + V4, element size = 2**m, odd-indexed elements, double-width results, unsigned
+
+// Vector SUM
+#define VSUM_ZOPC (unsigned long)(0xe7L << 40 | 0x64L << 0) // V1[j] := toFW(sum(V2[i]) + V3[j]), subelements: byte or HW
+#define VSUMG_ZOPC (unsigned long)(0xe7L << 40 | 0x65L << 0) // V1[j] := toDW(sum(V2[i]) + V3[j]), subelements: HW or FW
+#define VSUMQ_ZOPC (unsigned long)(0xe7L << 40 | 0x67L << 0) // V1[j] := toQW(sum(V2[i]) + V3[j]), subelements: FW or DW
+
+// Average
+#define VAVG_ZOPC (unsigned long)(0xe7L << 40 | 0xf2L << 0) // V1 := (V2+V3+1)/2, signed, element size = 2**m
+#define VAVGL_ZOPC (unsigned long)(0xe7L << 40 | 0xf0L << 0) // V1 := (V2+V3+1)/2, unsigned, element size = 2**m
+
+// VECTOR Galois Field Multiply Sum
+#define VGFM_ZOPC (unsigned long)(0xe7L << 40 | 0xb4L << 0) // Galois field multiply sum
+#define VGFMA_ZOPC (unsigned long)(0xe7L << 40 | 0xbcL << 0) // Galois field multiply sum and accumulate
+
+//---< Vector Logical Instructions >---
+
+// AND
+#define VN_ZOPC (unsigned long)(0xe7L << 40 | 0x68L << 0) // V1 := V2 & V3, element size = 2**m
+#define VNC_ZOPC (unsigned long)(0xe7L << 40 | 0x69L << 0) // V1 := V2 & ~V3, element size = 2**m
+
+// XOR
+#define VX_ZOPC (unsigned long)(0xe7L << 40 | 0x6dL << 0) // V1 := V2 ^ V3, element size = 2**m
+
+// NOR
+#define VNO_ZOPC (unsigned long)(0xe7L << 40 | 0x6bL << 0) // V1 := !(V2 | V3), element size = 2**m
+
+// OR
+#define VO_ZOPC (unsigned long)(0xe7L << 40 | 0x6aL << 0) // V1 := V2 | V3, element size = 2**m
+
+// Comparison (element-wise). Each result element is all 1s (true) or all 0s (false), whatever the element size.
+#define VCEQ_ZOPC (unsigned long)(0xe7L << 40 | 0xf8L << 0) // V1 := (V2 == V3) ? all 1s : all 0s, element size = 2**m
+#define VCH_ZOPC (unsigned long)(0xe7L << 40 | 0xfbL << 0) // V1 := (V2 > V3) ? all 1s : all 0s, element size = 2**m, signed
+#define VCHL_ZOPC (unsigned long)(0xe7L << 40 | 0xf9L << 0) // V1 := (V2 > V3) ? all 1s : all 0s, element size = 2**m, unsigned
+
+// Max/Min (element-wise)
+#define VMX_ZOPC (unsigned long)(0xe7L << 40 | 0xffL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, signed
+#define VMXL_ZOPC (unsigned long)(0xe7L << 40 | 0xfdL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, unsigned
+#define VMN_ZOPC (unsigned long)(0xe7L << 40 | 0xfeL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, signed
+#define VMNL_ZOPC (unsigned long)(0xe7L << 40 | 0xfcL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, unsigned
+
+// Leading/Trailing Zeros, population count
+#define VCLZ_ZOPC (unsigned long)(0xe7L << 40 | 0x53L << 0) // V1 := leadingzeros(V2), element size = 2**m
+#define VCTZ_ZOPC (unsigned long)(0xe7L << 40 | 0x52L << 0) // V1 := trailingzeros(V2), element size = 2**m
+#define VPOPCT_ZOPC (unsigned long)(0xe7L << 40 | 0x50L << 0) // V1 := popcount(V2), bytewise!!
+
+// Rotate/Shift
+#define VERLLV_ZOPC (unsigned long)(0xe7L << 40 | 0x73L << 0) // V1 := rotateleft(V2), rotate count in V3 element
+#define VERLL_ZOPC (unsigned long)(0xe7L << 40 | 0x33L << 0) // V1 := rotateleft(V3), rotate count from d2(b2).
+#define VERIM_ZOPC (unsigned long)(0xe7L << 40 | 0x72L << 0) // Rotate then insert under mask. Read Principles of Operation!!
+
+#define VESLV_ZOPC (unsigned long)(0xe7L << 40 | 0x70L << 0) // V1 := SLL(V2, V3), unsigned, element-wise
+#define VESL_ZOPC (unsigned long)(0xe7L << 40 | 0x30L << 0) // V1 := SLL(V3), unsigned, shift count from d2(b2).
+
+#define VESRAV_ZOPC (unsigned long)(0xe7L << 40 | 0x7AL << 0) // V1 := SRA(V2, V3), signed, element-wise
+#define VESRA_ZOPC (unsigned long)(0xe7L << 40 | 0x3AL << 0) // V1 := SRA(V3), signed, shift count from d2(b2).
+#define VESRLV_ZOPC (unsigned long)(0xe7L << 40 | 0x78L << 0) // V1 := SRL(V2, V3), unsigned, element-wise
+#define VESRL_ZOPC (unsigned long)(0xe7L << 40 | 0x38L << 0) // V1 := SRL(V3), unsigned, shift count from d2(b2).
+
+#define VSL_ZOPC (unsigned long)(0xe7L << 40 | 0x74L << 0) // V1 := SLL(V2), unsigned, bit-count
+#define VSLB_ZOPC (unsigned long)(0xe7L << 40 | 0x75L << 0) // V1 := SLL(V2), unsigned, byte-count
+#define VSLDB_ZOPC (unsigned long)(0xe7L << 40 | 0x77L << 0) // V1 := SLL((V2,V3)), unsigned, byte-count
+
+#define VSRA_ZOPC (unsigned long)(0xe7L << 40 | 0x7eL << 0) // V1 := SRA(V2), signed, bit-count
+#define VSRAB_ZOPC (unsigned long)(0xe7L << 40 | 0x7fL << 0) // V1 := SRA(V2), signed, byte-count
+#define VSRL_ZOPC (unsigned long)(0xe7L << 40 | 0x7cL << 0) // V1 := SRL(V2), unsigned, bit-count
+#define VSRLB_ZOPC (unsigned long)(0xe7L << 40 | 0x7dL << 0) // V1 := SRL(V2), unsigned, byte-count
+
+// Test under Mask
+#define VTM_ZOPC (unsigned long)(0xe7L << 40 | 0xd8L << 0) // Like TM, set CC according to state of selected bits.
+
+//---< Vector String Instructions >---
+#define VFAE_ZOPC (unsigned long)(0xe7L << 40 | 0x82L << 0) // Find any element
+#define VFEE_ZOPC (unsigned long)(0xe7L << 40 | 0x80L << 0) // Find element equal
+#define VFENE_ZOPC (unsigned long)(0xe7L << 40 | 0x81L << 0) // Find element not equal
+#define VSTRC_ZOPC (unsigned long)(0xe7L << 40 | 0x8aL << 0) // String range compare
+#define VISTR_ZOPC (unsigned long)(0xe7L << 40 | 0x5cL << 0) // Isolate String
+
+
+//--------------------------------
+//-- Miscellaneous Operations --
+//--------------------------------
// Execute
#define EX_ZOPC (unsigned int)(68L << 24)
@@ -1117,7 +1310,6 @@ class Assembler : public AbstractAssembler {
#define LAOG_ZOPC (unsigned long)(0xebL << 40 | 0xe6L) // z196
// System Functions
-#define STCK_ZOPC (unsigned int)(0xb2 << 24 | 0x05 << 16)
#define STCKF_ZOPC (unsigned int)(0xb2 << 24 | 0x7c << 16)
#define STFLE_ZOPC (unsigned int)(0xb2 << 24 | 0xb0 << 16)
#define ECTG_ZOPC (unsigned long)(0xc8L <<40 | 0x01L << 32) // z10
@@ -1244,10 +1436,18 @@ class Assembler : public AbstractAssembler {
// unsigned arithmetic calculation instructions
// Mask bit#0 is not used by these instructions.
// There is no indication of overflow for these instr.
- bcondLogZero = 2,
- bcondLogNotZero = 5,
+ bcondLogZero_NoCarry = 8,
+ bcondLogZero_Carry = 2,
+ // bcondLogZero_Borrow = 8, // This CC is never generated.
+ bcondLogZero_NoBorrow = 2, // same mask value as bcondLogZero_Carry
+ bcondLogZero = bcondLogZero_Carry | bcondLogZero_NoCarry, // == 10, result is zero regardless of carry
+ bcondLogNotZero_NoCarry = 4,
+ bcondLogNotZero_Carry = 1,
bcondLogNotZero_Borrow = 4,
bcondLogNotZero_NoBorrow = 1,
+ bcondLogNotZero = bcondLogNotZero_Carry | bcondLogNotZero_NoCarry, // == 5, result is not zero regardless of carry
+ bcondLogCarry = bcondLogZero_Carry | bcondLogNotZero_Carry, // == 3, carry regardless of result
+ bcondLogBorrow = /* bcondLogZero_Borrow | */ bcondLogNotZero_Borrow, // == 4, the only borrow case that has a mask defined here
// string search instructions
bcondFound = 4,
bcondNotFound = 2,
@@ -1280,6 +1480,29 @@ class Assembler : public AbstractAssembler {
to_minus_infinity = 7
};
+ // Vector Register Element Type. Selects how a 128-bit vector register is split into elements.
+ enum VRegElemType {
+ VRET_BYTE = 0, // 16 elements of 8 bit
+ VRET_HW = 1, // 8 elements of 16 bit (halfword)
+ VRET_FW = 2, // 4 elements of 32 bit (fullword)
+ VRET_DW = 3, // 2 elements of 64 bit (doubleword)
+ VRET_QW = 4 // 1 element of 128 bit (quadword)
+ };
+
+ // Vector Operation Result Control.
+ // This is a set of flags used in some vector instructions to control
+ // the result (side) effects of instruction execution.
+ enum VOpRC {
+ VOPRC_CCSET = 0b0001, // set the CC.
+ VOPRC_CCIGN = 0b0000, // ignore, don't set CC.
+ VOPRC_ZS = 0b0010, // Zero Search. Additional, elementwise, comparison against zero.
+ VOPRC_NOZS = 0b0000, // No Zero Search.
+ VOPRC_RTBYTEIX = 0b0100, // generate byte index to lowest element with true comparison.
+ VOPRC_RTBITVEC = 0b0000, // generate bit vector, all 1s for true, all 0s for false element comparisons.
+ VOPRC_INVERT = 0b1000, // invert comparison results.
+ VOPRC_NOINVERT = 0b0000 // use comparison results as is, do not invert.
+ };
+
// Inverse condition code, i.e. determine "15 - cc" for a given condition code cc.
static branch_condition inverse_condition(branch_condition cc);
static branch_condition inverse_float_condition(branch_condition cc);
@@ -1376,6 +1599,65 @@ class Assembler : public AbstractAssembler {
return r;
}
+ static int64_t rsmask_48( Address a) { assert(a.is_RSform(), "bad address format"); return rsmask_48( a.disp12(), a.base()); }
+ static int64_t rxmask_48( Address a) { if (a.is_RXform()) { return rxmask_48( a.disp12(), a.index(), a.base()); }
+ else if (a.is_RSform()) { return rsmask_48( a.disp12(), a.base()); }
+ else { guarantee(false, "bad address format"); return 0; }
+ }
+ static int64_t rsymask_48(Address a) { assert(a.is_RSYform(), "bad address format"); return rsymask_48(a.disp20(), a.base()); }
+ static int64_t rxymask_48(Address a) { if (a.is_RXYform()) { return rxymask_48( a.disp20(), a.index(), a.base()); }
+ else if (a.is_RSYform()) { return rsymask_48( a.disp20(), a.base()); }
+ else { guarantee(false, "bad address format"); return 0; }
+ }
+
+ static int64_t rsmask_48( int64_t d2, Register b2) { return uimm12(d2, 20, 48) | regz(b2, 16, 48); }
+ static int64_t rxmask_48( int64_t d2, Register x2, Register b2) { return uimm12(d2, 20, 48) | reg(x2, 12, 48) | regz(b2, 16, 48); }
+ static int64_t rsymask_48(int64_t d2, Register b2) { return simm20(d2) | regz(b2, 16, 48); }
+ static int64_t rxymask_48(int64_t d2, Register x2, Register b2) { return simm20(d2) | reg(x2, 12, 48) | regz(b2, 16, 48); }
+
+ // Address calculated from d12(vx,b) - vx is vector index register.
+ static int64_t rvmask_48( int64_t d2, VectorRegister x2, Register b2) { return uimm12(d2, 20, 48) | vreg(x2, 12) | regz(b2, 16, 48); }
+
+ static int64_t vreg_mask(VectorRegister v, int pos) { // Encode vector register v at bit position pos, including its RXB bit.
+ return vreg(v, pos) | v->RXB_mask(pos); // vreg() already ORs in RXB_mask(pos); repeating it here is redundant but harmless.
+ }
+
+ // Vector Element Size Control. 4-bit field which indicates the size of the vector elements.
+ static int64_t vesc_mask(int64_t size, int min_size, int max_size, int pos) {
+ // min_size - minimum element size. Not all instructions support element sizes beginning with "byte".
+ // max_size - maximum element size. Not all instructions support element sizes up to "QW".
+ assert((min_size <= size) && (size <= max_size), "element size control out of range");
+ return uimm4(size, pos, 48);
+ }
+
+ // Vector Element IndeX. 4-bit field which indexes the target vector element. Returns the field placed at pos in a 48-bit instruction.
+ static int64_t veix_mask(int64_t ix, int el_size, int pos) {
+ // el_size - size of the vector element. This is a VRegElemType enum value.
+ // ix - vector element index. Must be in [0, max_ix]; max_ix follows from el_size (see switch below).
+ int max_ix = -1;
+ switch (el_size) {
+ case VRET_BYTE: max_ix = 15; break;
+ case VRET_HW: max_ix = 7; break;
+ case VRET_FW: max_ix = 3; break;
+ case VRET_DW: max_ix = 1; break;
+ case VRET_QW: max_ix = 0; break;
+ default: guarantee(false, "bad vector element size %d", el_size); break;
+ }
+ assert((0 <= ix) && (ix <= max_ix), "element index out of range (0 <= %ld <= %d)", ix, max_ix);
+ return uimm4(ix, pos, 48);
+ }
+
+ // Vector Operation Result Control. 4-bit field.
+ static int64_t voprc_any(int64_t flags, int pos, int64_t allowed_flags = 0b1111) {
+ assert((flags & allowed_flags) == flags, "Invalid VOPRC_* flag combination: %d", (int)flags);
+ return uimm4(flags, pos, 48);
+ }
+
+ // Vector Operation Result Control. Condition code setting.
+ static int64_t voprc_ccmask(int64_t flags, int pos) {
+ return voprc_any(flags, pos, VOPRC_CCIGN | VOPRC_CCSET);
+ }
+
public:
//--------------------------------------------------
@@ -1453,6 +1735,8 @@ class Assembler : public AbstractAssembler {
static long imm24(int64_t i24, int s, int len) { return imm(i24, 24) << (len-s-24); }
static long imm32(int64_t i32, int s, int len) { return imm(i32, 32) << (len-s-32); }
+ static long vreg(VectorRegister v, int pos) { const int len = 48; return u_field(v->encoding()&0x0f, (len-pos)-1, (len-pos)-4) | v->RXB_mask(pos); }
+
static long fregt(FloatRegister r, int s, int len) { return freg(r,s,len); }
static long freg( FloatRegister r, int s, int len) { return u_field(r->encoding(), (len-s)-1, (len-s)-4); }
@@ -1840,13 +2124,16 @@ class Assembler : public AbstractAssembler {
inline void z_alsi( const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint32 -- z10
inline void z_algsi(const Address& d, int64_t i2); // add logical *(d) += i2_imm8 ; uint64 -- z10
- // negate
+ // sign adjustment
inline void z_lcr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int32
inline void z_lcgr( Register r1, Register r2 = noreg); // neg r1 = -r2 ; int64
inline void z_lcgfr(Register r1, Register r2); // neg r1 = -r2 ; int64 <- int32
inline void z_lnr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int32
inline void z_lngr( Register r1, Register r2 = noreg); // neg r1 = -|r2| ; int64
inline void z_lngfr(Register r1, Register r2); // neg r1 = -|r2| ; int64 <- int32
+ inline void z_lpr( Register r1, Register r2 = noreg); // r1 = |r2| ; int32
+ inline void z_lpgr( Register r1, Register r2 = noreg); // r1 = |r2| ; int64
+ inline void z_lpgfr(Register r1, Register r2); // r1 = |r2| ; int64 <- int32
// subtract intstructions
// sub registers
@@ -2125,6 +2412,422 @@ class Assembler : public AbstractAssembler {
inline void z_trtt(Register r1, Register r2, int64_t m3);
+ //---------------------------
+ //-- Vector Instructions --
+ //---------------------------
+
+ //---< Vector Support Instructions >---
+
+ // Load (transfer from memory)
+ inline void z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2);
+ inline void z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+
+ // Gather/Scatter
+ inline void z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+ inline void z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+
+ inline void z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+ inline void z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
+
+ // load and replicate
+ inline void z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vlrepb(VectorRegister v1, int64_t d2, Register x2, Register b2);
+ inline void z_vlreph(VectorRegister v1, int64_t d2, Register x2, Register b2);
+ inline void z_vlrepf(VectorRegister v1, int64_t d2, Register x2, Register b2);
+ inline void z_vlrepg(VectorRegister v1, int64_t d2, Register x2, Register b2);
+
+ inline void z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vllezb(VectorRegister v1, int64_t d2, Register x2, Register b2);
+ inline void z_vllezh(VectorRegister v1, int64_t d2, Register x2, Register b2);
+ inline void z_vllezf(VectorRegister v1, int64_t d2, Register x2, Register b2);
+ inline void z_vllezg(VectorRegister v1, int64_t d2, Register x2, Register b2);
+
+ inline void z_vlbb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vll( VectorRegister v1, Register r3, int64_t d2, Register b2);
+
+ // Load (register to register)
+ inline void z_vlr( VectorRegister v1, VectorRegister v2);
+
+ inline void z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
+ inline void z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2);
+
+ inline void z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4);
+ inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2);
+ inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2);
+ inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2);
+ inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2);
+
+ inline void z_vlvgp( VectorRegister v1, Register r2, Register r3);
+
+ // vector register pack
+ inline void z_vpk( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+ inline void z_vpkh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpkf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpkg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ inline void z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+ inline void z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpkshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpksfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpksgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ inline void z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
+ inline void z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // vector register unpack (sign-extended)
+ inline void z_vuph( VectorRegister v1, VectorRegister v2, int64_t m3);
+ inline void z_vuphb( VectorRegister v1, VectorRegister v2);
+ inline void z_vuphh( VectorRegister v1, VectorRegister v2);
+ inline void z_vuphf( VectorRegister v1, VectorRegister v2);
+ inline void z_vupl( VectorRegister v1, VectorRegister v2, int64_t m3);
+ inline void z_vuplb( VectorRegister v1, VectorRegister v2);
+ inline void z_vuplh( VectorRegister v1, VectorRegister v2); // NOTE(review): by the naming pattern this is VUPL for halfword elements; it shares its name with the 3-arg VUPLH form - confirm.
+ inline void z_vuplf( VectorRegister v1, VectorRegister v2);
+
+ // vector register unpack (zero-extended)
+ inline void z_vuplh( VectorRegister v1, VectorRegister v2, int64_t m3); // NOTE(review): presumably generic VUPLH with element size m3; overloads the 2-arg z_vuplh, which looks like VUPL/halfword - confirm.
+ inline void z_vuplhb( VectorRegister v1, VectorRegister v2);
+ inline void z_vuplhh( VectorRegister v1, VectorRegister v2);
+ inline void z_vuplhf( VectorRegister v1, VectorRegister v2);
+ inline void z_vupll( VectorRegister v1, VectorRegister v2, int64_t m3);
+ inline void z_vupllb( VectorRegister v1, VectorRegister v2);
+ inline void z_vupllh( VectorRegister v1, VectorRegister v2);
+ inline void z_vupllf( VectorRegister v1, VectorRegister v2);
+
+ // vector register merge high/low
+ inline void z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+ inline void z_vmrhb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmrhh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmrhf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmrhg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ inline void z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+ inline void z_vmrlb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmrlh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmrlf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmrlg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // vector register permute
+ inline void z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+ inline void z_vpdi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+
+ // vector register replicate
+ inline void z_vrep( VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4);
+ inline void z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2);
+ inline void z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2);
+ inline void z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2);
+ inline void z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2);
+ inline void z_vrepi( VectorRegister v1, int64_t imm2, int64_t m3);
+ inline void z_vrepib(VectorRegister v1, int64_t imm2);
+ inline void z_vrepih(VectorRegister v1, int64_t imm2);
+ inline void z_vrepif(VectorRegister v1, int64_t imm2);
+ inline void z_vrepig(VectorRegister v1, int64_t imm2);
+
+ inline void z_vsel( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+ inline void z_vseg( VectorRegister v1, VectorRegister v2, int64_t imm3);
+
+ // Load (immediate)
+ inline void z_vleib( VectorRegister v1, int64_t imm2, int64_t m3);
+ inline void z_vleih( VectorRegister v1, int64_t imm2, int64_t m3);
+ inline void z_vleif( VectorRegister v1, int64_t imm2, int64_t m3);
+ inline void z_vleig( VectorRegister v1, int64_t imm2, int64_t m3);
+
+ // Store
+ inline void z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2);
+ inline void z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
+ inline void z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2);
+
+ // Misc
+ inline void z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4);
+ inline void z_vgmb( VectorRegister v1, int64_t imm2, int64_t imm3);
+ inline void z_vgmh( VectorRegister v1, int64_t imm2, int64_t imm3);
+ inline void z_vgmf( VectorRegister v1, int64_t imm2, int64_t imm3);
+ inline void z_vgmg( VectorRegister v1, int64_t imm2, int64_t imm3);
+
+ inline void z_vgbm( VectorRegister v1, int64_t imm2);
+ inline void z_vzero( VectorRegister v1); // preferred method to set vreg to all zeroes
+ inline void z_vone( VectorRegister v1); // preferred method to set vreg to all ones
+
+ //---< Vector Arithmetic Instructions >---
+
+ // Load
+ inline void z_vlc( VectorRegister v1, VectorRegister v2, int64_t m3);
+ inline void z_vlcb( VectorRegister v1, VectorRegister v2);
+ inline void z_vlch( VectorRegister v1, VectorRegister v2);
+ inline void z_vlcf( VectorRegister v1, VectorRegister v2);
+ inline void z_vlcg( VectorRegister v1, VectorRegister v2);
+ inline void z_vlp( VectorRegister v1, VectorRegister v2, int64_t m3);
+ inline void z_vlpb( VectorRegister v1, VectorRegister v2);
+ inline void z_vlph( VectorRegister v1, VectorRegister v2);
+ inline void z_vlpf( VectorRegister v1, VectorRegister v2);
+ inline void z_vlpg( VectorRegister v1, VectorRegister v2);
+
+ // ADD
+ inline void z_va( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+ inline void z_vab( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vah( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vaf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vag( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vaq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vacc( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+ inline void z_vaccb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vacch( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vaccf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vaccg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vaccq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // SUB
+ inline void z_vs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+ inline void z_vsb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vsq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vscbi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
+ inline void z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // MULTIPLY -- only generic forms (explicit m4) are declared. NOTE(review): trailing notes are readings of the mnemonics; confirm against the ISA manual.
+ inline void z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // presumably multiply low
+ inline void z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // presumably multiply high
+ inline void z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // presumably multiply logical high
+ inline void z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // presumably multiply even
+ inline void z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // presumably multiply logical even
+ inline void z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // presumably multiply odd
+ inline void z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // presumably multiply logical odd
+
+ // MULTIPLY & ADD -- four vector operands plus explicit m5. NOTE(review): trailing notes are readings of the mnemonics; confirm against the ISA manual.
+ inline void z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5); // presumably multiply and add low
+ inline void z_vmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5); // presumably multiply and add high
+ inline void z_vmalh( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5); // presumably multiply and add logical high
+ inline void z_vmae( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5); // presumably multiply and add even
+ inline void z_vmale( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5); // presumably multiply and add logical even
+ inline void z_vmao( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5); // presumably multiply and add odd
+ inline void z_vmalo( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5); // presumably multiply and add logical odd
+
+ // VECTOR SUM -- NOTE(review): trailing notes are readings of the mnemonics; confirm against the ISA manual.
+ inline void z_vsum( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; presumably "sum across word"
+ inline void z_vsumb( VectorRegister v1, VectorRegister v2, VectorRegister v3); // presumably byte source elements
+ inline void z_vsumh( VectorRegister v1, VectorRegister v2, VectorRegister v3); // presumably halfword source elements
+ inline void z_vsumg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; presumably "sum across doubleword"
+ inline void z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3); // presumably halfword source elements
+ inline void z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3); // presumably fullword source elements
+ inline void z_vsumq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; presumably "sum across quadword"
+ inline void z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3); // presumably fullword source elements
+ inline void z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3); // presumably doubleword source elements
+
+ // AVERAGE -- NOTE(review): trailing notes are readings of the mnemonics; confirm against the ISA manual.
+ inline void z_vavg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; suffixed forms below presumably fix the element size (b/h/f/g)
+ inline void z_vavgb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vavgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vavgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vavgg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vavgl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; presumably "average logical" (unsigned)
+ inline void z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // VECTOR Galois Field Multiply Sum
+ inline void z_vgfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; suffixed forms below presumably fix the element size (b/h/f/g) - confirm
+ inline void z_vgfmb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vgfmh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vgfmf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vgfmg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ // VECTOR Galois Field Multiply Sum and Accumulate
+ inline void z_vgfma( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5); // generic form with explicit m5; NOTE(review): v4 is presumably the accumulate operand - confirm
+ inline void z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+ inline void z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+ inline void z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+ inline void z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
+
+ //---< Vector Logical Instructions >---
+
+ // AND -- whole-register forms, no element-size operand is declared for any instruction in this group.
+ inline void z_vn( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vnc( VectorRegister v1, VectorRegister v2, VectorRegister v3); // NOTE(review): presumably AND with complement - confirm against the ISA manual
+
+ // XOR
+ inline void z_vx( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // NOR
+ inline void z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // OR
+ inline void z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // Comparison (element-wise) -- NOTE(review): trailing notes are readings of the mnemonics; confirm against the ISA manual.
+ inline void z_vceq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5); // generic form (explicit m4 and cc5); presumably compare equal
+ inline void z_vceqb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vceqh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vceqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vceqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3); // trailing 's' forms: presumably the variants that also set the condition code (cc5 fixed) - confirm
+ inline void z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vch( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5); // generic form (explicit m4 and cc5); presumably compare high (signed)
+ inline void z_vchb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5); // generic form (explicit m4 and cc5); presumably compare high logical (unsigned)
+ inline void z_vchlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // Max/Min (element-wise) -- NOTE(review): trailing notes are readings of the mnemonics; confirm against the ISA manual.
+ inline void z_vmx( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; presumably maximum (signed)
+ inline void z_vmxb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmxh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmxf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmxg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmxl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; presumably maximum logical (unsigned)
+ inline void z_vmxlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmxlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmxlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmxlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmn( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; presumably minimum (signed)
+ inline void z_vmnb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmnh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmnf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmng( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmnl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; presumably minimum logical (unsigned)
+ inline void z_vmnlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmnlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmnlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vmnlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+
+ // Leading/Trailing Zeros, population count
+ inline void z_vclz( VectorRegister v1, VectorRegister v2, int64_t m3); // generic form with explicit m3; suffixed forms below presumably fix the element size (b/h/f/g) - confirm
+ inline void z_vclzb( VectorRegister v1, VectorRegister v2);
+ inline void z_vclzh( VectorRegister v1, VectorRegister v2);
+ inline void z_vclzf( VectorRegister v1, VectorRegister v2);
+ inline void z_vclzg( VectorRegister v1, VectorRegister v2);
+ inline void z_vctz( VectorRegister v1, VectorRegister v2, int64_t m3); // generic form with explicit m3; suffixed forms below presumably fix the element size (b/h/f/g) - confirm
+ inline void z_vctzb( VectorRegister v1, VectorRegister v2);
+ inline void z_vctzh( VectorRegister v1, VectorRegister v2);
+ inline void z_vctzf( VectorRegister v1, VectorRegister v2);
+ inline void z_vctzg( VectorRegister v1, VectorRegister v2);
+ inline void z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3); // only the generic form is declared; no suffixed convenience variants
+
+ // Rotate/Shift -- NOTE(review): trailing notes are readings of the mnemonics and operand names; confirm against the ISA manual.
+ inline void z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; presumably element rotate left logical with counts taken from a vector operand
+ inline void z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4); // generic form with explicit m4; presumably element rotate left logical with the count given by d2(b2)
+ inline void z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_verim( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5); // generic form with explicit m5; presumably element rotate and insert under mask
+ inline void z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+ inline void z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+ inline void z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+ inline void z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
+
+ inline void z_veslv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; NOTE(review): presumably element shift left with counts taken from a vector operand - confirm
+ inline void z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vesl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4); // generic form with explicit m4; NOTE(review): presumably element shift left with the count given by d2(b2) - confirm
+ inline void z_veslb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_veslh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_veslf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_veslg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+
+ inline void z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; NOTE(review): presumably element shift right arithmetic, counts from a vector operand - confirm
+ inline void z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vesra( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4); // generic form with explicit m4; NOTE(review): presumably element shift right arithmetic, count given by d2(b2) - confirm
+ inline void z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4); // generic form with explicit m4; NOTE(review): presumably element shift right logical, counts from a vector operand - confirm
+ inline void z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
+ inline void z_vesrl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4); // generic form with explicit m4; NOTE(review): presumably element shift right logical, count given by d2(b2) - confirm
+ inline void z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+ inline void z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
+
+ inline void z_vsl( VectorRegister v1, VectorRegister v2, VectorRegister v3); // no element-size operand; NOTE(review): presumably a shift of the whole register by a bit count - confirm
+ inline void z_vslb( VectorRegister v1, VectorRegister v2, VectorRegister v3); // NOTE(review): presumably shift left by byte - confirm
+ inline void z_vsldb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4); // NOTE(review): presumably shift left double by byte, imm4 being the byte count - confirm
+
+ inline void z_vsra( VectorRegister v1, VectorRegister v2, VectorRegister v3); // NOTE(review): presumably shift right arithmetic - confirm
+ inline void z_vsrab( VectorRegister v1, VectorRegister v2, VectorRegister v3); // NOTE(review): presumably shift right arithmetic by byte - confirm
+ inline void z_vsrl( VectorRegister v1, VectorRegister v2, VectorRegister v3); // NOTE(review): presumably shift right logical - confirm
+ inline void z_vsrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3); // NOTE(review): presumably shift right logical by byte - confirm
+
+ // Test under Mask
+ inline void z_vtm( VectorRegister v1, VectorRegister v2); // two vector operands only; NOTE(review): presumably reports its result via the condition code - confirm
+
+ //---< Vector String Instructions >---
+ inline void z_vfae( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find any element
+ inline void z_vfaeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5); // b/h/f forms drop imm4; NOTE(review): presumably fixed to byte/halfword/fullword elements - confirm
+ inline void z_vfaeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+ inline void z_vfaef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+ inline void z_vfee( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element equal
+ inline void z_vfeeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+ inline void z_vfeeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+ inline void z_vfeef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+ inline void z_vfene( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element not equal
+ inline void z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+ inline void z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+ inline void z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
+ inline void z_vstrc( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6); // String range compare
+ inline void z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
+ inline void z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
+ inline void z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
+ inline void z_vistr( VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5); // Isolate String
+ inline void z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5);
+ inline void z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5);
+ inline void z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5);
+ inline void z_vistrbs(VectorRegister v1, VectorRegister v2); // trailing 's' forms take no cc5; NOTE(review): presumably they always request condition-code setting - confirm
+ inline void z_vistrhs(VectorRegister v1, VectorRegister v2);
+ inline void z_vistrfs(VectorRegister v1, VectorRegister v2);
+
+
// Floatingpoint instructions
// ==========================
@@ -2331,7 +3034,6 @@ class Assembler : public AbstractAssembler {
inline void z_ahhlr(Register r1, Register r2, Register r3); // ADD halfword high low
inline void z_tam();
- inline void z_stck(int64_t d2, Register b2);
inline void z_stckf(int64_t d2, Register b2);
inline void z_stmg(Register r1, Register r3, int64_t d2, Register b2);
inline void z_lmg(Register r1, Register r3, int64_t d2, Register b2);
diff --git a/src/hotspot/cpu/s390/assembler_s390.inline.hpp b/src/hotspot/cpu/s390/assembler_s390.inline.hpp
index 449d0af0bf3..19c472787c5 100644
--- a/src/hotspot/cpu/s390/assembler_s390.inline.hpp
+++ b/src/hotspot/cpu/s390/assembler_s390.inline.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -309,6 +309,9 @@ inline void Assembler::z_lcgfr(Register r1, Register r2) { emit_32( LCGFR_ZOPC |
inline void Assembler::z_lnr( Register r1, Register r2) { emit_16( LNR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); }
inline void Assembler::z_lngr( Register r1, Register r2) { emit_32( LNGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
inline void Assembler::z_lngfr(Register r1, Register r2) { emit_32( LNGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); }
+inline void Assembler::z_lpr( Register r1, Register r2) { emit_16( LPR_ZOPC | regt( r1, 8, 16) | reg((r2 == noreg) ? r1:r2, 12, 16)); } // r2 == noreg: r1 is encoded as the second operand too
+inline void Assembler::z_lpgr( Register r1, Register r2) { emit_32( LPGR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); } // same noreg fallback, emitted via emit_32
+inline void Assembler::z_lpgfr(Register r1, Register r2) { emit_32( LPGFR_ZOPC | regt( r1, 24, 32) | reg((r2 == noreg) ? r1:r2, 28, 32)); } // same noreg fallback, emitted via emit_32
inline void Assembler::z_lrvr( Register r1, Register r2) { emit_32( LRVR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_lrvgr(Register r1, Register r2) { emit_32( LRVGR_ZOPC | regt(r1, 24, 32) | reg(r2, 28, 32)); }
@@ -686,7 +689,6 @@ inline void Assembler::z_ahhhr(Register r1, Register r2, Register r3) { emit_32(
inline void Assembler::z_ahhlr(Register r1, Register r2, Register r3) { emit_32( AHHLR_ZOPC | reg(r3, 16, 32) | reg(r1, 24, 32) | reg(r2, 28, 32)); }
inline void Assembler::z_tam() { emit_16( TAM_ZOPC); }
-inline void Assembler::z_stck(int64_t d2, Register b2) { emit_32( STCK_ZOPC | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
inline void Assembler::z_stckf(int64_t d2, Register b2) { emit_32( STCKF_ZOPC | uimm12(d2, 20, 32) | regz(b2, 16, 32)); }
inline void Assembler::z_stmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( STMG_ZOPC | simm20(d2) | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) ); }
inline void Assembler::z_lmg(Register r1, Register r3, int64_t d2, Register b2) { emit_48( LMG_ZOPC | simm20(d2) | reg(r1, 8, 48) | reg(r3,12,48)| reg(b2,16,48) ); }
@@ -702,6 +704,421 @@ inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2)
inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
+//---------------------------
+//-- Vector Instructions --
+//---------------------------
+
+//---< Vector Support Instructions >---
+
+// Load (transfer from memory)
+inline void Assembler::z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VLM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
+inline void Assembler::z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VL_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
+inline void Assembler::z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
+inline void Assembler::z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
+inline void Assembler::z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
+inline void Assembler::z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }
+
+// Gather/Scatter
+inline void Assembler::z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32)); }
+inline void Assembler::z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32)); }
+
+inline void Assembler::z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32)); }
+inline void Assembler::z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32)); }
+
+// load and replicate
+inline void Assembler::z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLREP_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlrepb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_BYTE); }// load byte and replicate to all vector elements of type 'B'
+inline void Assembler::z_vlreph( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_HW); } // load HW and replicate to all vector elements of type 'H'
+inline void Assembler::z_vlrepf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_FW); } // load FW and replicate to all vector elements of type 'F'
+inline void Assembler::z_vlrepg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_DW); } // load DW and replicate to all vector elements of type 'G'
+
+inline void Assembler::z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLLEZ_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vllezb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_BYTE); }// load logical byte into left DW of VR, zero all other bit positions.
+inline void Assembler::z_vllezh( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_HW); } // load logical HW into left DW of VR, zero all other bit positions.
+inline void Assembler::z_vllezf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_FW); } // load logical FW into left DW of VR, zero all other bit positions.
+inline void Assembler::z_vllezg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_DW); } // load logical DW into left DW of VR, zero all other bit positions.
+
+inline void Assembler::z_vlbb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLBB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(m3, 32, 48)); }
+inline void Assembler::z_vll( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VLL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }
+
+// Load (register to register)
+inline void Assembler::z_vlr ( VectorRegister v1, VectorRegister v2) {emit_48(VLR_ZOPC | vreg(v1, 8) | vreg(v2, 12)); }
+
+inline void Assembler::z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VLGV_ZOPC | reg(r1, 8, 48) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_BYTE); } // load byte from VR element (index d2(b2)) into GR (logical)
+inline void Assembler::z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_HW); } // load HW from VR element (index d2(b2)) into GR (logical)
+inline void Assembler::z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_FW); } // load FW from VR element (index d2(b2)) into GR (logical)
+inline void Assembler::z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_DW); } // load DW from VR element (index d2(b2)) into GR.
+
+inline void Assembler::z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_DW); } // vector element type 'G'
+
+inline void Assembler::z_vlvgp( VectorRegister v1, Register r2, Register r3) {emit_48(VLVGP_ZOPC | vreg(v1, 8) | reg(r2, 12, 48) | reg(r3, 16, 48)); }
+
+// vector register pack
+inline void Assembler::z_vpk( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPK_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32)); }
+inline void Assembler::z_vpkh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vpkf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vpkg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_DW); } // vector element type 'G'
+
+inline void Assembler::z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
+inline void Assembler::z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
+inline void Assembler::z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
+inline void Assembler::z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
+inline void Assembler::z_vpkshs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
+inline void Assembler::z_vpksfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
+inline void Assembler::z_vpksgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
+
+inline void Assembler::z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKLS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
+inline void Assembler::z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
+inline void Assembler::z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
+inline void Assembler::z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
+inline void Assembler::z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
+inline void Assembler::z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
+inline void Assembler::z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
+
+// vector register unpack (sign-extended)
+inline void Assembler::z_vuph( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vuphb( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vuphh( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vuphf( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vupl( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vuplb( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vuplh( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vuplf( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_FW); } // vector element type 'F'
+
+// vector register unpack (zero-extended)
+inline void Assembler::z_vuplh( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); } // emits VUPLH; overloads the 2-operand z_vuplh(v1, v2), which instead forwards to z_vupl with VRET_HW
+inline void Assembler::z_vuplhb( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vuplhh( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vuplhf( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vupll( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPLL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vupllb( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vupllh( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vupllf( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_FW); } // vector element type 'F'
+
+// vector register merge high/low
+inline void Assembler::z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmrhb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vmrhh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vmrhf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vmrhg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_DW); } // vector element type 'G'
+
+inline void Assembler::z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_BYTE); } // merge low (VMRL), vector element type 'B'
+inline void Assembler::z_vmrlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_HW); } // merge low (VMRL), vector element type 'H'
+inline void Assembler::z_vmrlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_FW); } // merge low (VMRL), vector element type 'F'
+inline void Assembler::z_vmrlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_DW); } // merge low (VMRL), vector element type 'G'
+
+// vector register permute
+inline void Assembler::z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VPERM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
+inline void Assembler::z_vpdi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPDI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm4(m4, 32, 48)); }
+
+// vector register replicate
+inline void Assembler::z_vrep( VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4) {emit_48(VREP_ZOPC | vreg(v1, 8) | vreg(v3, 12) | simm16(imm2, 16, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vrepi( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VREPI_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vrepib( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vrepih( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vrepif( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vrepig( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_DW); } // vector element type 'G'
+
+inline void Assembler::z_vsel( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VSEL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
+inline void Assembler::z_vseg( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VSEG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | uimm4(m3, 32, 48)); }
+
+// Load (immediate)
+inline void Assembler::z_vleib( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIB_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_BYTE, 32)); } // m3: element index for type 'B'. NOTE(review): imm2 and m3 are both placed at position 32, while z_vrepi places imm2 at 16 - confirm the immediate field position
+inline void Assembler::z_vleih( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIH_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_HW, 32)); } // m3: element index for type 'H'. NOTE(review): imm2 and m3 are both placed at position 32 - confirm the immediate field position
+inline void Assembler::z_vleif( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIF_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_FW, 32)); } // m3: element index for type 'F'. NOTE(review): imm2 and m3 are both placed at position 32 - confirm the immediate field position
+inline void Assembler::z_vleig( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIG_ZOPC | vreg(v1, 8) | simm16(imm2, 32, 48) | veix_mask(m3, VRET_DW, 32)); } // m3: element index for type 'G'. NOTE(review): imm2 and m3 are both placed at position 32 - confirm the immediate field position
+
+// Store
+inline void Assembler::z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VSTM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
+inline void Assembler::z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VST_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
+inline void Assembler::z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
+inline void Assembler::z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
+inline void Assembler::z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
+inline void Assembler::z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }
+inline void Assembler::z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VSTL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }
+
+// Misc
+inline void Assembler::z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4) {emit_48(VGM_ZOPC | vreg(v1, 8) | uimm8( imm2, 16, 48) | uimm8(imm3, 24, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vgmb( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vgmh( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vgmf( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vgmg( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_DW); } // vector element type 'G'
+
+inline void Assembler::z_vgbm( VectorRegister v1, int64_t imm2) {emit_48(VGBM_ZOPC | vreg(v1, 8) | uimm16(imm2, 16, 48)); }
+inline void Assembler::z_vzero( VectorRegister v1) {z_vgbm(v1, 0); } // preferred method to set vreg to all zeroes
+inline void Assembler::z_vone( VectorRegister v1) {z_vgbm(v1, 0xffff); } // preferred method to set vreg to all ones
+
+//---< Vector Arithmetic Instructions >---
+
+// Load
+inline void Assembler::z_vlc( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VLC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlcb( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vlch( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vlcf( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vlcg( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vlp( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VLP_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vlpb( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vlph( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vlpf( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vlpg( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_DW); } // vector element type 'G'
+
+// ADD
+inline void Assembler::z_va( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vab( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vah( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vaf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vag( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vaq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_va(v1, v2, v3, VRET_QW); } // vector element type 'Q'
+inline void Assembler::z_vacc( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VACC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vaccb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vacch( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vaccf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vaccg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vaccq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vacc(v1, v2, v3, VRET_QW); } // vector element type 'Q'
+
+// SUB
+inline void Assembler::z_vs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vsb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vsh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vsf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vsg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vsq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vs(v1, v2, v3, VRET_QW); } // vector element type 'Q'
+inline void Assembler::z_vscbi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSCBI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32)); }
+inline void Assembler::z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vscbi(v1, v2, v3, VRET_QW); } // vector element type 'Q'
+
+// MULTIPLY
+inline void Assembler::z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VML_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VME_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+inline void Assembler::z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
+
+// MULTIPLY & ADD
+inline void Assembler::z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmalh( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmae( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmale( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmao( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+inline void Assembler::z_vmalo( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
+
+// VECTOR SUM
+inline void Assembler::z_vsum( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_HW, 32)); }
+inline void Assembler::z_vsumb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsum(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vsumh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsum(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vsumg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_FW, 32)); }
+inline void Assembler::z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vsumq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_DW, 32)); }
+inline void Assembler::z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_DW); } // vector element type 'G'
+
+// Average
+inline void Assembler::z_vavg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vavgb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vavgh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vavgf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vavgg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavg(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vavgl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VAVGL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vavgl(v1, v2, v3, VRET_DW); } // vector element type 'G'
+
+// VECTOR Galois Field Multiply Sum
+inline void Assembler::z_vgfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VGFM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vgfmb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vgfmh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vgfmf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vgfmg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vgfm(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vgfma( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VGFMA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_DW, 20)); } // v4 is encoded at position 32 and the element size selector m5 at position 20, the same layout z_vmal uses
+inline void Assembler::z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {z_vgfma(v1, v2, v3, v4, VRET_DW); } // vector element type 'G'
+
+//---< Vector Logical Instructions >---
+
+// AND
+inline void Assembler::z_vn( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VN_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vnc( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// XOR
+inline void Assembler::z_vx( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// NOR
+inline void Assembler::z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VNO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// OR
+inline void Assembler::z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// Comparison (element-wise)
+inline void Assembler::z_vceq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCEQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); } // generic form: m4 = element type (VRET_BYTE..VRET_DW), cc5 = CC handling (VOPRC_CCIGN or VOPRC_CCSET)
+inline void Assembler::z_vceqb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
+inline void Assembler::z_vceqh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
+inline void Assembler::z_vceqf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
+inline void Assembler::z_vceqg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
+inline void Assembler::z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
+inline void Assembler::z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
+inline void Assembler::z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
+inline void Assembler::z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
+inline void Assembler::z_vch( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); } // generic form: m4 = element type (VRET_BYTE..VRET_DW), cc5 = CC handling (VOPRC_CCIGN or VOPRC_CCSET)
+inline void Assembler::z_vchb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
+inline void Assembler::z_vchh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
+inline void Assembler::z_vchf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
+inline void Assembler::z_vchg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
+inline void Assembler::z_vchbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
+inline void Assembler::z_vchhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
+inline void Assembler::z_vchfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
+inline void Assembler::z_vchgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
+inline void Assembler::z_vchl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCHL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); } // generic form: m4 = element type (VRET_BYTE..VRET_DW), cc5 = CC handling (VOPRC_CCIGN or VOPRC_CCSET)
+inline void Assembler::z_vchlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
+inline void Assembler::z_vchlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
+inline void Assembler::z_vchlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
+inline void Assembler::z_vchlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
+inline void Assembler::z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
+inline void Assembler::z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
+inline void Assembler::z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
+inline void Assembler::z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
+
+// Max/Min (element-wise)
+inline void Assembler::z_vmx( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmxb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vmxh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vmxf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vmxg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmx(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vmxl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMXL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmxlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vmxlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vmxlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vmxlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmxl(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vmn( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMN_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmnb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vmnh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vmnf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vmng( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmn(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vmnl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMNL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vmnlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vmnlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vmnlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vmnlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmnl(v1, v2, v3, VRET_DW); } // vector element type 'G'
+
+// Leading/Trailing Zeros, population count
+inline void Assembler::z_vclz( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VCLZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vclzb( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vclzh( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vclzf( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vclzg( VectorRegister v1, VectorRegister v2) {z_vclz(v1, v2, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vctz( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VCTZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vctzb( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vctzh( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vctzf( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vctzg( VectorRegister v1, VectorRegister v2) {z_vctz(v1, v2, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VPOPCT_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
+
+// Rotate/Shift
+inline void Assembler::z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VERLLV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_verllv(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VERLL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
+inline void Assembler::z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
+inline void Assembler::z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
+inline void Assembler::z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
+inline void Assembler::z_verim( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5) {emit_48(VERIM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48) | vesc_mask(m5, VRET_BYTE, VRET_DW, 32)); } // generic form: imm4 = 8-bit immediate, m5 = element type; uses its own opcode, not z_verll's VERLL_ZOPC
+inline void Assembler::z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {z_verim(v1, v2, v3, imm4, VRET_DW); } // vector element type 'G'
+
+inline void Assembler::z_veslv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESLV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_veslv(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vesl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_veslb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_BYTE);} // vector element type 'B'
+inline void Assembler::z_veslh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
+inline void Assembler::z_veslf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
+inline void Assembler::z_veslg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
+
+inline void Assembler::z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESRAV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrav(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vesra( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRA_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
+inline void Assembler::z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
+inline void Assembler::z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
+inline void Assembler::z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
+inline void Assembler::z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VESRLV_ZOPC| vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
+inline void Assembler::z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_HW); } // vector element type 'H'
+inline void Assembler::z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_FW); } // vector element type 'F'
+inline void Assembler::z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vesrlv(v1, v2, v3, VRET_DW); } // vector element type 'G'
+inline void Assembler::z_vesrl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
+inline void Assembler::z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
+inline void Assembler::z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
+inline void Assembler::z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
+inline void Assembler::z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
+
+inline void Assembler::z_vsl( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vslb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsldb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {emit_48(VSLDB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48)); }
+
+inline void Assembler::z_vsra( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsrab( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRAB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsrl( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+inline void Assembler::z_vsrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
+
+// Test under Mask
+inline void Assembler::z_vtm( VectorRegister v1, VectorRegister v2) {emit_48(VTM_ZOPC | vreg(v1, 8) | vreg(v2, 12)); }
+
+//---< Vector String Instructions >---
+inline void Assembler::z_vfae( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFAE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find any element
+inline void Assembler::z_vfaeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_BYTE, cc5); }
+inline void Assembler::z_vfaeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_HW, cc5); }
+inline void Assembler::z_vfaef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_FW, cc5); }
+inline void Assembler::z_vfee( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFEE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find element equal
+inline void Assembler::z_vfeeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_BYTE, cc5); }
+inline void Assembler::z_vfeeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_HW, cc5); }
+inline void Assembler::z_vfeef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_FW, cc5); }
+inline void Assembler::z_vfene( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFENE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find element not equal
+inline void Assembler::z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_BYTE, cc5); }
+inline void Assembler::z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_HW, cc5); }
+inline void Assembler::z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_FW, cc5); }
+inline void Assembler::z_vstrc( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6) {emit_48(VSTRC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(imm5, VRET_BYTE, VRET_FW, 20) | voprc_any(cc6, 24) ); } // String range compare
+inline void Assembler::z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_BYTE, cc6); }
+inline void Assembler::z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_HW, cc6); }
+inline void Assembler::z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_FW, cc6); }
+inline void Assembler::z_vistr( VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5) {emit_48(VISTR_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(imm3, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // isolate string
+inline void Assembler::z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_BYTE, cc5); } // vector element type 'B', caller supplies cc5
+inline void Assembler::z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_HW, cc5); } // vector element type 'H', caller supplies cc5
+inline void Assembler::z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_FW, cc5); } // vector element type 'F', caller supplies cc5
+inline void Assembler::z_vistrbs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
+inline void Assembler::z_vistrhs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
+inline void Assembler::z_vistrfs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
+
+
//-------------------------------
// FLOAT INSTRUCTIONS
//-------------------------------
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
index 21634f930e7..caa5e942394 100644
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
@@ -2713,13 +2713,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
metadata2reg(md->constant_encoding(), mdo);
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
- Bytecodes::Code bc = method->java_code_at_bci(bci);
- const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
- // invokeinterface bytecodes.
- if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
- !callee_is_static && // Required for optimized MH invokes.
- C1ProfileVirtualCalls) {
+ // invokeinterface bytecodes
+ if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, tmp1, recv);
diff --git a/src/hotspot/cpu/s390/globals_s390.hpp b/src/hotspot/cpu/s390/globals_s390.hpp
index cb5adff3ef5..fef2dbec99b 100644
--- a/src/hotspot/cpu/s390/globals_s390.hpp
+++ b/src/hotspot/cpu/s390/globals_s390.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
// Sorted according to sparc.
// z/Architecture remembers branch targets, so don't share vtables.
-define_pd_global(bool, ShareVtableStubs, false);
+define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.
define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks.
diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp
index 99965528886..edcbc8f4b1d 100644
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp
@@ -914,7 +914,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
//
// markOop displaced_header = obj->mark().set_unlocked();
// monitor->lock()->set_displaced_header(displaced_header);
- // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+ // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// // We stored the monitor address into the object's mark word.
// } else if (THREAD->is_lock_owned((address)displaced_header))
// // Simple recursive case.
@@ -949,7 +949,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
z_stg(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
BasicLock::displaced_header_offset_in_bytes(), monitor);
- // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+ // if (Atomic::cmpxchg(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
// Store stack address of the BasicObjectLock (this is monitor) into object.
add2reg(object_mark_addr, oopDesc::mark_offset_in_bytes(), object);
@@ -1021,7 +1021,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, Register object)
// if ((displaced_header = monitor->displaced_header()) == NULL) {
// // Recursive unlock. Mark the monitor unlocked by setting the object field to NULL.
// monitor->set_obj(NULL);
- // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+ // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
// monitor->set_obj(NULL);
// } else {
@@ -1062,7 +1062,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, Register object)
BasicLock::displaced_header_offset_in_bytes()));
z_bre(done); // displaced_header == 0 -> goto done
- // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+ // } else if (Atomic::cmpxchg(displaced_header, obj->mark_addr(), monitor) == monitor) {
// // We swapped the unlocked mark in displaced_header into the object's mark word.
// monitor->set_obj(NULL);
diff --git a/src/hotspot/cpu/s390/jniTypes_s390.hpp b/src/hotspot/cpu/s390/jniTypes_s390.hpp
index de7bfb49251..a10d9699a83 100644
--- a/src/hotspot/cpu/s390/jniTypes_s390.hpp
+++ b/src/hotspot/cpu/s390/jniTypes_s390.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -29,9 +29,9 @@
// This file holds platform-dependent routines used to write primitive
// jni types to the array of arguments passed into JavaCalls::call.
+#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
-#include "prims/jni.h"
class JNITypes : AllStatic {
// These functions write a java primitive type (in native format) to
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
index b8d3e4de275..af2c02934ff 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
@@ -4671,6 +4671,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));
mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));
+ resolve_oop_handle(mirror);
}
//---------------------------------------------------------------
diff --git a/src/hotspot/cpu/s390/register_definitions_s390.cpp b/src/hotspot/cpu/s390/register_definitions_s390.cpp
index 99116f5399b..2378d513799 100644
--- a/src/hotspot/cpu/s390/register_definitions_s390.cpp
+++ b/src/hotspot/cpu/s390/register_definitions_s390.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,3 +35,5 @@
REGISTER_DEFINITION(Register, noreg);
REGISTER_DEFINITION(FloatRegister, fnoreg);
+
+REGISTER_DEFINITION(VectorRegister, vnoreg);
diff --git a/src/hotspot/cpu/s390/register_s390.cpp b/src/hotspot/cpu/s390/register_s390.cpp
index 1746da9f150..853b5642470 100644
--- a/src/hotspot/cpu/s390/register_s390.cpp
+++ b/src/hotspot/cpu/s390/register_s390.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -46,3 +46,13 @@ const char* FloatRegisterImpl::name() const {
};
return is_valid() ? names[encoding()] : "fnoreg";
}
+
+const char* VectorRegisterImpl::name() const { // Printable name of this vector register.
+ const char* names[number_of_registers] = {
+ "Z_V0", "Z_V1", "Z_V2", "Z_V3", "Z_V4", "Z_V5", "Z_V6", "Z_V7",
+ "Z_V8", "Z_V9", "Z_V10", "Z_V11", "Z_V12", "Z_V13", "Z_V14", "Z_V15",
+ "Z_V16", "Z_V17", "Z_V18", "Z_V19", "Z_V20", "Z_V21", "Z_V22", "Z_V23",
+ "Z_V24", "Z_V25", "Z_V26", "Z_V27", "Z_V28", "Z_V29", "Z_V30", "Z_V31"
+ };
+ return is_valid() ? names[encoding()] : "vnoreg"; // Invalid register: report the vector sentinel, not the float one.
+}
diff --git a/src/hotspot/cpu/s390/register_s390.hpp b/src/hotspot/cpu/s390/register_s390.hpp
index 4c61174a613..f8f218e1dd6 100644
--- a/src/hotspot/cpu/s390/register_s390.hpp
+++ b/src/hotspot/cpu/s390/register_s390.hpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,11 +34,6 @@ class VMRegImpl;
typedef VMRegImpl* VMReg;
-// Use Register as shortcut.
-class RegisterImpl;
-typedef RegisterImpl* Register;
-
-// The implementation of integer registers for z/Architecture.
// z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001
//
@@ -57,6 +52,17 @@ typedef RegisterImpl* Register;
// f1,f3,f5,f7 General purpose (volatile)
// f8-f15 General purpose (nonvolatile)
+
+//===========================
+//=== Integer Registers ===
+//===========================
+
+// Use Register as shortcut.
+class RegisterImpl;
+typedef RegisterImpl* Register;
+
+// The implementation of integer registers for z/Architecture.
+
inline Register as_Register(int encoding) {
return (Register)(long)encoding;
}
@@ -110,6 +116,11 @@ CONSTANT_REGISTER_DECLARATION(Register, Z_R13, (13));
CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14));
CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15));
+
+//=============================
+//=== Condition Registers ===
+//=============================
+
// Use ConditionRegister as shortcut
class ConditionRegisterImpl;
typedef ConditionRegisterImpl* ConditionRegister;
@@ -159,7 +170,7 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
// dangers of defines.
// If a particular file has a problem with these defines then it's possible
// to turn them off in that file by defining
-// DONT_USE_REGISTER_DEFINES. Register_definition_s390.cpp does that
+// DONT_USE_REGISTER_DEFINES. Register_definitions_s390.cpp does that
// so that it's able to provide real definitions of these registers
// for use in debuggers and such.
@@ -186,6 +197,11 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
#define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
+
+//=========================
+//=== Float Registers ===
+//=========================
+
// Use FloatRegister as shortcut
class FloatRegisterImpl;
typedef FloatRegisterImpl* FloatRegister;
@@ -263,22 +279,6 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F15, (15));
#define Z_F15 ((FloatRegister)( Z_F15_FloatRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
-// Need to know the total number of registers of all sorts for SharedInfo.
-// Define a class that exports it.
-
-class ConcreteRegisterImpl : public AbstractRegisterImpl {
- public:
- enum {
- number_of_registers =
- (RegisterImpl::number_of_registers +
- FloatRegisterImpl::number_of_registers)
- * 2 // register halves
- + 1 // condition code register
- };
- static const int max_gpr;
- static const int max_fpr;
-};
-
// Single, Double and Quad fp reg classes. These exist to map the ADLC
// encoding for a floating point register, to the FloatRegister number
// desired by the macroassembler. A FloatRegister is a number between
@@ -329,6 +329,161 @@ class QuadFloatRegisterImpl {
};
+//==========================
+//=== Vector Registers ===
+//==========================
+
+// Use VectorRegister as shortcut
+class VectorRegisterImpl;
+typedef VectorRegisterImpl* VectorRegister;
+
+// The implementation of vector registers for z/Architecture.
+
+inline VectorRegister as_VectorRegister(int encoding) {
+ return (VectorRegister)(long)encoding; // The register number itself becomes the pointer value.
+}
+
+class VectorRegisterImpl: public AbstractRegisterImpl {
+ public:
+ enum {
+ number_of_registers = 32,
+ number_of_arg_registers = 0
+ };
+
+ // construction
+ inline friend VectorRegister as_VectorRegister(int encoding);
+
+ inline VMReg as_VMReg();
+
+ // accessors
+ int encoding() const {
+ assert(is_valid(), "invalid register"); return value();
+ }
+
+ bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
+ bool is_volatile() const { return true; } // Every vector register is reported as volatile.
+ bool is_nonvolatile() const { return false; }
+
+ // Register fields in z/Architecture instructions are 4 bits wide, restricting the
+ // addressable register set size to 16.
+ // The vector register set size is 32, requiring an extension, by one bit, of the
+ // register encoding. This is accomplished by the introduction of an RXB field in the
+ // instruction. RXB = Register eXtension Bits.
+ // The RXB field contains the MSBs (most significant bits) of the vector register numbers
+ // used for this instruction. Assignment of MSB in RXB is by bit position of the
+ // register field in the instruction.
+ // Example:
+ // The register field starting at bit position 12 in the instruction is assigned RXB bit 0b0100.
+ int64_t RXB_mask(int pos) {
+ if (encoding() >= number_of_registers/2) { // Only registers 16..31 need the extension bit.
+ switch (pos) {
+ case 8: return ((int64_t)0b1000) << 8; // actual bit pos: 36
+ case 12: return ((int64_t)0b0100) << 8; // actual bit pos: 37
+ case 16: return ((int64_t)0b0010) << 8; // actual bit pos: 38
+ case 32: return ((int64_t)0b0001) << 8; // actual bit pos: 39
+ default:
+ ShouldNotReachHere(); // pos is not one of the four register field positions handled above.
+ }
+ }
+ return 0; // Registers 0..15: no RXB bit to set.
+ }
+
+ const char* name() const;
+
+ VectorRegister successor() const { return as_VectorRegister(encoding() + 1); }
+};
+
+// The Vector registers of z/Architecture.
+
+CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg, (-1));
+
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V0, (0));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V1, (1));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V2, (2));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V3, (3));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V4, (4));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V5, (5));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V6, (6));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V7, (7));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V8, (8));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V9, (9));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V10, (10));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V11, (11));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V12, (12));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V13, (13));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V14, (14));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V15, (15));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V16, (16));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V17, (17));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V18, (18));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V19, (19));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V20, (20));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V21, (21));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V22, (22));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V23, (23));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V24, (24));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V25, (25));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V26, (26));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V27, (27));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V28, (28));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V29, (29));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V30, (30));
+CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V31, (31));
+
+#ifndef DONT_USE_REGISTER_DEFINES
+#define vnoreg ((VectorRegister)(vnoreg_VectorRegisterEnumValue))
+#define Z_V0 ((VectorRegister)( Z_V0_VectorRegisterEnumValue))
+#define Z_V1 ((VectorRegister)( Z_V1_VectorRegisterEnumValue))
+#define Z_V2 ((VectorRegister)( Z_V2_VectorRegisterEnumValue))
+#define Z_V3 ((VectorRegister)( Z_V3_VectorRegisterEnumValue))
+#define Z_V4 ((VectorRegister)( Z_V4_VectorRegisterEnumValue))
+#define Z_V5 ((VectorRegister)( Z_V5_VectorRegisterEnumValue))
+#define Z_V6 ((VectorRegister)( Z_V6_VectorRegisterEnumValue))
+#define Z_V7 ((VectorRegister)( Z_V7_VectorRegisterEnumValue))
+#define Z_V8 ((VectorRegister)( Z_V8_VectorRegisterEnumValue))
+#define Z_V9 ((VectorRegister)( Z_V9_VectorRegisterEnumValue))
+#define Z_V10 ((VectorRegister)( Z_V10_VectorRegisterEnumValue))
+#define Z_V11 ((VectorRegister)( Z_V11_VectorRegisterEnumValue))
+#define Z_V12 ((VectorRegister)( Z_V12_VectorRegisterEnumValue))
+#define Z_V13 ((VectorRegister)( Z_V13_VectorRegisterEnumValue))
+#define Z_V14 ((VectorRegister)( Z_V14_VectorRegisterEnumValue))
+#define Z_V15 ((VectorRegister)( Z_V15_VectorRegisterEnumValue))
+#define Z_V16 ((VectorRegister)( Z_V16_VectorRegisterEnumValue))
+#define Z_V17 ((VectorRegister)( Z_V17_VectorRegisterEnumValue))
+#define Z_V18 ((VectorRegister)( Z_V18_VectorRegisterEnumValue))
+#define Z_V19 ((VectorRegister)( Z_V19_VectorRegisterEnumValue))
+#define Z_V20 ((VectorRegister)( Z_V20_VectorRegisterEnumValue))
+#define Z_V21 ((VectorRegister)( Z_V21_VectorRegisterEnumValue))
+#define Z_V22 ((VectorRegister)( Z_V22_VectorRegisterEnumValue))
+#define Z_V23 ((VectorRegister)( Z_V23_VectorRegisterEnumValue))
+#define Z_V24 ((VectorRegister)( Z_V24_VectorRegisterEnumValue))
+#define Z_V25 ((VectorRegister)( Z_V25_VectorRegisterEnumValue))
+#define Z_V26 ((VectorRegister)( Z_V26_VectorRegisterEnumValue))
+#define Z_V27 ((VectorRegister)( Z_V27_VectorRegisterEnumValue))
+#define Z_V28 ((VectorRegister)( Z_V28_VectorRegisterEnumValue))
+#define Z_V29 ((VectorRegister)( Z_V29_VectorRegisterEnumValue))
+#define Z_V30 ((VectorRegister)( Z_V30_VectorRegisterEnumValue))
+#define Z_V31 ((VectorRegister)( Z_V31_VectorRegisterEnumValue))
+#endif // DONT_USE_REGISTER_DEFINES
+
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+ enum {
+ number_of_registers =
+ (RegisterImpl::number_of_registers +
+ FloatRegisterImpl::number_of_registers) // NOTE(review): VectorRegisterImpl is not counted here - confirm this is intended.
+ * 2 // register halves
+ + 1 // condition code register
+ };
+ static const int max_gpr;
+ static const int max_fpr;
+};
+
+
// Common register declarations used in assembler code.
REGISTER_DECLARATION(Register, Z_EXC_OOP, Z_R2);
REGISTER_DECLARATION(Register, Z_EXC_PC, Z_R3);
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index b30437e0faf..15902d9f7aa 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -3149,7 +3149,7 @@ operand noArg_iRegI() %{
interface(REG_INTER);
%}
-// Revenregi and roddRegI constitute and even-odd-pair.
+// revenRegI and roddRegI constitute an even-odd pair.
operand revenRegI() %{
constraint(ALLOC_IN_RC(z_rarg3_int_reg));
match(iRegI);
@@ -3157,7 +3157,7 @@ operand revenRegI() %{
interface(REG_INTER);
%}
-// Revenregi and roddRegI constitute and even-odd-pair.
+// revenRegI and roddRegI constitute an even-odd pair.
operand roddRegI() %{
constraint(ALLOC_IN_RC(z_rarg4_int_reg));
match(iRegI);
@@ -3283,7 +3283,7 @@ operand memoryRegP() %{
interface(REG_INTER);
%}
-// Revenregp and roddRegP constitute and even-odd-pair.
+// revenRegP and roddRegP constitute an even-odd pair.
operand revenRegP() %{
constraint(ALLOC_IN_RC(z_rarg3_ptr_reg));
match(iRegP);
@@ -3291,7 +3291,7 @@ operand revenRegP() %{
interface(REG_INTER);
%}
-// Revenregl and roddRegL constitute and even-odd-pair.
+// revenRegP and roddRegP constitute an even-odd pair.
operand roddRegP() %{
constraint(ALLOC_IN_RC(z_rarg4_ptr_reg));
match(iRegP);
@@ -3380,7 +3380,7 @@ operand iRegL() %{
interface(REG_INTER);
%}
-// Revenregl and roddRegL constitute and even-odd-pair.
+// revenRegL and roddRegL constitute an even-odd pair.
operand revenRegL() %{
constraint(ALLOC_IN_RC(z_rarg3_long_reg));
match(iRegL);
@@ -3388,7 +3388,7 @@ operand revenRegL() %{
interface(REG_INTER);
%}
-// Revenregl and roddRegL constitute and even-odd-pair.
+// revenRegL and roddRegL constitute an even-odd pair.
operand roddRegL() %{
constraint(ALLOC_IN_RC(z_rarg4_long_reg));
match(iRegL);
@@ -6443,6 +6443,32 @@ instruct mulL_Reg_mem(iRegL dst, memory src)%{
ins_pipe(pipe_class_dummy);
%}
+instruct mulHiL_reg_reg(revenRegL Rdst, roddRegL Rsrc1, iRegL Rsrc2, iRegL Rtmp1, flagsReg cr)%{
+ match(Set Rdst (MulHiL Rsrc1 Rsrc2));
+ effect(TEMP_DEF Rdst, USE_KILL Rsrc1, TEMP Rtmp1, KILL cr);
+ ins_cost(7*DEFAULT_COST);
+ // TODO: s390 port size(VARIABLE_SIZE);
+ format %{ "MulHiL $Rdst, $Rsrc1, $Rsrc2\t # Multiply High Long" %}
+ ins_encode%{
+ Register dst = $Rdst$$Register;
+ Register src1 = $Rsrc1$$Register;
+ Register src2 = $Rsrc2$$Register;
+ Register tmp1 = $Rtmp1$$Register;
+ Register tmp2 = $Rdst$$Register; // dst doubles as second temp; its last use as tmp2 precedes z_mlgr.
+ // z/Architecture has only unsigned multiply (64 * 64 -> 128).
+ // implementing mulhs(a,b) = mulhu(a,b) - (a & (b>>63)) - (b & (a>>63))
+ __ z_srag(tmp2, src1, 63); // a>>63
+ __ z_srag(tmp1, src2, 63); // b>>63
+ __ z_ngr(tmp2, src2); // b & (a>>63)
+ __ z_ngr(tmp1, src1); // a & (b>>63)
+ __ z_agr(tmp1, tmp2); // ((a & (b>>63)) + (b & (a>>63)))
+ __ z_mlgr(dst, src2); // tricky: 128-bit product is written to even/odd pair (dst,src1),
+ // multiplicand is taken from oddReg (src1), multiplier in src2.
+ __ z_sgr(dst, tmp1); // high half of unsigned product minus both correction terms
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
// DIV
// Integer DIVMOD with Register, both quotient and mod results
diff --git a/src/hotspot/cpu/s390/templateTable_s390.cpp b/src/hotspot/cpu/s390/templateTable_s390.cpp
index becbb0e48b5..50477e6e556 100644
--- a/src/hotspot/cpu/s390/templateTable_s390.cpp
+++ b/src/hotspot/cpu/s390/templateTable_s390.cpp
@@ -2382,6 +2382,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
if (is_static) {
__ mem2reg_opt(obj, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
__ mem2reg_opt(obj, Address(obj, Klass::java_mirror_offset()));
+ __ resolve_oop_handle(obj);
}
}
diff --git a/src/hotspot/cpu/s390/vm_version_s390.cpp b/src/hotspot/cpu/s390/vm_version_s390.cpp
index 709a9fdf6ed..51383d6db75 100644
--- a/src/hotspot/cpu/s390/vm_version_s390.cpp
+++ b/src/hotspot/cpu/s390/vm_version_s390.cpp
@@ -224,7 +224,7 @@ void VM_Version::initialize() {
}
// z/Architecture supports 8-byte compare-exchange operations
- // (see Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
+ // (see Atomic::cmpxchg)
// and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
_supports_cx8 = true;
@@ -706,13 +706,13 @@ void VM_Version::determine_features() {
Label getCPUFEATURES; // fcode = -1 (cache)
Label getCIPHERFEATURES; // fcode = -2 (cipher)
Label getMSGDIGESTFEATURES; // fcode = -3 (SHA)
- Label checkLongDispFast;
- Label noLongDisp;
- Label posDisp, negDisp;
+ Label getVECTORFEATURES; // fcode = -4 (OS support for vector instructions)
Label errRTN;
a->z_ltgfr(Z_R0, Z_ARG2); // Buf len to r0 and test.
- a->z_brl(getFEATURES); // negative -> Get machine features.
- a->z_brz(checkLongDispFast); // zero -> Check for high-speed Long Displacement Facility.
+ a->z_brl(getFEATURES); // negative -> Get machine features not covered by facility list.
+ a->z_lghi(Z_R1,0);
+ a->z_brz(errRTN); // zero -> Function code currently not used, indicate "aborted".
+
a->z_aghi(Z_R0, -1);
a->z_stfle(0, Z_ARG1);
a->z_lg(Z_R1, 0, Z_ARG1); // Get first DW of facility list.
@@ -736,6 +736,8 @@ void VM_Version::determine_features() {
a->z_bre(getCIPHERFEATURES);
a->z_cghi(Z_R0, -3); // -3: Extract detailed crypto capabilities (msg digest instructions).
a->z_bre(getMSGDIGESTFEATURES);
+ a->z_cghi(Z_R0, -4); // -4: Verify vector instruction availability (OS support).
+ a->z_bre(getVECTORFEATURES);
a->z_xgr(Z_RET, Z_RET); // Not a valid function code.
a->z_br(Z_R14); // Return "operation aborted".
@@ -766,46 +768,9 @@ void VM_Version::determine_features() {
a->z_ecag(Z_RET,Z_R0,0,Z_ARG3); // Extract information as requested by Z_ARG1 contents.
a->z_br(Z_R14);
- // Check the performance of the Long Displacement Facility, i.e. find out if we are running on z900 or newer.
- a->bind(checkLongDispFast);
- a->z_llill(Z_R0, 0xffff); // preset #iterations
- a->z_larl(Z_R1, posDisp);
- a->z_stck(0, Z_ARG1); // Get begin timestamp.
-
- a->bind(posDisp); // Positive disp loop.
- a->z_lg(Z_ARG2, 0, Z_ARG1);
- a->z_bctgr(Z_R0, Z_R1);
-
- a->z_stck(0, Z_ARG1); // Get end timestamp.
- a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1); // Calculate elapsed time.
- a->z_lcgr(Z_ARG2, Z_ARG2);
- a->z_srlg(Z_ARG2, Z_ARG2, 12); // LSB: now microseconds
- a->z_stg(Z_ARG2, 8, Z_ARG1); // Store difference in buffer[1].
-
- a->z_llill(Z_R0, 0xffff); // preset #iterations
- a->z_larl(Z_R1, negDisp);
- a->z_xgr(Z_ARG2, Z_ARG2); // Clear to detect absence of LongDisp facility.
- a->z_stck(0, Z_ARG1); // Get begin timestamp.
- a->z_la(Z_ARG1, 8, Z_ARG1);
-
- a->bind(negDisp); // Negative disp loop.
- a->z_lg(Z_ARG2, -8, Z_ARG1);
- a->z_bctgr(Z_R0, Z_R1);
-
- a->z_aghi(Z_ARG1, -8);
- a->z_stck(0, Z_ARG1); // Get end timestamp.
- a->z_ltgr(Z_ARG2, Z_ARG2); // Check for absence of LongDisp facility.
- a->z_brz(noLongDisp);
- a->z_sg(Z_ARG2, 0, Z_R0, Z_ARG1); // Calc elapsed time.
- a->z_lcgr(Z_ARG2, Z_ARG2);
- a->z_srlg(Z_ARG2, Z_ARG2, 12); // LSB: now microseconds
- a->z_stg(Z_ARG2, 0, Z_ARG1); // store difference in buffer[0]
-
- a->z_llill(Z_RET,0xffff);
- a->z_br(Z_R14);
-
- a->bind(noLongDisp);
- a->z_lghi(Z_RET,-1);
+ // Use a vector instruction to verify OS support. Will fail with SIGFPE if OS support is missing.
+ a->bind(getVECTORFEATURES);
+ a->z_vtm(Z_V0,Z_V0); // non-destructive vector instruction. Will cause SIGFPE if not supported.
a->z_br(Z_R14);
address code_end = a->pc();
@@ -962,6 +927,19 @@ void VM_Version::determine_features() {
_nfeatures = 0;
}
+ if (has_VectorFacility()) {
+ // Verify that feature can actually be used. OS support required.
+ call_getFeatures(buffer, -4, 0);
+ if (printVerbose) {
+ ttyLocker ttyl;
+ if (has_VectorFacility()) {
+ tty->print_cr(" Vector Facility has been verified to be supported by OS");
+ } else {
+ tty->print_cr(" Vector Facility has been disabled - not supported by OS");
+ }
+ }
+ }
+
// Extract Crypto Facility details.
if (has_Crypto()) {
// Get cipher features.
diff --git a/src/hotspot/cpu/s390/vm_version_s390.hpp b/src/hotspot/cpu/s390/vm_version_s390.hpp
index 7aa66bffc39..0f5d754707b 100644
--- a/src/hotspot/cpu/s390/vm_version_s390.hpp
+++ b/src/hotspot/cpu/s390/vm_version_s390.hpp
@@ -473,6 +473,8 @@ class VM_Version: public Abstract_VM_Version {
static void set_has_CryptoExt5() { _features[0] |= CryptoExtension5Mask; }
static void set_has_VectorFacility() { _features[2] |= VectorFacilityMask; }
+ static void reset_has_VectorFacility() { _features[2] &= ~VectorFacilityMask; }
+
// Assembler testing.
static void allow_all();
static void revert();
diff --git a/src/hotspot/cpu/sparc/assembler_sparc.hpp b/src/hotspot/cpu/sparc/assembler_sparc.hpp
index 69822951928..f8f5b11c9a6 100644
--- a/src/hotspot/cpu/sparc/assembler_sparc.hpp
+++ b/src/hotspot/cpu/sparc/assembler_sparc.hpp
@@ -122,6 +122,7 @@ class Assembler : public AbstractAssembler {
fpop1_op3 = 0x34,
fpop2_op3 = 0x35,
impdep1_op3 = 0x36,
+ addx_op3 = 0x36,
aes3_op3 = 0x36,
sha_op3 = 0x36,
bmask_op3 = 0x36,
@@ -133,6 +134,8 @@ class Assembler : public AbstractAssembler {
fzero_op3 = 0x36,
fsrc_op3 = 0x36,
fnot_op3 = 0x36,
+ mpmul_op3 = 0x36,
+ umulx_op3 = 0x36,
xmulx_op3 = 0x36,
crc32c_op3 = 0x36,
impdep2_op3 = 0x37,
@@ -195,6 +198,9 @@ class Assembler : public AbstractAssembler {
fnegs_opf = 0x05,
fnegd_opf = 0x06,
+ addxc_opf = 0x11,
+ addxccc_opf = 0x13,
+ umulxhi_opf = 0x16,
alignaddr_opf = 0x18,
bmask_opf = 0x19,
@@ -240,7 +246,8 @@ class Assembler : public AbstractAssembler {
sha256_opf = 0x142,
sha512_opf = 0x143,
- crc32c_opf = 0x147
+ crc32c_opf = 0x147,
+ mpmul_opf = 0x148
};
enum op5s {
@@ -380,7 +387,7 @@ class Assembler : public AbstractAssembler {
assert_signed_range(x, nbits + 2);
}
- static void assert_unsigned_const(int x, int nbits) {
+ static void assert_unsigned_range(int x, int nbits) {
assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
}
@@ -534,6 +541,12 @@ class Assembler : public AbstractAssembler {
return x & ((1 << nbits) - 1);
}
+ // Unsigned immediate in the low bits, at most nbits long: asserts that x fits, then masks it to nbits.
+ static int uimm(int x, int nbits) {
+ assert_unsigned_range(x, nbits);
+ return x & ((1 << nbits) - 1);
+ }
+
// compute inverse of wdisp16
static intptr_t inv_wdisp16(int x, intptr_t pos) {
int lo = x & ((1 << 14) - 1);
@@ -631,6 +644,9 @@ class Assembler : public AbstractAssembler {
// FMAf instructions supported only on certain processors
static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }
+ // MPMUL instruction supported only on certain processors
+ static void mpmul_only() { assert(VM_Version::has_mpmul(), "This instruction only works on SPARC with MPMUL"); }
+
// instruction only in VIS1
static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
@@ -772,11 +788,12 @@ class Assembler : public AbstractAssembler {
AbstractAssembler::flush();
}
- inline void emit_int32(int); // shadows AbstractAssembler::emit_int32
- inline void emit_data(int);
- inline void emit_data(int, RelocationHolder const &rspec);
- inline void emit_data(int, relocInfo::relocType rtype);
- // helper for above functions
+ inline void emit_int32(int32_t); // shadows AbstractAssembler::emit_int32
+ inline void emit_data(int32_t);
+ inline void emit_data(int32_t, RelocationHolder const&);
+ inline void emit_data(int32_t, relocInfo::relocType rtype);
+
+ // Helper for the above functions.
inline void check_delay();
@@ -929,6 +946,10 @@ class Assembler : public AbstractAssembler {
// fmaf instructions.
inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+ inline void fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+
+ inline void fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
+ inline void fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
// pp 165
@@ -960,6 +981,8 @@ class Assembler : public AbstractAssembler {
inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d,
RelocationHolder const &rspec = RelocationHolder());
+ inline void ldd(Register s1, Register s2, FloatRegister d);
+ inline void ldd(Register s1, int simm13a, FloatRegister d);
inline void ldfsr(Register s1, Register s2);
inline void ldfsr(Register s1, int simm13a);
@@ -987,8 +1010,6 @@ class Assembler : public AbstractAssembler {
inline void lduw(Register s1, int simm13a, Register d);
inline void ldx(Register s1, Register s2, Register d);
inline void ldx(Register s1, int simm13a, Register d);
- inline void ldd(Register s1, Register s2, Register d);
- inline void ldd(Register s1, int simm13a, Register d);
// pp 177
@@ -1157,6 +1178,9 @@ class Assembler : public AbstractAssembler {
inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2);
inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a);
+ inline void std(FloatRegister d, Register s1, Register s2);
+ inline void std(FloatRegister d, Register s1, int simm13a);
+
inline void stfsr(Register s1, Register s2);
inline void stfsr(Register s1, int simm13a);
inline void stxfsr(Register s1, Register s2);
@@ -1177,8 +1201,6 @@ class Assembler : public AbstractAssembler {
inline void stw(Register d, Register s1, int simm13a);
inline void stx(Register d, Register s1, Register s2);
inline void stx(Register d, Register s1, int simm13a);
- inline void std(Register d, Register s1, Register s2);
- inline void std(Register d, Register s1, int simm13a);
// pp 177
@@ -1267,6 +1289,9 @@ class Assembler : public AbstractAssembler {
// VIS3 instructions
+ inline void addxc(Register s1, Register s2, Register d);
+ inline void addxccc(Register s1, Register s2, Register d);
+
inline void movstosw(FloatRegister s, Register d);
inline void movstouw(FloatRegister s, Register d);
inline void movdtox(FloatRegister s, Register d);
@@ -1276,6 +1301,7 @@ class Assembler : public AbstractAssembler {
inline void xmulx(Register s1, Register s2, Register d);
inline void xmulxhi(Register s1, Register s2, Register d);
+ inline void umulxhi(Register s1, Register s2, Register d);
// Crypto SHA instructions
@@ -1287,6 +1313,10 @@ class Assembler : public AbstractAssembler {
inline void crc32c(FloatRegister s1, FloatRegister s2, FloatRegister d);
+ // MPMUL instruction
+
+ inline void mpmul(int uimm5);
+
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
#ifdef VALIDATE_PIPELINE
diff --git a/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp b/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp
index 070a1f80db3..b9a918e5e0b 100644
--- a/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp
+++ b/src/hotspot/cpu/sparc/assembler_sparc.inline.hpp
@@ -59,7 +59,7 @@ inline void Assembler::check_delay() {
#endif
}
-inline void Assembler::emit_int32(int x) {
+inline void Assembler::emit_int32(int32_t x) {
check_delay();
#ifdef VALIDATE_PIPELINE
_hazard_state = NoHazard;
@@ -67,16 +67,16 @@ inline void Assembler::emit_int32(int x) {
AbstractAssembler::emit_int32(x);
}
-inline void Assembler::emit_data(int x) {
+inline void Assembler::emit_data(int32_t x) {
emit_int32(x);
}
-inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
+inline void Assembler::emit_data(int32_t x, relocInfo::relocType rtype) {
relocate(rtype);
emit_int32(x);
}
-inline void Assembler::emit_data(int x, RelocationHolder const &rspec) {
+inline void Assembler::emit_data(int32_t x, RelocationHolder const &rspec) {
relocate(rspec);
emit_int32(x);
}
@@ -359,6 +359,19 @@ inline void Assembler::fmadd(FloatRegisterImpl::Width w, FloatRegister s1, Float
fmaf_only();
emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w));
}
+inline void Assembler::fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+ fmaf_only();
+ emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x4 + w) | fs2(s2, w));
+}
+
+inline void Assembler::fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+ fmaf_only();
+ emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0xc + w) | fs2(s2, w));
+}
+inline void Assembler::fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
+ fmaf_only();
+ emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(0x8 + w) | fs2(s2, w));
+}
inline void Assembler::flush(Register s1, Register s2) {
emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2));
@@ -402,6 +415,15 @@ inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a,
emit_data(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);
}
+inline void Assembler::ldd(Register s1, Register s2, FloatRegister d) {
+ assert(d->is_even(), "not even");
+ ldf(FloatRegisterImpl::D, s1, s2, d);
+}
+inline void Assembler::ldd(Register s1, int simm13a, FloatRegister d) {
+ assert(d->is_even(), "not even");
+ ldf(FloatRegisterImpl::D, s1, simm13a, d);
+}
+
inline void Assembler::ldxfsr(Register s1, Register s2) {
emit_int32(op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2));
}
@@ -460,16 +482,6 @@ inline void Assembler::ldx(Register s1, Register s2, Register d) {
inline void Assembler::ldx(Register s1, int simm13a, Register d) {
emit_data(op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
-inline void Assembler::ldd(Register s1, Register s2, Register d) {
- v9_dep();
- assert(d->is_even(), "not even");
- emit_int32(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2));
-}
-inline void Assembler::ldd(Register s1, int simm13a, Register d) {
- v9_dep();
- assert(d->is_even(), "not even");
- emit_data(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
-}
inline void Assembler::ldsba(Register s1, Register s2, int ia, Register d) {
emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@@ -806,6 +818,15 @@ inline void Assembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register
emit_data(op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
+inline void Assembler::std(FloatRegister d, Register s1, Register s2) {
+ assert(d->is_even(), "not even");
+ stf(FloatRegisterImpl::D, d, s1, s2);
+}
+inline void Assembler::std(FloatRegister d, Register s1, int simm13a) {
+ assert(d->is_even(), "not even");
+ stf(FloatRegisterImpl::D, d, s1, simm13a);
+}
+
inline void Assembler::stxfsr(Register s1, Register s2) {
emit_int32(op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2));
}
@@ -848,16 +869,6 @@ inline void Assembler::stx(Register d, Register s1, Register s2) {
inline void Assembler::stx(Register d, Register s1, int simm13a) {
emit_data(op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
-inline void Assembler::std(Register d, Register s1, Register s2) {
- v9_dep();
- assert(d->is_even(), "not even");
- emit_int32(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2));
-}
-inline void Assembler::std(Register d, Register s1, int simm13a) {
- v9_dep();
- assert(d->is_even(), "not even");
- emit_data(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
-}
inline void Assembler::stba(Register d, Register s1, Register s2, int ia) {
emit_int32(op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@@ -1043,6 +1054,15 @@ inline void Assembler::bshuffle(FloatRegister s1, FloatRegister s2, FloatRegiste
// VIS3 instructions
+inline void Assembler::addxc(Register s1, Register s2, Register d) {
+ vis3_only();
+ emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxc_opf) | rs2(s2));
+}
+inline void Assembler::addxccc(Register s1, Register s2, Register d) {
+ vis3_only();
+ emit_int32(op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxccc_opf) | rs2(s2));
+}
+
inline void Assembler::movstosw(FloatRegister s, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S));
@@ -1073,6 +1093,10 @@ inline void Assembler::xmulxhi(Register s1, Register s2, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2));
}
+inline void Assembler::umulxhi(Register s1, Register s2, Register d) {
+ vis3_only();
+ emit_int32(op(arith_op) | rd(d) | op3(umulx_op3) | rs1(s1) | opf(umulxhi_opf) | rs2(s2));
+}
// Crypto SHA instructions
@@ -1096,4 +1120,11 @@ inline void Assembler::crc32c(FloatRegister s1, FloatRegister s2, FloatRegister
emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D));
}
+// MPMUL instruction
+
+inline void Assembler::mpmul(int uimm5) {
+ mpmul_only(); // asserts VM_Version::has_mpmul()
+ emit_int32(op(arith_op) | rd(0) | op3(mpmul_op3) | rs1(0) | opf(mpmul_opf) | uimm(uimm5, 5)); // rd and rs1 fields are zero; uimm5 is range-checked into the low 5 bits
+}
+
#endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP
diff --git a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp
index 5269c9fd8be..937252fe752 100644
--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp
+++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp
@@ -2763,13 +2763,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
}
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
- Bytecodes::Code bc = method->java_code_at_bci(bci);
- const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes
- if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
- !callee_is_static && // required for optimized MH invokes
- C1ProfileVirtualCalls) {
+ if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, tmp1, recv);
diff --git a/src/hotspot/cpu/sparc/frame_sparc.cpp b/src/hotspot/cpu/sparc/frame_sparc.cpp
index 4fa7e6a973b..3985a875c34 100644
--- a/src/hotspot/cpu/sparc/frame_sparc.cpp
+++ b/src/hotspot/cpu/sparc/frame_sparc.cpp
@@ -119,8 +119,8 @@ address RegisterMap::pd_location(VMReg regname) const {
reg = regname->as_Register();
}
if (reg->is_out()) {
- assert(_younger_window != NULL, "Younger window should be available");
- return second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
+ return _younger_window == NULL ? NULL :
+ second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
}
if (reg->is_local() || reg->is_in()) {
assert(_window != NULL, "Window should be available");
diff --git a/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp b/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp
index 465a6a014e3..cb4bb137ceb 100644
--- a/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp
+++ b/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp
@@ -43,7 +43,7 @@ const bool CCallingConventionRequiresIntsAsLongs = true;
#elif defined(COMPILER1)
// pure C1, 32-bit, small machine
#define DEFAULT_CACHE_LINE_SIZE 16
-#elif defined(COMPILER2) || defined(SHARK)
+#elif defined(COMPILER2)
// pure C2, 64-bit, large machine
#define DEFAULT_CACHE_LINE_SIZE 128
#endif
diff --git a/src/hotspot/cpu/sparc/globals_sparc.hpp b/src/hotspot/cpu/sparc/globals_sparc.hpp
index 89361fcddbd..a232649f957 100644
--- a/src/hotspot/cpu/sparc/globals_sparc.hpp
+++ b/src/hotspot/cpu/sparc/globals_sparc.hpp
@@ -97,12 +97,15 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
writeable) \
\
product(intx, UseVIS, 99, \
- "Highest supported VIS instructions set on Sparc") \
+ "Highest supported VIS instructions set on SPARC") \
range(0, 99) \
\
product(bool, UseCBCond, false, \
"Use compare and branch instruction on SPARC") \
\
+ product(bool, UseMPMUL, false, \
+ "Use multi-precision multiply instruction (mpmul) on SPARC") \
+ \
product(bool, UseBlockZeroing, false, \
"Use special cpu instructions for block zeroing") \
\
diff --git a/src/hotspot/cpu/sparc/jniTypes_sparc.hpp b/src/hotspot/cpu/sparc/jniTypes_sparc.hpp
index 50b51fff2c3..bbdb064a9f1 100644
--- a/src/hotspot/cpu/sparc/jniTypes_sparc.hpp
+++ b/src/hotspot/cpu/sparc/jniTypes_sparc.hpp
@@ -25,9 +25,9 @@
#ifndef CPU_SPARC_VM_JNITYPES_SPARC_HPP
#define CPU_SPARC_VM_JNITYPES_SPARC_HPP
+#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
-#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive jni
// types to the array of arguments passed into JavaCalls::call
diff --git a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
index b667f1b1103..70a65b07298 100644
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.cpp
@@ -1574,29 +1574,39 @@ void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(zero, ptr_cc, s1, 0, L);
- return;
+ } else {
+ br_null(s1, false, p, L);
+ delayed()->nop();
}
- br_null(s1, false, p, L);
- delayed()->nop();
}
void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
- return;
+ } else { // use_cbcond(L) is false: emit the regular branch instead
+ br_notnull(s1, false, p, L);
+ delayed()->nop(); // nop goes into the delay slot
}
- br_notnull(s1, false, p, L);
- delayed()->nop();
}
// Unconditional short branch
void MacroAssembler::ba_short(Label& L) {
+ assert_not_delayed(); // same precondition check the other *_short branches perform
if (use_cbcond(L)) {
Assembler::cbcond(equal, icc, G0, G0, L);
- return;
+ } else { // use_cbcond(L) is false: emit a plain always-branch instead
+ br(always, false, pt, L);
+ delayed()->nop(); // nop goes into the delay slot
}
- br(always, false, pt, L);
+}
+
+// Branch on the zero bit of 'icc': to L if icc.z == 1 when 'iszero' is true, or if icc.z == 0 when it is false.
+
+void MacroAssembler::br_icc_zero(bool iszero, Predict p, Label &L) {
+ assert_not_delayed();
+ Condition cf = (iszero ? Assembler::zero : Assembler::notZero); // condition the branch is taken on
+ br(cf, false, p, L);
delayed()->nop();
}
@@ -3565,20 +3575,6 @@ static void generate_satb_log_enqueue(bool with_frame) {
#undef __
}
-static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
- if (with_frame) {
- if (satb_log_enqueue_with_frame == 0) {
- generate_satb_log_enqueue(with_frame);
- assert(satb_log_enqueue_with_frame != 0, "postcondition.");
- }
- } else {
- if (satb_log_enqueue_frameless == 0) {
- generate_satb_log_enqueue(with_frame);
- assert(satb_log_enqueue_frameless != 0, "postcondition.");
- }
- }
-}
-
void MacroAssembler::g1_write_barrier_pre(Register obj,
Register index,
int offset,
@@ -3648,13 +3644,9 @@ void MacroAssembler::g1_write_barrier_pre(Register obj,
"Or we need to think harder.");
if (pre_val->is_global() && !preserve_o_regs) {
- generate_satb_log_enqueue_if_necessary(true); // with frame
-
call(satb_log_enqueue_with_frame);
delayed()->mov(pre_val, O0);
} else {
- generate_satb_log_enqueue_if_necessary(false); // frameless
-
save_frame(0);
call(satb_log_enqueue_frameless);
delayed()->mov(pre_val->after_save(), O0);
@@ -3758,15 +3750,6 @@ static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
}
-static inline void
-generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
- if (dirty_card_log_enqueue == 0) {
- generate_dirty_card_log_enqueue(byte_map_base);
- assert(dirty_card_log_enqueue != 0, "postcondition.");
- }
-}
-
-
void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
Label filtered;
@@ -3796,7 +3779,6 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val
} else {
post_filter_masm->nop();
}
- generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
save_frame(0);
call(dirty_card_log_enqueue);
if (use_scr) {
@@ -3809,6 +3791,28 @@ void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val
bind(filtered);
}
+// Called from init_globals() after universe_init() and before interpreter_init(); generates the G1 enqueue stubs up front.
+void g1_barrier_stubs_init() {
+ CollectedHeap* heap = Universe::heap();
+ if (heap->kind() == CollectedHeap::G1CollectedHeap) {
+ // Only needed for G1
+ if (dirty_card_log_enqueue == 0) { // stub not generated yet
+ G1SATBCardTableLoggingModRefBS* bs =
+ barrier_set_cast(heap->barrier_set());
+ generate_dirty_card_log_enqueue(bs->byte_map_base);
+ assert(dirty_card_log_enqueue != 0, "postcondition.");
+ }
+ if (satb_log_enqueue_with_frame == 0) { // stub not generated yet
+ generate_satb_log_enqueue(true); // with_frame == true
+ assert(satb_log_enqueue_with_frame != 0, "postcondition.");
+ }
+ if (satb_log_enqueue_frameless == 0) { // stub not generated yet
+ generate_satb_log_enqueue(false); // with_frame == false (frameless variant)
+ assert(satb_log_enqueue_frameless != 0, "postcondition.");
+ }
+ }
+}
+
#endif // INCLUDE_ALL_GCS
///////////////////////////////////////////////////////////////////////////////////
@@ -3834,6 +3838,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
ld_ptr(mirror, in_bytes(ConstMethod::constants_offset()), mirror);
ld_ptr(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld_ptr(mirror, mirror_offset, mirror);
+ resolve_oop_handle(mirror);
}
void MacroAssembler::load_klass(Register src_oop, Register klass) {
diff --git a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp
index 4f24d0354ee..db1ebf1ead8 100644
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.hpp
@@ -606,7 +606,7 @@ class MacroAssembler : public Assembler {
// offset. No explicit code generation is needed if the offset is within a certain
// range (0 <= offset <= page_size).
//
- // %%%%%% Currently not done for SPARC
+ // FIXME: Currently not done for SPARC
void null_check(Register reg, int offset = -1);
static bool needs_explicit_null_check(intptr_t offset);
@@ -648,6 +648,9 @@ class MacroAssembler : public Assembler {
// unconditional short branch
void ba_short(Label& L);
+ // Branch on icc.z (true or not).
+ void br_icc_zero(bool iszero, Predict p, Label &L);
+
inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
@@ -663,19 +666,19 @@ class MacroAssembler : public Assembler {
inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
// Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual)
- inline void cmp( Register s1, Register s2 );
- inline void cmp( Register s1, int simm13a );
+ inline void cmp( Register s1, Register s2 );
+ inline void cmp( Register s1, int simm13a );
inline void jmp( Register s1, Register s2 );
inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
// Check if the call target is out of wdisp30 range (relative to the code cache)
static inline bool is_far_target(address d);
- inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
- inline void call( address d, RelocationHolder const& rspec);
+ inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
+ inline void call( address d, RelocationHolder const& rspec);
- inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
- inline void call( Label& L, RelocationHolder const& rspec);
+ inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
+ inline void call( Label& L, RelocationHolder const& rspec);
inline void callr( Register s1, Register s2 );
inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
diff --git a/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp b/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp
index 679bbd30c29..16871d98629 100644
--- a/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp
+++ b/src/hotspot/cpu/sparc/macroAssembler_sparc.inline.hpp
@@ -185,7 +185,7 @@ inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, reloc
}
inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
- // See note[+] on 'avoid_pipeline_stalls()', in "assembler_sparc.inline.hpp".
+ // See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
avoid_pipeline_stall();
br(c, a, p, target(L));
}
diff --git a/src/hotspot/cpu/sparc/register_sparc.hpp b/src/hotspot/cpu/sparc/register_sparc.hpp
index 22cb0283825..5682b622d9c 100644
--- a/src/hotspot/cpu/sparc/register_sparc.hpp
+++ b/src/hotspot/cpu/sparc/register_sparc.hpp
@@ -236,7 +236,7 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
inline VMReg as_VMReg( );
// accessors
- int encoding() const { assert(is_valid(), "invalid register"); return value(); }
+ int encoding() const { assert(is_valid(), "invalid register"); return value(); }
public:
int encoding(Width w) const {
@@ -258,10 +258,12 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
return -1;
}
- bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
+ bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
+ bool is_even() const { return (encoding() & 1) == 0; } // true when bit 0 of the register encoding is clear
+
const char* name() const;
- FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
+ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
};
diff --git a/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp b/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp
index 4f217e33ac6..357be521a6b 100644
--- a/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp
+++ b/src/hotspot/cpu/sparc/sharedRuntime_sparc.cpp
@@ -41,10 +41,6 @@
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
-#ifdef SHARK
-#include "compiler/compileBroker.hpp"
-#include "shark/sharkCompiler.hpp"
-#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmciJavaClasses.hpp"
#endif
diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad
index 07f62bac5aa..c582cd5a6a8 100644
--- a/src/hotspot/cpu/sparc/sparc.ad
+++ b/src/hotspot/cpu/sparc/sparc.ad
@@ -2628,7 +2628,6 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
%}
-
enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
@@ -2651,7 +2650,71 @@ enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
__ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
+enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+ MacroAssembler _masm(&cbuf);
+ FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+ FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+ FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+ FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+ __ fmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd); // single precision; fmsubF_regx4 formats this as dst = a * b - c
+%}
+
+enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+ MacroAssembler _masm(&cbuf);
+
+ FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+ FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+ FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+ FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+ __ fmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd); // double precision; fmsubD_regx4 formats this as dst = a * b - c
+%}
+
+enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+ MacroAssembler _masm(&cbuf);
+
+ FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+ FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+ FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+ FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+ __ fnmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd); // single precision; fnmaddF_regx4 formats this as dst = -(a * b + c)
+%}
+
+enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+ MacroAssembler _masm(&cbuf);
+
+ FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+ FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+ FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+ FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+ __ fnmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd); // double precision; fnmaddD_regx4 formats this as dst = -(a * b + c)
+%}
+
+enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
+ MacroAssembler _masm(&cbuf);
+
+ FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
+ FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
+ FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
+ FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
+
+ __ fnmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd); // single precision; fnmsubF_regx4 formats this as dst = -(a * b - c)
+%}
+
+enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
+ MacroAssembler _masm(&cbuf);
+
+ FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
+ FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
+ FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
+ FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
+
+ __ fnmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd); // double precision; fnmsubD_regx4 formats this as dst = -(a * b - c)
+%}
enc_class fmovs (dflt_reg dst, dflt_reg src) %{
@@ -7597,7 +7660,7 @@ instruct sqrtD_reg_reg(regD dst, regD src) %{
ins_pipe(fdivD_reg_reg);
%}
-// Single precision fused floating-point multiply-add (d = a * b + c).
+// Single/Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF c (Binary a b)));
@@ -7606,7 +7669,6 @@ instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
ins_pipe(fmaF_regx4);
%}
-// Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD c (Binary a b)));
@@ -7615,6 +7677,66 @@ instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
ins_pipe(fmaD_regx4);
%}
+// Additional patterns matching complement versions that we can map directly to
+// variants of the fused multiply-add instructions.
+
+// Single/Double precision fused floating-point multiply-sub (d = a * b - c)
+instruct fmsubF_regx4(regF dst, regF a, regF b, regF c) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF (NegF c) (Binary a b))); // the fmaF_regx4 shape with the addend negated: a * b + (-c)
+ format %{ "fmsubs $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
+ ins_encode(fmsubs(dst, a, b, c));
+ ins_pipe(fmaF_regx4);
+%}
+
+instruct fmsubD_regx4(regD dst, regD a, regD b, regD c) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD (NegD c) (Binary a b))); // the fmaD_regx4 shape with the addend negated: a * b + (-c)
+ format %{ "fmsubd $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
+ ins_encode(fmsubd(dst, a, b, c));
+ ins_pipe(fmaD_regx4);
+%}
+
+// Single/Double precision fused floating-point neg. multiply-add,
+// d = -1 * a * b - c = -(a * b + c)
+instruct fnmaddF_regx4(regF dst, regF a, regF b, regF c) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF (NegF c) (Binary (NegF a) b))); // (-a) * b + (-c)
+ match(Set dst (FmaF (NegF c) (Binary a (NegF b)))); // a * (-b) + (-c): same value, negation on the other factor
+ format %{ "fnmadds $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
+ ins_encode(fnmadds(dst, a, b, c));
+ ins_pipe(fmaF_regx4);
+%}
+
+instruct fnmaddD_regx4(regD dst, regD a, regD b, regD c) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD (NegD c) (Binary (NegD a) b))); // (-a) * b + (-c)
+ match(Set dst (FmaD (NegD c) (Binary a (NegD b)))); // a * (-b) + (-c): same value, negation on the other factor
+ format %{ "fnmaddd $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
+ ins_encode(fnmaddd(dst, a, b, c));
+ ins_pipe(fmaD_regx4);
+%}
+
+// Single/Double precision fused floating-point neg. multiply-sub,
+// d = -1 * a * b + c = -(a * b - c)
+instruct fnmsubF_regx4(regF dst, regF a, regF b, regF c) %{
+ predicate(UseFMA);
+ match(Set dst (FmaF c (Binary (NegF a) b))); // (-a) * b + c
+ match(Set dst (FmaF c (Binary a (NegF b)))); // a * (-b) + c: same value, negation on the other factor
+ format %{ "fnmsubs $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
+ ins_encode(fnmsubs(dst, a, b, c));
+ ins_pipe(fmaF_regx4);
+%}
+
+instruct fnmsubD_regx4(regD dst, regD a, regD b, regD c) %{
+ predicate(UseFMA);
+ match(Set dst (FmaD c (Binary (NegD a) b))); // (-a) * b + c
+ match(Set dst (FmaD c (Binary a (NegD b)))); // a * (-b) + c: same value, negation on the other factor
+ format %{ "fnmsubd $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
+ ins_encode(fnmsubd(dst, a, b, c));
+ ins_pipe(fmaD_regx4);
+%}
+
//----------Logical Instructions-----------------------------------------------
// And Instructions
// Register And
diff --git a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp
index 9c4713e936d..351555dbe51 100644
--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp
@@ -58,7 +58,6 @@
// Note: The register L7 is used as L7_thread_cache, and may not be used
// any other way within this module.
-
static const Register& Lstub_temp = L2;
// -------------------------------------------------------------------------------------------------------------------------
@@ -4943,7 +4942,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
-/**
+ /**
* Arguments:
*
* Inputs:
@@ -4975,6 +4974,773 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ /**
+ * Arguments:
+ *
+ * Inputs:
+ * I0 - int* x-addr
+ * I1 - int x-len
+ * I2 - int* y-addr
+ * I3 - int y-len
+ * I4 - int* z-addr (output vector)
+ * I5 - int z-len
+ */
+ address generate_multiplyToLen() {
+ assert(UseMultiplyToLenIntrinsic, "need VIS3 instructions");
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+ address start = __ pc();
+
+ __ save_frame(0);
+
+ const Register xptr = I0; // input address
+ const Register xlen = I1; // ...and length in 32b-words
+ const Register yptr = I2; //
+ const Register ylen = I3; //
+ const Register zptr = I4; // output address
+ const Register zlen = I5; // ...and length in 32b-words
+
+ /* The minimal "limb" representation suggests that odd length vectors are as
+ * likely as even length dittos. This in turn suggests that we need to cope
+ * with odd/even length arrays and data not aligned properly for 64-bit read
+ * and write operations. We thus use a number of different kernels:
+ *
+ * if (is_even(x.len) && is_even(y.len))
+ * if (is_align64(x) && is_align64(y) && is_align64(z))
+ * if (x.len == y.len && 16 <= x.len && x.len <= 64)
+ * memv_mult_mpmul(...)
+ * else
+ * memv_mult_64x64(...)
+ * else
+ * memv_mult_64x64u(...)
+ * else
+ * memv_mult_32x32(...)
+ *
+ * Here we assume VIS3 support (for 'umulxhi', 'addxc' and 'addxccc').
+ * In case CBCOND instructions are supported, we will use 'cxbX'. If the
+ * MPMUL instruction is supported, we will generate a kernel using 'mpmul'
+ * (for vectors with proper characteristics).
+ */
+ const Register tmp0 = L0;
+ const Register tmp1 = L1;
+
+ Label L_mult_32x32;
+ Label L_mult_64x64u;
+ Label L_mult_64x64;
+ Label L_exit;
+
+ if_both_even(xlen, ylen, tmp0, false, L_mult_32x32); // some length odd -> 32x32 kernel
+ if_all3_aligned(xptr, yptr, zptr, tmp1, 64, false, L_mult_64x64u); // NOTE(review): align = 64 gives mask 63 (a 64-byte test) while the text above speaks of 64-bit accesses - confirm intent
+
+ if (UseMPMUL) {
+ if_eq(xlen, ylen, false, L_mult_64x64); // lengths differ -> generic 64x64 kernel
+ if_in_rng(xlen, 16, 64, tmp0, tmp1, false, L_mult_64x64); // length outside [16..64] -> generic 64x64 kernel
+
+ // 1. Multiply naturally aligned 64b-datums using a generic 'mpmul' kernel,
+ // operating on equal length vectors of size [16..64].
+ gen_mult_mpmul(xlen, xptr, yptr, zptr, L_exit);
+ }
+
+ // 2. Multiply naturally aligned 64-bit datums (64x64).
+ __ bind(L_mult_64x64);
+ gen_mult_64x64(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
+
+ // 3. Multiply unaligned 64-bit datums (64x64).
+ __ bind(L_mult_64x64u);
+ gen_mult_64x64_unaligned(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
+
+ // 4. Multiply naturally aligned 32-bit datums (32x32).
+ __ bind(L_mult_32x32);
+ gen_mult_32x32(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
+
+ __ bind(L_exit); // common exit shared by all kernels
+ __ ret();
+ __ delayed()->restore();
+
+ return start;
+ }
+
+ // Additional helper functions used by multiplyToLen generation.
+
+ void if_both_even(Register r1, Register r2, Register tmp, bool iseven, Label &L)
+ {
+ __ or3(r1, r2, tmp); // tmp = r1 | r2
+ __ andcc(tmp, 0x1, tmp); // keep bit 0 only: zero iff both r1 and r2 are even
+ __ br_icc_zero(iseven, Assembler::pn, L); // branch to L when (both even) == iseven
+ }
+
+ void if_all3_aligned(Register r1, Register r2, Register r3,
+ Register tmp, uint align, bool isalign, Label &L)
+ {
+ __ or3(r1, r2, tmp); // tmp = r1 | r2
+ __ or3(r3, tmp, tmp); // tmp = r1 | r2 | r3
+ __ andcc(tmp, (align - 1), tmp); // zero iff no value has a bit set below 'align' (assumes align is a power of two - confirm)
+ __ br_icc_zero(isalign, Assembler::pn, L); // branch to L when (all three aligned) == isalign
+ }
+
+ void if_eq(Register x, Register y, bool iseq, Label &L)
+ {
+ Assembler::Condition cf = (iseq ? Assembler::equal : Assembler::notEqual); // branch on equality or on inequality, as selected by 'iseq'
+ __ cmp_and_br_short(x, y, cf, Assembler::pt, L);
+ }
+
+ void if_in_rng(Register x, int lb, int ub, Register t1, Register t2, bool inrng, Label &L)
+ {
+ assert(Assembler::is_simm13(lb), "Small ints only!");
+ assert(Assembler::is_simm13(ub), "Small ints only!");
+ // Compute (x - lb) * (ub - x) >= 0
+ // NOTE: With the local use of this routine, we rely on small integers to
+ // guarantee that we do not overflow in the multiplication.
+ __ add(G0, ub, t2); // t2 = ub
+ __ sub(x, lb, t1); // t1 = x - lb
+ __ sub(t2, x, t2); // t2 = ub - x
+ __ mulx(t1, t2, t1); // t1 = (x - lb) * (ub - x); non-negative iff lb <= x <= ub
+ Assembler::Condition cf = (inrng ? Assembler::greaterEqual : Assembler::less); // branch when in range, or when out of range, as selected by 'inrng'
+ __ cmp_and_br_short(t1, G0, cf, Assembler::pt, L);
+ }
+
+ void ldd_entry(Register base, Register offs, FloatRegister dest)
+ {
+ __ ldd(base, offs, dest); // load from base + offs into the float register
+ __ inc(offs, 8); // advance the offset by 8 bytes for the next entry
+ }
+
+ void ldx_entry(Register base, Register offs, Register dest)
+ {
+ __ ldx(base, offs, dest); // load from base + offs into the integer register
+ __ inc(offs, 8); // advance the offset by 8 bytes for the next entry
+ }
+
+ void mpmul_entry(int m, Label &next)
+ {
+ __ mpmul(m); // emit 'mpmul' with immediate m
+ __ cbcond(Assembler::equal, Assembler::icc, G0, G0, next); // G0 == G0 always holds: unconditional branch to 'next' (the form ba_short emits)
+ }
+
+ void stx_entry(Label &L, Register r1, Register r2, Register base, Register offs)
+ {
+ __ bind(L); // entry point for the dispatch into the store sequence
+ __ stx(r1, base, offs); // store r1 at base + offs
+ __ inc(offs, 8);
+ __ stx(r2, base, offs); // store r2 in the following 8-byte slot
+ __ inc(offs, 8);
+ }
+
+ void offs_entry(Label &Lbl0, Label &Lbl1)
+ {
+ assert(Lbl0.is_bound(), "must be");
+ assert(Lbl1.is_bound(), "must be");
+
+ int offset = Lbl0.loc_pos() - Lbl1.loc_pos(); // distance of Lbl0 from Lbl1 in the code buffer
+
+ __ emit_data(offset); // emitted as a data word, not an instruction
+ }
+
+ /* Generate the actual multiplication kernels for BigInteger vectors:
+ *
+ * 1. gen_mult_mpmul(...)
+ *
+ * 2. gen_mult_64x64(...)
+ *
+ * 3. gen_mult_64x64_unaligned(...)
+ *
+ * 4. gen_mult_32x32(...)
+ */
+ void gen_mult_mpmul(Register len, Register xptr, Register yptr, Register zptr,
+ Label &L_exit)
+ {
+ const Register zero = G0;
+ const Register gxp = G1; // Need to use global registers across RWs.
+ const Register gyp = G2;
+ const Register gzp = G3;
+ const Register offs = G4;
+ const Register disp = G5;
+
+ __ mov(xptr, gxp);
+ __ mov(yptr, gyp);
+ __ mov(zptr, gzp);
+
+ /* Compute jump vector entry:
+ *
+ * 1. mpmul input size (0..31) x 64b
+ * 2. vector input size in 32b limbs (even number)
+ * 3. branch entries in reverse order (31..0), using two
+ * instructions per entry (2 * 4 bytes).
+ *
+ * displacement = byte_offset(bra_offset(len))
+ * = byte_offset((64 - len)/2)
+ * = 8 * (64 - len)/2
+ * = 4 * (64 - len)
+ */
+ Register temp = I5; // Alright to use input regs. in first batch.
+
+ __ sub(zero, len, temp);
+ __ add(temp, 64, temp);
+ __ sllx(temp, 2, disp); // disp := (64 - len) << 2
+
+ // Dispatch relative current PC, into instruction table below.
+ __ rdpc(temp);
+ __ add(temp, 16, temp); // skip the four 4-byte instructions of this dispatch (rdpc, add, jmp, delay slot)
+ __ jmp(temp, disp);
+ __ delayed()->clr(offs); // offs starts at 0 for the load sequence
+
+ ldd_entry(gxp, offs, F22);
+ ldd_entry(gxp, offs, F20);
+ ldd_entry(gxp, offs, F18);
+ ldd_entry(gxp, offs, F16);
+ ldd_entry(gxp, offs, F14);
+ ldd_entry(gxp, offs, F12);
+ ldd_entry(gxp, offs, F10);
+ ldd_entry(gxp, offs, F8);
+ ldd_entry(gxp, offs, F6);
+ ldd_entry(gxp, offs, F4);
+ ldx_entry(gxp, offs, I5);
+ ldx_entry(gxp, offs, I4);
+ ldx_entry(gxp, offs, I3);
+ ldx_entry(gxp, offs, I2);
+ ldx_entry(gxp, offs, I1);
+ ldx_entry(gxp, offs, I0);
+ ldx_entry(gxp, offs, L7);
+ ldx_entry(gxp, offs, L6);
+ ldx_entry(gxp, offs, L5);
+ ldx_entry(gxp, offs, L4);
+ ldx_entry(gxp, offs, L3);
+ ldx_entry(gxp, offs, L2);
+ ldx_entry(gxp, offs, L1);
+ ldx_entry(gxp, offs, L0);
+ ldd_entry(gxp, offs, F2);
+ ldd_entry(gxp, offs, F0);
+ ldx_entry(gxp, offs, O5);
+ ldx_entry(gxp, offs, O4);
+ ldx_entry(gxp, offs, O3);
+ ldx_entry(gxp, offs, O2);
+ ldx_entry(gxp, offs, O1);
+ ldx_entry(gxp, offs, O0);
+
+ __ save(SP, -176, SP); // new register window; pointers, offs and disp live in globals (see note on G1 above)
+
+ const Register addr = gxp; // Alright to reuse 'gxp'.
+
+ // Dispatch relative current PC, into instruction table below.
+ __ rdpc(addr);
+ __ add(addr, 16, addr); // skip the four 4-byte instructions of this dispatch
+ __ jmp(addr, disp);
+ __ delayed()->clr(offs); // offs restarts at 0 for the y-vector loads
+
+ ldd_entry(gyp, offs, F58);
+ ldd_entry(gyp, offs, F56);
+ ldd_entry(gyp, offs, F54);
+ ldd_entry(gyp, offs, F52);
+ ldd_entry(gyp, offs, F50);
+ ldd_entry(gyp, offs, F48);
+ ldd_entry(gyp, offs, F46);
+ ldd_entry(gyp, offs, F44);
+ ldd_entry(gyp, offs, F42);
+ ldd_entry(gyp, offs, F40);
+ ldd_entry(gyp, offs, F38);
+ ldd_entry(gyp, offs, F36);
+ ldd_entry(gyp, offs, F34);
+ ldd_entry(gyp, offs, F32);
+ ldd_entry(gyp, offs, F30);
+ ldd_entry(gyp, offs, F28);
+ ldd_entry(gyp, offs, F26);
+ ldd_entry(gyp, offs, F24);
+ ldx_entry(gyp, offs, O5);
+ ldx_entry(gyp, offs, O4);
+ ldx_entry(gyp, offs, O3);
+ ldx_entry(gyp, offs, O2);
+ ldx_entry(gyp, offs, O1);
+ ldx_entry(gyp, offs, O0);
+ ldx_entry(gyp, offs, L7);
+ ldx_entry(gyp, offs, L6);
+ ldx_entry(gyp, offs, L5);
+ ldx_entry(gyp, offs, L4);
+ ldx_entry(gyp, offs, L3);
+ ldx_entry(gyp, offs, L2);
+ ldx_entry(gyp, offs, L1);
+ ldx_entry(gyp, offs, L0);
+
+ __ save(SP, -176, SP);
+ __ save(SP, -176, SP);
+ __ save(SP, -176, SP);
+ __ save(SP, -176, SP);
+ __ save(SP, -176, SP);
+
+ Label L_mpmul_restore_4, L_mpmul_restore_3, L_mpmul_restore_2;
+ Label L_mpmul_restore_1, L_mpmul_restore_0;
+
+ // Dispatch relative current PC, into instruction table below.
+ __ rdpc(addr);
+ __ add(addr, 16, addr); // skip the four 4-byte instructions of this dispatch
+ __ jmp(addr, disp);
+ __ delayed()->clr(offs);
+
+ mpmul_entry(31, L_mpmul_restore_0);
+ mpmul_entry(30, L_mpmul_restore_0);
+ mpmul_entry(29, L_mpmul_restore_0);
+ mpmul_entry(28, L_mpmul_restore_0);
+ mpmul_entry(27, L_mpmul_restore_1);
+ mpmul_entry(26, L_mpmul_restore_1);
+ mpmul_entry(25, L_mpmul_restore_1);
+ mpmul_entry(24, L_mpmul_restore_1);
+ mpmul_entry(23, L_mpmul_restore_1);
+ mpmul_entry(22, L_mpmul_restore_1);
+ mpmul_entry(21, L_mpmul_restore_1);
+ mpmul_entry(20, L_mpmul_restore_2);
+ mpmul_entry(19, L_mpmul_restore_2);
+ mpmul_entry(18, L_mpmul_restore_2);
+ mpmul_entry(17, L_mpmul_restore_2);
+ mpmul_entry(16, L_mpmul_restore_2);
+ mpmul_entry(15, L_mpmul_restore_2);
+ mpmul_entry(14, L_mpmul_restore_2);
+ mpmul_entry(13, L_mpmul_restore_3);
+ mpmul_entry(12, L_mpmul_restore_3);
+ mpmul_entry(11, L_mpmul_restore_3);
+ mpmul_entry(10, L_mpmul_restore_3);
+ mpmul_entry( 9, L_mpmul_restore_3);
+ mpmul_entry( 8, L_mpmul_restore_3);
+ mpmul_entry( 7, L_mpmul_restore_3);
+ mpmul_entry( 6, L_mpmul_restore_4);
+ mpmul_entry( 5, L_mpmul_restore_4);
+ mpmul_entry( 4, L_mpmul_restore_4);
+ mpmul_entry( 3, L_mpmul_restore_4);
+ mpmul_entry( 2, L_mpmul_restore_4);
+ mpmul_entry( 1, L_mpmul_restore_4);
+ mpmul_entry( 0, L_mpmul_restore_4);
+
+ Label L_z31, L_z30, L_z29, L_z28, L_z27, L_z26, L_z25, L_z24;
+ Label L_z23, L_z22, L_z21, L_z20, L_z19, L_z18, L_z17, L_z16;
+ Label L_z15, L_z14, L_z13, L_z12, L_z11, L_z10, L_z09, L_z08;
+ Label L_z07, L_z06, L_z05, L_z04, L_z03, L_z02, L_z01, L_z00;
+
+ Label L_zst_base; // Store sequence base address.
+ __ bind(L_zst_base);
+
+ stx_entry(L_z31, L7, L6, gzp, offs);
+ stx_entry(L_z30, L5, L4, gzp, offs);
+ stx_entry(L_z29, L3, L2, gzp, offs);
+ stx_entry(L_z28, L1, L0, gzp, offs);
+ __ restore();
+ stx_entry(L_z27, O5, O4, gzp, offs);
+ stx_entry(L_z26, O3, O2, gzp, offs);
+ stx_entry(L_z25, O1, O0, gzp, offs);
+ stx_entry(L_z24, L7, L6, gzp, offs);
+ stx_entry(L_z23, L5, L4, gzp, offs);
+ stx_entry(L_z22, L3, L2, gzp, offs);
+ stx_entry(L_z21, L1, L0, gzp, offs);
+ __ restore();
+ stx_entry(L_z20, O5, O4, gzp, offs);
+ stx_entry(L_z19, O3, O2, gzp, offs);
+ stx_entry(L_z18, O1, O0, gzp, offs);
+ stx_entry(L_z17, L7, L6, gzp, offs);
+ stx_entry(L_z16, L5, L4, gzp, offs);
+ stx_entry(L_z15, L3, L2, gzp, offs);
+ stx_entry(L_z14, L1, L0, gzp, offs);
+ __ restore();
+ stx_entry(L_z13, O5, O4, gzp, offs);
+ stx_entry(L_z12, O3, O2, gzp, offs);
+ stx_entry(L_z11, O1, O0, gzp, offs);
+ stx_entry(L_z10, L7, L6, gzp, offs);
+ stx_entry(L_z09, L5, L4, gzp, offs);
+ stx_entry(L_z08, L3, L2, gzp, offs);
+ stx_entry(L_z07, L1, L0, gzp, offs);
+ __ restore();
+ stx_entry(L_z06, O5, O4, gzp, offs);
+ stx_entry(L_z05, O3, O2, gzp, offs);
+ stx_entry(L_z04, O1, O0, gzp, offs);
+ stx_entry(L_z03, L7, L6, gzp, offs);
+ stx_entry(L_z02, L5, L4, gzp, offs);
+ stx_entry(L_z01, L3, L2, gzp, offs);
+ stx_entry(L_z00, L1, L0, gzp, offs);
+
+ __ restore();
+ __ restore();
+ // Exit out of 'mpmul' routine, back to multiplyToLen.
+ __ ba_short(L_exit);
+
+ Label L_zst_offs;
+ __ bind(L_zst_offs); // start of the offset table read by the final dispatch below
+
+ offs_entry(L_z31, L_zst_base); // index 31: 2048x2048
+ offs_entry(L_z30, L_zst_base);
+ offs_entry(L_z29, L_zst_base);
+ offs_entry(L_z28, L_zst_base);
+ offs_entry(L_z27, L_zst_base);
+ offs_entry(L_z26, L_zst_base);
+ offs_entry(L_z25, L_zst_base);
+ offs_entry(L_z24, L_zst_base);
+ offs_entry(L_z23, L_zst_base);
+ offs_entry(L_z22, L_zst_base);
+ offs_entry(L_z21, L_zst_base);
+ offs_entry(L_z20, L_zst_base);
+ offs_entry(L_z19, L_zst_base);
+ offs_entry(L_z18, L_zst_base);
+ offs_entry(L_z17, L_zst_base);
+ offs_entry(L_z16, L_zst_base);
+ offs_entry(L_z15, L_zst_base);
+ offs_entry(L_z14, L_zst_base);
+ offs_entry(L_z13, L_zst_base);
+ offs_entry(L_z12, L_zst_base);
+ offs_entry(L_z11, L_zst_base);
+ offs_entry(L_z10, L_zst_base);
+ offs_entry(L_z09, L_zst_base);
+ offs_entry(L_z08, L_zst_base);
+ offs_entry(L_z07, L_zst_base);
+ offs_entry(L_z06, L_zst_base);
+ offs_entry(L_z05, L_zst_base);
+ offs_entry(L_z04, L_zst_base);
+ offs_entry(L_z03, L_zst_base);
+ offs_entry(L_z02, L_zst_base);
+ offs_entry(L_z01, L_zst_base);
+ offs_entry(L_z00, L_zst_base); // index 0: 64x64
+
+ __ bind(L_mpmul_restore_4);
+ __ restore();
+ __ bind(L_mpmul_restore_3);
+ __ restore();
+ __ bind(L_mpmul_restore_2);
+ __ restore();
+ __ bind(L_mpmul_restore_1);
+ __ restore();
+ __ bind(L_mpmul_restore_0);
+
+ // Dispatch via offset vector entry, into z-store sequence.
+ Label L_zst_rdpc;
+ __ bind(L_zst_rdpc);
+
+ assert(L_zst_base.is_bound(), "must be");
+ assert(L_zst_offs.is_bound(), "must be");
+ assert(L_zst_rdpc.is_bound(), "must be");
+
+ int dbase = L_zst_rdpc.loc_pos() - L_zst_base.loc_pos(); // distance back from the rdpc below to the store sequence
+ int doffs = L_zst_rdpc.loc_pos() - L_zst_offs.loc_pos(); // distance back from the rdpc below to the offset table
+
+ temp = gyp; // Alright to reuse 'gyp'.
+
+ __ rdpc(addr);
+ __ sub(addr, doffs, temp); // temp = address of the offset table
+ __ srlx(disp, 1, disp); // halve disp: table entries are presumably 4 bytes (read with lduw) vs 8-byte code entries - confirm
+ __ lduw(temp, disp, offs); // offs = table entry selected by disp
+ __ sub(addr, dbase, temp); // temp = address of the store sequence base
+ __ jmp(temp, offs);
+ __ delayed()->clr(offs); // offs is reset to 0; the stx_entry sequence uses it as the running store offset
+ }
+
+ void gen_mult_64x64(Register xp, Register xn, // x-vector base, x length (in u32 units on entry, see below)
+ Register yp, Register yn, // y-vector base, y length (in u32 units on entry, see below)
+ Register zp, Register zn, Label &L_exit) // z-vector base, z length; L_exit is branched to when done
+ {
+ // Emits code computing z = x * y one 64-bit datum at a time, walking each vector from its last index down to 0 (carries move towards [k-1]).
+ // Assumes a stack frame has already been created, i.e. local and output registers are available for immediate use. xn, yn and zn are overwritten.
+
+ const Register ri = L0; // Outer loop index, xv[i]
+ const Register rj = L1; // Inner loop index, yv[j]
+ const Register rk = L2; // Output loop index, zv[k]
+ const Register rx = L4; // x-vector datum [i]
+ const Register ry = L5; // y-vector datum [j]
+ const Register rz = L6; // z-vector datum [k]
+ const Register rc = L7; // carry over (to z-vector datum [k-1])
+
+ const Register lop = O0; // lo-64b product
+ const Register hip = O1; // hi-64b product
+
+ const Register zero = G0; // used as the zero operand of the add-with-carry steps
+
+ Label L_loop_i, L_exit_loop_i;
+ Label L_loop_j;
+ Label L_loop_i2, L_exit_loop_i2;
+
+ __ srlx(xn, 1, xn); // halve: count of u32 elements -> count of u64 elements
+ __ srlx(yn, 1, yn); // halve: count of u32 elements -> count of u64 elements
+ __ srlx(zn, 1, zn); // halve: count of u32 elements -> count of u64 elements
+ __ dec(xn); // Adjust [0..(N/2)-1]
+ __ dec(yn);
+ __ dec(zn);
+ __ clr(rc); // u64 c = 0
+ __ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
+ __ sllx(yn, 3, rj); // int j = yn (byte offset j = 8*yn)
+ __ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)
+ __ ldx(yp, rj, ry); // u64 y = yp[yn]
+
+ // First pass, z[k] = x[i] * y[yn] + c: for (int i = xn; i >= 0; i--)
+ __ bind(L_loop_i);
+
+ __ cmp_and_br_short(ri, 0, // i >= 0
+ Assembler::less, Assembler::pn, L_exit_loop_i);
+ __ ldx(xp, ri, rx); // x = xp[i]
+ __ mulx(rx, ry, lop); // lo-64b-part of result 64x64
+ __ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
+ __ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry)
+ __ addxc(hip, zero, rc); // carry over to next datum [k-1]
+ __ stx(lop, zp, rk); // z[k] = lop
+ __ dec(rk, 8); // k--
+ __ dec(ri, 8); // i--
+ __ ba_short(L_loop_i);
+
+ __ bind(L_exit_loop_i);
+ __ stx(rc, zp, rk); // z[k] = c
+
+ // Remaining passes accumulate into z: for (int j = yn - 1; j >= 0; j--)
+ __ sllx(yn, 3, rj); // byte offset 8*yn, decremented by one datum on the next line
+ __ dec(rj, 8); // int j = yn - 1
+
+ __ bind(L_loop_j);
+
+ __ cmp_and_br_short(rj, 0, // j >= 0
+ Assembler::less, Assembler::pn, L_exit);
+ __ clr(rc); // u64 c = 0
+ __ ldx(yp, rj, ry); // u64 y = yp[j]
+
+ // for (int i = xn, k = --zn; i >= 0; i--)
+ __ dec(zn); // --zn
+ __ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
+ __ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)
+
+ __ bind(L_loop_i2);
+
+ __ cmp_and_br_short(ri, 0, // i >= 0
+ Assembler::less, Assembler::pn, L_exit_loop_i2);
+ __ ldx(xp, ri, rx); // x = xp[i]
+ __ ldx(zp, rk, rz); // z = zp[k], accumulator
+ __ mulx(rx, ry, lop); // lo-64b-part of result 64x64
+ __ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
+ __ addcc(rz, rc, rz); // Accumulate lower order bits,
+ __ addxc(hip, zero, rc); // Accumulate higher order bits to carry
+ __ addcc(rz, lop, rz); // z += lo(p) + c
+ __ addxc(rc, zero, rc); // c += carry out of the add on the previous line
+ __ stx(rz, zp, rk); // zp[k] = z
+ __ dec(rk, 8); // k--
+ __ dec(ri, 8); // i--
+ __ ba_short(L_loop_i2);
+
+ __ bind(L_exit_loop_i2);
+ __ stx(rc, zp, rk); // z[k] = c
+ __ dec(rj, 8); // j--
+ __ ba_short(L_loop_j);
+ }
+
+ void gen_mult_64x64_unaligned(Register xp, Register xn, // x-vector base, x length (in u32 units on entry, see below)
+ Register yp, Register yn, // y-vector base, y length (in u32 units on entry, see below)
+ Register zp, Register zn, Label &L_exit) // z-vector base, z length; L_exit is branched to when done
+ {
+ // Emits code computing z = x * y one 64-bit datum at a time, where every datum is loaded/stored as two 32-bit halves (offsets 0 and 4).
+ // Assumes a stack frame has already been created, i.e. local and output registers are available for use. xn, yn and zn are overwritten.
+
+ const Register xpc = L0; // Outer loop cursor, xp[i]
+ const Register ypc = L1; // Inner loop cursor, yp[j]
+ const Register zpc = L2; // Output loop cursor, zp[k]
+ const Register rx = L4; // x-vector datum [i]
+ const Register ry = L5; // y-vector datum [j]
+ const Register rz = L6; // z-vector datum [k]
+ const Register rc = L7; // carry over (to z-vector datum [k-1])
+ const Register rt = O2; // scratch: holds the upper 32-bit half while assembling or splitting a datum
+
+ const Register lop = O0; // lo-64b product
+ const Register hip = O1; // hi-64b product
+
+ const Register zero = G0; // used as the zero operand of the add-with-carry steps
+
+ Label L_loop_i, L_exit_loop_i;
+ Label L_loop_j;
+ Label L_loop_i2, L_exit_loop_i2;
+
+ __ srlx(xn, 1, xn); // halve: count of u32 elements -> count of u64 elements
+ __ srlx(yn, 1, yn); // halve: count of u32 elements -> count of u64 elements
+ __ srlx(zn, 1, zn); // halve: count of u32 elements -> count of u64 elements
+ __ dec(xn); // Adjust [0..(N/2)-1]
+ __ dec(yn);
+ __ dec(zn);
+ __ clr(rc); // u64 c = 0
+ __ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn)
+ __ add(xp, xpc, xpc);
+ __ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn)
+ __ add(yp, ypc, ypc);
+ __ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn)
+ __ add(zp, zpc, zpc);
+ __ lduw(ypc, 0, rt); // u64 y = yp[yn]: upper half from offset 0
+ __ lduw(ypc, 4, ry); // ... lower half from offset 4
+ __ sllx(rt, 32, rt); // move upper half into bits 63..32
+ __ or3(rt, ry, ry); // y = (upper << 32) | lower
+
+ // First pass, z[k] = x[i] * y[yn] + c: for (int i = xn; i >= 0; i--)
+ __ bind(L_loop_i);
+
+ __ cmp_and_br_short(xpc, xp,// i >= 0 (exit once the cursor is below xp)
+ Assembler::less, Assembler::pn, L_exit_loop_i);
+ __ lduw(xpc, 0, rt); // u64 x = xp[i]
+ __ lduw(xpc, 4, rx); // ...
+ __ sllx(rt, 32, rt);
+ __ or3(rt, rx, rx);
+ __ mulx(rx, ry, lop); // lo-64b-part of result 64x64
+ __ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
+ __ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry)
+ __ addxc(hip, zero, rc); // carry over to next datum [k-1]
+ __ srlx(lop, 32, rt); // upper half of lop
+ __ stw(rt, zpc, 0); // z[k] = lop
+ __ stw(lop, zpc, 4); // ...
+ __ dec(zpc, 8); // k-- (zpc--)
+ __ dec(xpc, 8); // i-- (xpc--)
+ __ ba_short(L_loop_i);
+
+ __ bind(L_exit_loop_i);
+ __ srlx(rc, 32, rt); // upper half of c
+ __ stw(rt, zpc, 0); // z[k] = c
+ __ stw(rc, zpc, 4);
+
+ // Remaining passes accumulate into z: for (int j = yn - 1; j >= 0; j--)
+ __ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn)
+ __ add(yp, ypc, ypc);
+ __ dec(ypc, 8); // yn - 1 (ypc--)
+
+ __ bind(L_loop_j);
+
+ __ cmp_and_br_short(ypc, yp,// j >= 0 (exit once the cursor is below yp)
+ Assembler::less, Assembler::pn, L_exit);
+ __ clr(rc); // u64 c = 0
+ __ lduw(ypc, 0, rt); // u64 y = yp[j] (= *ypc)
+ __ lduw(ypc, 4, ry); // ...
+ __ sllx(rt, 32, rt);
+ __ or3(rt, ry, ry);
+
+ // for (int i = xn, k = --zn; i >= 0; i--)
+ __ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn)
+ __ add(xp, xpc, xpc);
+ __ dec(zn); // --zn
+ __ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn)
+ __ add(zp, zpc, zpc);
+
+ __ bind(L_loop_i2);
+
+ __ cmp_and_br_short(xpc, xp,// i >= 0 (exit once the cursor is below xp)
+ Assembler::less, Assembler::pn, L_exit_loop_i2);
+ __ lduw(xpc, 0, rt); // u64 x = xp[i] (= *xpc)
+ __ lduw(xpc, 4, rx); // ...
+ __ sllx(rt, 32, rt);
+ __ or3(rt, rx, rx);
+
+ __ lduw(zpc, 0, rt); // u64 z = zp[k] (= *zpc)
+ __ lduw(zpc, 4, rz); // ...
+ __ sllx(rt, 32, rt);
+ __ or3(rt, rz, rz);
+
+ __ mulx(rx, ry, lop); // lo-64b-part of result 64x64
+ __ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
+ __ addcc(rz, rc, rz); // Accumulate lower order bits...
+ __ addxc(hip, zero, rc); // Accumulate higher order bits to carry
+ __ addcc(rz, lop, rz); // ... z += lo(p) + c
+ __ addxccc(rc, zero, rc); // c += carry out of the add above. NOTE(review): gen_mult_64x64 uses addxc at this step; confirm the cc-setting form is intended
+ __ srlx(rz, 32, rt); // upper half of z
+ __ stw(rt, zpc, 0); // zp[k] = z (*zpc = z)
+ __ stw(rz, zpc, 4);
+ __ dec(zpc, 8); // k-- (zpc--)
+ __ dec(xpc, 8); // i-- (xpc--)
+ __ ba_short(L_loop_i2);
+
+ __ bind(L_exit_loop_i2);
+ __ srlx(rc, 32, rt); // upper half of c
+ __ stw(rt, zpc, 0); // z[k] = c
+ __ stw(rc, zpc, 4);
+ __ dec(ypc, 8); // j-- (ypc--)
+ __ ba_short(L_loop_j);
+ }
+
+ void gen_mult_32x32(Register xp, Register xn, // x-vector base, x length in u32 elements
+ Register yp, Register yn, // y-vector base, y length in u32 elements
+ Register zp, Register zn, Label &L_exit) // z-vector base, z length; L_exit is branched to when done
+ {
+ // Emits code computing z = x * y one 32-bit datum at a time, walking each vector from its last index down to 0 (carries move towards [k-1]).
+ // Assumes a stack frame has already been created, i.e. local and output registers are available for use. xn, yn and zn are overwritten.
+
+ const Register ri = L0; // Outer loop index, xv[i]
+ const Register rj = L1; // Inner loop index, yv[j]
+ const Register rk = L2; // Output loop index, zv[k]
+ const Register rx = L4; // x-vector datum [i]
+ const Register ry = L5; // y-vector datum [j]
+ const Register rz = L6; // z-vector datum [k]
+ const Register rc = L7; // carry over (to z-vector datum [k-1])
+
+ const Register p64 = O0; // 64b product
+ const Register z65 = O1; // carry+64b accumulator
+ const Register c65 = O2; // carry at bit 65
+ const Register c33 = O2; // carry at bit 33 (after shift); deliberately the same register as c65
+
+ const Register zero = G0; // used as the zero operand when materialising the carry
+
+ Label L_loop_i, L_exit_loop_i;
+ Label L_loop_j;
+ Label L_loop_i2, L_exit_loop_i2;
+
+ __ dec(xn); // Adjust [0..N-1]
+ __ dec(yn);
+ __ dec(zn);
+ __ clr(rc); // u32 c = 0
+ __ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
+ __ sllx(yn, 2, rj); // int j = yn (byte offset j = 4*yn)
+ __ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)
+ __ lduw(yp, rj, ry); // u32 y = yp[yn]
+
+ // First pass, z[k] = x[i] * y[yn] + c: for (int i = xn; i >= 0; i--)
+ __ bind(L_loop_i);
+
+ __ cmp_and_br_short(ri, 0, // i >= 0
+ Assembler::less, Assembler::pn, L_exit_loop_i);
+ __ lduw(xp, ri, rx); // x = xp[i]
+ __ mulx(rx, ry, p64); // 64b result of 32x32
+ __ addcc(rc, p64, z65); // Accumulate to 65 bits (producing carry)
+ __ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
+ __ sllx(c65, 32, c33); // and shift into bit 33 (i.e. bit index 32 counting from 0)
+ __ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
+ __ add(c33, rc, rc); // carry over to next datum [k-1]
+ __ stw(z65, zp, rk); // z[k] = lo(z65)
+ __ dec(rk, 4); // k--
+ __ dec(ri, 4); // i--
+ __ ba_short(L_loop_i);
+
+ __ bind(L_exit_loop_i);
+ __ stw(rc, zp, rk); // z[k] = c
+
+ // Remaining passes accumulate into z: for (int j = yn - 1; j >= 0; j--)
+ __ sllx(yn, 2, rj); // byte offset 4*yn, decremented by one datum on the next line
+ __ dec(rj, 4); // int j = yn - 1
+
+ __ bind(L_loop_j);
+
+ __ cmp_and_br_short(rj, 0, // j >= 0
+ Assembler::less, Assembler::pn, L_exit);
+ __ clr(rc); // u32 c = 0
+ __ lduw(yp, rj, ry); // u32 y = yp[j]
+
+ // for (int i = xn, k = --zn; i >= 0; i--)
+ __ dec(zn); // --zn
+ __ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
+ __ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)
+
+ __ bind(L_loop_i2);
+
+ __ cmp_and_br_short(ri, 0, // i >= 0
+ Assembler::less, Assembler::pn, L_exit_loop_i2);
+ __ lduw(xp, ri, rx); // x = xp[i]
+ __ lduw(zp, rk, rz); // z = zp[k], accumulator
+ __ mulx(rx, ry, p64); // 64b result of 32x32
+ __ add(rz, rc, rz); // Accumulate lower order bits,
+ __ addcc(rz, p64, z65); // z += lo(p64) + c
+ __ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
+ __ sllx(c65, 32, c33); // and shift into bit 33 (i.e. bit index 32 counting from 0)
+ __ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
+ __ add(c33, rc, rc); // carry over to next datum [k-1]
+ __ stw(z65, zp, rk); // zp[k] = lo(z65)
+ __ dec(rk, 4); // k--
+ __ dec(ri, 4); // i--
+ __ ba_short(L_loop_i2);
+
+ __ bind(L_exit_loop_i2);
+ __ stw(rc, zp, rk); // z[k] = c
+ __ dec(rj, 4); // j--
+ __ ba_short(L_loop_j);
+ }
+
+
void generate_initial() {
// Generates all stubs and initializes the entry points
@@ -5073,8 +5839,14 @@ class StubGenerator: public StubCodeGenerator {
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
}
- }
+#ifdef COMPILER2
+ // Intrinsics supported by C2 only:
+ if (UseMultiplyToLenIntrinsic) {
+ StubRoutines::_multiplyToLen = generate_multiplyToLen();
+ }
+#endif // COMPILER2
+ }
public:
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
diff --git a/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp b/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp
index eb6c909c0b3..d41c5b8e4ae 100644
--- a/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp
+++ b/src/hotspot/cpu/sparc/stubRoutines_sparc.hpp
@@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) {
enum /* platform_dependent_constants */ {
// %%%%%%%% May be able to shrink this a lot
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
- code_size2 = 27000 // simply increase if too small (assembler will crash if too small)
+ code_size2 = 29000 // simply increase if too small (assembler will crash if too small)
};
class Sparc {
diff --git a/src/hotspot/cpu/sparc/templateTable_sparc.cpp b/src/hotspot/cpu/sparc/templateTable_sparc.cpp
index f8d861a1df0..8683c35e635 100644
--- a/src/hotspot/cpu/sparc/templateTable_sparc.cpp
+++ b/src/hotspot/cpu/sparc/templateTable_sparc.cpp
@@ -2049,6 +2049,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
__ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f1_offset(), Robj);
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ld_ptr( Robj, mirror_offset, Robj);
+ __ resolve_oop_handle(Robj);
}
}
diff --git a/src/hotspot/cpu/sparc/vmStructs_sparc.hpp b/src/hotspot/cpu/sparc/vmStructs_sparc.hpp
index aa21dbdb2db..d678b4dbfd4 100644
--- a/src/hotspot/cpu/sparc/vmStructs_sparc.hpp
+++ b/src/hotspot/cpu/sparc/vmStructs_sparc.hpp
@@ -101,6 +101,14 @@
declare_constant(VM_Version::ISA_XMONT) \
declare_constant(VM_Version::ISA_PAUSE_NSEC) \
declare_constant(VM_Version::ISA_VAMASK) \
+ declare_constant(VM_Version::ISA_SPARC6) \
+ declare_constant(VM_Version::ISA_DICTUNP) \
+ declare_constant(VM_Version::ISA_FPCMPSHL) \
+ declare_constant(VM_Version::ISA_RLE) \
+ declare_constant(VM_Version::ISA_SHA3) \
+ declare_constant(VM_Version::ISA_VIS3C) \
+ declare_constant(VM_Version::ISA_SPARC5B) \
+ declare_constant(VM_Version::ISA_MME) \
declare_constant(VM_Version::CPU_FAST_IDIV) \
declare_constant(VM_Version::CPU_FAST_RDPC) \
declare_constant(VM_Version::CPU_FAST_BIS) \
diff --git a/src/hotspot/cpu/sparc/vm_version_sparc.cpp b/src/hotspot/cpu/sparc/vm_version_sparc.cpp
index 37203221f71..b5ef619c35e 100644
--- a/src/hotspot/cpu/sparc/vm_version_sparc.cpp
+++ b/src/hotspot/cpu/sparc/vm_version_sparc.cpp
@@ -103,7 +103,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
}
else if (has_sparc5()) {
- // Use prefetch instruction to avoid partial RAW issue on Core S4 processors,
+ // Use prefetch instruction to avoid partial RAW issue on Core C4 processors,
// also use prefetch style 3.
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
@@ -128,7 +128,7 @@ void VM_Version::initialize() {
// We increase the number of prefetched cache lines, to use just a bit more
// aggressive approach, when the L2-cache line size is small (32 bytes), or
- // when running on newer processor implementations, such as the Core S4.
+ // when running on newer processor implementations, such as the Core C4.
bool inc_prefetch = cache_line_size > 0 && (cache_line_size < 64 || has_sparc5());
if (inc_prefetch) {
@@ -168,6 +168,16 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCBCond, false);
}
+ // Use 'mpmul' instruction if available.
+ if (has_mpmul()) {
+ if (FLAG_IS_DEFAULT(UseMPMUL)) {
+ FLAG_SET_DEFAULT(UseMPMUL, true);
+ }
+ } else if (UseMPMUL) {
+ warning("MPMUL instruction is not available on this CPU");
+ FLAG_SET_DEFAULT(UseMPMUL, false);
+ }
+
assert(BlockZeroingLowLimit > 0, "invalid value");
if (has_blk_zeroing() && cache_line_size > 0) {
@@ -208,7 +218,9 @@ void VM_Version::initialize() {
char buf[512];
jio_snprintf(buf, sizeof(buf),
- "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
+ "%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s"
+ "%s%s%s%s%s%s%s",
(has_v9() ? "v9" : ""),
(has_popc() ? ", popc" : ""),
(has_vis1() ? ", vis1" : ""),
@@ -241,6 +253,16 @@ void VM_Version::initialize() {
(has_pause_nsec() ? ", pause_nsec" : ""),
(has_vamask() ? ", vamask" : ""),
+ (has_sparc6() ? ", sparc6" : ""),
+ (has_dictunp() ? ", dictunp" : ""),
+ (has_fpcmpshl() ? ", fpcmpshl" : ""),
+ (has_rle() ? ", rle" : ""),
+ (has_sha3() ? ", sha3" : ""),
+ (has_athena_plus2()? ", athena_plus2" : ""),
+ (has_vis3c() ? ", vis3c" : ""),
+ (has_sparc5b() ? ", sparc5b" : ""),
+ (has_mme() ? ", mme" : ""),
+
(has_fast_idiv() ? ", *idiv" : ""),
(has_fast_rdpc() ? ", *rdpc" : ""),
(has_fast_bis() ? ", *bis" : ""),
@@ -409,6 +431,15 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}
+ if (UseVIS > 2) {
+ if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
+ }
+ } else if (UseMultiplyToLenIntrinsic) {
+ warning("SPARC multiplyToLen intrinsics require VIS3 instructions support. Intrinsics will be disabled");
+ FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
+ }
+
if (UseVectorizedMismatchIntrinsic) {
warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
diff --git a/src/hotspot/cpu/sparc/vm_version_sparc.hpp b/src/hotspot/cpu/sparc/vm_version_sparc.hpp
index 58e8283d6ee..04ff200f439 100644
--- a/src/hotspot/cpu/sparc/vm_version_sparc.hpp
+++ b/src/hotspot/cpu/sparc/vm_version_sparc.hpp
@@ -67,6 +67,16 @@ protected:
ISA_PAUSE_NSEC,
ISA_VAMASK,
+ ISA_SPARC6,
+ ISA_DICTUNP,
+ ISA_FPCMPSHL,
+ ISA_RLE,
+ ISA_SHA3,
+ ISA_FJATHPLUS2,
+ ISA_VIS3C,
+ ISA_SPARC5B,
+ ISA_MME,
+
// Synthesised properties:
CPU_FAST_IDIV,
@@ -79,7 +89,7 @@ protected:
};
private:
- enum { ISA_last_feature = ISA_VAMASK,
+ enum { ISA_last_feature = ISA_MME,
CPU_last_feature = CPU_BLK_ZEROING };
enum {
@@ -119,6 +129,16 @@ private:
ISA_pause_nsec_msk = UINT64_C(1) << ISA_PAUSE_NSEC,
ISA_vamask_msk = UINT64_C(1) << ISA_VAMASK,
+ ISA_sparc6_msk = UINT64_C(1) << ISA_SPARC6,
+ ISA_dictunp_msk = UINT64_C(1) << ISA_DICTUNP,
+ ISA_fpcmpshl_msk = UINT64_C(1) << ISA_FPCMPSHL,
+ ISA_rle_msk = UINT64_C(1) << ISA_RLE,
+ ISA_sha3_msk = UINT64_C(1) << ISA_SHA3,
+ ISA_fjathplus2_msk = UINT64_C(1) << ISA_FJATHPLUS2,
+ ISA_vis3c_msk = UINT64_C(1) << ISA_VIS3C,
+ ISA_sparc5b_msk = UINT64_C(1) << ISA_SPARC5B,
+ ISA_mme_msk = UINT64_C(1) << ISA_MME,
+
CPU_fast_idiv_msk = UINT64_C(1) << CPU_FAST_IDIV,
CPU_fast_rdpc_msk = UINT64_C(1) << CPU_FAST_RDPC,
CPU_fast_bis_msk = UINT64_C(1) << CPU_FAST_BIS,
@@ -153,40 +173,51 @@ private:
* UltraSPARC T2+: (Victoria Falls, etc.)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
- * UltraSPARC T3: (Rainbow Falls/S2)
+ * UltraSPARC T3: (Rainbow Falls/C2)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
- * Oracle SPARC T4/T5/M5: (Core S3)
+ * Oracle SPARC T4/T5/M5: (Core C3)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Kasumi, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL
*
- * Oracle SPARC M7: (Core S4)
+ * Oracle SPARC M7: (Core C4)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
* ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK
*
+ * Oracle SPARC M8: (Core C5)
+ * SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
+ * AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
+ * ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK, SPARC6, FPCMPSHL,
+ * DICTUNP, RLE, SHA3, MME
+ *
+ * NOTE: Oracle Number support ignored.
*/
enum {
niagara1_msk = ISA_v9_msk | ISA_vis1_msk | ISA_blk_init_msk,
niagara2_msk = niagara1_msk | ISA_popc_msk,
- core_S2_msk = niagara2_msk | ISA_vis2_msk,
+ core_C2_msk = niagara2_msk | ISA_vis2_msk,
- core_S3_msk = core_S2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
+ core_C3_msk = core_C2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
ISA_ima_msk | ISA_aes_msk | ISA_des_msk | ISA_kasumi_msk |
ISA_camellia_msk | ISA_md5_msk | ISA_sha1_msk | ISA_sha256_msk |
ISA_sha512_msk | ISA_mpmul_msk | ISA_mont_msk | ISA_pause_msk |
ISA_cbcond_msk | ISA_crc32c_msk,
- core_S4_msk = core_S3_msk - ISA_kasumi_msk |
+ core_C4_msk = core_C3_msk - ISA_kasumi_msk |
ISA_vis3b_msk | ISA_adi_msk | ISA_sparc5_msk | ISA_mwait_msk |
ISA_xmpmul_msk | ISA_xmont_msk | ISA_pause_nsec_msk | ISA_vamask_msk,
+ core_C5_msk = core_C4_msk | ISA_sparc6_msk | ISA_dictunp_msk |
+ ISA_fpcmpshl_msk | ISA_rle_msk | ISA_sha3_msk | ISA_mme_msk,
+
ultra_sparc_t1_msk = niagara1_msk,
ultra_sparc_t2_msk = niagara2_msk,
- ultra_sparc_t3_msk = core_S2_msk,
- ultra_sparc_m5_msk = core_S3_msk, // NOTE: First out-of-order pipeline.
- ultra_sparc_m7_msk = core_S4_msk
+ ultra_sparc_t3_msk = core_C2_msk,
+ ultra_sparc_m5_msk = core_C3_msk, // NOTE: First out-of-order pipeline.
+ ultra_sparc_m7_msk = core_C4_msk,
+ ultra_sparc_m8_msk = core_C5_msk
};
static uint _L2_data_cache_line_size;
@@ -247,6 +278,16 @@ public:
static bool has_pause_nsec() { return (_features & ISA_pause_nsec_msk) != 0; }
static bool has_vamask() { return (_features & ISA_vamask_msk) != 0; }
+ static bool has_sparc6() { return (_features & ISA_sparc6_msk) != 0; }
+ static bool has_dictunp() { return (_features & ISA_dictunp_msk) != 0; }
+ static bool has_fpcmpshl() { return (_features & ISA_fpcmpshl_msk) != 0; }
+ static bool has_rle() { return (_features & ISA_rle_msk) != 0; }
+ static bool has_sha3() { return (_features & ISA_sha3_msk) != 0; }
+ static bool has_athena_plus2() { return (_features & ISA_fjathplus2_msk) != 0; }
+ static bool has_vis3c() { return (_features & ISA_vis3c_msk) != 0; }
+ static bool has_sparc5b() { return (_features & ISA_sparc5b_msk) != 0; }
+ static bool has_mme() { return (_features & ISA_mme_msk) != 0; }
+
static bool has_fast_idiv() { return (_features & CPU_fast_idiv_msk) != 0; }
static bool has_fast_rdpc() { return (_features & CPU_fast_rdpc_msk) != 0; }
static bool has_fast_bis() { return (_features & CPU_fast_bis_msk) != 0; }
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
index f747e41e522..37c4c6dfa3e 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
@@ -2571,7 +2571,7 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
if (opr2->is_single_cpu()) {
// cpu register - cpu register
if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
- __ cmpptr(reg1, opr2->as_register());
+ __ cmpoop(reg1, opr2->as_register());
} else {
assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?");
__ cmpl(reg1, opr2->as_register());
@@ -2579,7 +2579,7 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
} else if (opr2->is_stack()) {
// cpu register - stack
if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
- __ cmpptr(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
+ __ cmpoop(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
} else {
__ cmpl(reg1, frame_map()->address_for_slot(opr2->single_stack_ix()));
}
@@ -2594,12 +2594,7 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
if (o == NULL) {
__ cmpptr(reg1, (int32_t)NULL_WORD);
} else {
-#ifdef _LP64
- __ movoop(rscratch1, o);
- __ cmpptr(reg1, rscratch1);
-#else
- __ cmpoop(reg1, c->as_jobject());
-#endif // _LP64
+ __ cmpoop(reg1, o);
}
} else {
fatal("unexpected type: %s", basictype_to_str(c->type()));
@@ -2709,7 +2704,7 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2,
#ifdef _LP64
// %%% Make this explode if addr isn't reachable until we figure out a
// better strategy by giving noreg as the temp for as_Address
- __ cmpptr(rscratch1, as_Address(addr, noreg));
+ __ cmpoop(rscratch1, as_Address(addr, noreg));
#else
__ cmpoop(as_Address(addr), c->as_jobject());
#endif // _LP64
@@ -3487,13 +3482,9 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
Register mdo = op->mdo()->as_register();
__ mov_metadata(mdo, md->constant_encoding());
Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
- Bytecodes::Code bc = method->java_code_at_bci(bci);
- const bool callee_is_static = callee->is_loaded() && callee->is_static();
// Perform additional virtual call profiling for invokevirtual and
// invokeinterface bytecodes
- if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
- !callee_is_static && // required for optimized MH invokes
- C1ProfileVirtualCalls) {
+ if (op->should_profile_receiver_type()) {
assert(op->recv()->is_single_cpu(), "recv must be allocated");
Register recv = op->recv()->as_register();
assert_different_registers(mdo, recv);
diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp
index a64ceb2a2ed..918a413adee 100644
--- a/src/hotspot/cpu/x86/frame_x86.cpp
+++ b/src/hotspot/cpu/x86/frame_x86.cpp
@@ -383,6 +383,7 @@ void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp
//------------------------------------------------------------------------------
// frame::adjust_unextended_sp
+#ifdef ASSERT
void frame::adjust_unextended_sp() {
// On x86, sites calling method handle intrinsics and lambda forms are treated
// as any other call site. Therefore, no special action is needed when we are
@@ -394,11 +395,12 @@ void frame::adjust_unextended_sp() {
// If the sender PC is a deoptimization point, get the original PC.
if (sender_cm->is_deopt_entry(_pc) ||
sender_cm->is_deopt_mh_entry(_pc)) {
- DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp));
+ verify_deopt_original_pc(sender_cm, _unextended_sp);
}
}
}
}
+#endif
//------------------------------------------------------------------------------
// frame::update_map_with_saved_link
diff --git a/src/hotspot/cpu/x86/frame_x86.hpp b/src/hotspot/cpu/x86/frame_x86.hpp
index dbfcaf70dc3..db8d5dc71ea 100644
--- a/src/hotspot/cpu/x86/frame_x86.hpp
+++ b/src/hotspot/cpu/x86/frame_x86.hpp
@@ -117,7 +117,7 @@
// original sp we use that convention.
intptr_t* _unextended_sp;
- void adjust_unextended_sp();
+ void adjust_unextended_sp() NOT_DEBUG_RETURN;
intptr_t* ptr_at_addr(int offset) const {
return (intptr_t*) addr_at(offset);
diff --git a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp
index 681dcc8a56b..54583b154c2 100644
--- a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp
+++ b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp
@@ -46,7 +46,7 @@ const bool CCallingConventionRequiresIntsAsLongs = false;
// pure C1, 32-bit, small machine
// i486 was the last Intel chip with 16-byte cache line size
#define DEFAULT_CACHE_LINE_SIZE 32
-#elif defined(COMPILER2) || defined(SHARK)
+#elif defined(COMPILER2)
#ifdef _LP64
// pure C2, 64-bit, large machine
#define DEFAULT_CACHE_LINE_SIZE 128
diff --git a/src/hotspot/cpu/x86/jniTypes_x86.hpp b/src/hotspot/cpu/x86/jniTypes_x86.hpp
index 170cd6e3adc..ad888dbf059 100644
--- a/src/hotspot/cpu/x86/jniTypes_x86.hpp
+++ b/src/hotspot/cpu/x86/jniTypes_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,9 +25,9 @@
#ifndef CPU_X86_VM_JNITYPES_X86_HPP
#define CPU_X86_VM_JNITYPES_X86_HPP
+#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
-#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive jni
// types to the array of arguments passed into JavaCalls::call
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index a8a908344e1..e2be5a3a6a4 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -2783,6 +2783,21 @@ void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) {
#endif // _LP64
}
+void MacroAssembler::cmpoop(Register src1, Register src2) { // oop comparison, register against register
+ cmpptr(src1, src2); // forwards to the plain pointer compare
+}
+
+void MacroAssembler::cmpoop(Register src1, Address src2) { // oop comparison, register against memory operand
+ cmpptr(src1, src2); // forwards to the plain pointer compare
+}
+
+#ifdef _LP64
+void MacroAssembler::cmpoop(Register src1, jobject src2) { // oop comparison, register against a jobject (this definition is LP64-only)
+ movoop(rscratch1, src2); // load the jobject into rscratch1, overwriting its previous contents
+ cmpptr(src1, rscratch1); // then compare register against register
+}
+#endif // _LP64
+
void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
if (reachable(adr)) {
if (os::is_MP())
@@ -6617,6 +6632,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
movptr(mirror, Address(mirror, ConstMethod::constants_offset()));
movptr(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
movptr(mirror, Address(mirror, mirror_offset));
+ resolve_oop_handle(mirror);
}
void MacroAssembler::load_klass(Register dst, Register src) {
@@ -8398,7 +8414,7 @@ void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ar
if (is_array_equ) {
// Check the input args
- cmpptr(ary1, ary2);
+ cmpoop(ary1, ary2);
jcc(Assembler::equal, TRUE_LABEL);
// Need additional checks for arrays_equals.
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index 9fa0bdbcd65..fe6ca1e467f 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -750,9 +750,12 @@ class MacroAssembler: public Assembler {
void cmpklass(Address dst, Metadata* obj);
void cmpklass(Register dst, Metadata* obj);
void cmpoop(Address dst, jobject obj);
- void cmpoop(Register dst, jobject obj);
#endif // _LP64
+ void cmpoop(Register src1, Register src2);
+ void cmpoop(Register src1, Address src2);
+ void cmpoop(Register dst, jobject obj);
+
// NOTE src2 must be the lval. This is NOT an mem-mem compare
void cmpptr(Address src1, AddressLiteral src2);
diff --git a/src/hotspot/cpu/x86/methodHandles_x86.cpp b/src/hotspot/cpu/x86/methodHandles_x86.cpp
index 2dc5660e9d9..81d57ed3422 100644
--- a/src/hotspot/cpu/x86/methodHandles_x86.cpp
+++ b/src/hotspot/cpu/x86/methodHandles_x86.cpp
@@ -182,7 +182,7 @@ void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
sizeof(u2), /*is_signed*/ false);
// assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
Label L;
- __ cmpptr(recv, __ argument_address(temp2, -1));
+ __ cmpoop(recv, __ argument_address(temp2, -1));
__ jcc(Assembler::equal, L);
__ movptr(rax, __ argument_address(temp2, -1));
__ STOP("receiver not on stack");
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index bbe81875e91..fd8ee726abd 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -566,7 +566,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
- // Support for intptr_t atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
+ // Support for jlong atomic::xchg_long(jlong exchange_value, volatile jlong* dest)
//
// Arguments :
// c_rarg0: exchange_value
@@ -574,8 +574,8 @@ class StubGenerator: public StubCodeGenerator {
//
// Result:
// *dest <- ex, return (orig *dest)
- address generate_atomic_xchg_ptr() {
- StubCodeMark mark(this, "StubRoutines", "atomic_xchg_ptr");
+ address generate_atomic_xchg_long() {
+ StubCodeMark mark(this, "StubRoutines", "atomic_xchg_long");
address start = __ pc();
__ movptr(rax, c_rarg0); // Copy to eax we need a return value anyhow
@@ -4998,7 +4998,7 @@ class StubGenerator: public StubCodeGenerator {
// atomic calls
StubRoutines::_atomic_xchg_entry = generate_atomic_xchg();
- StubRoutines::_atomic_xchg_ptr_entry = generate_atomic_xchg_ptr();
+ StubRoutines::_atomic_xchg_long_entry = generate_atomic_xchg_long();
StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg();
StubRoutines::_atomic_cmpxchg_byte_entry = generate_atomic_cmpxchg_byte();
StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long();
diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp
index 60335333076..e3c2d878344 100644
--- a/src/hotspot/cpu/x86/templateTable_x86.cpp
+++ b/src/hotspot/cpu/x86/templateTable_x86.cpp
@@ -2315,7 +2315,7 @@ void TemplateTable::if_acmp(Condition cc) {
// assume branch is more often taken than not (loops use backward branches)
Label not_taken;
__ pop_ptr(rdx);
- __ cmpptr(rdx, rax);
+ __ cmpoop(rdx, rax);
__ jcc(j_not(cc), not_taken);
branch(false, false);
__ bind(not_taken);
@@ -2563,6 +2563,13 @@ void TemplateTable::_return(TosState state) {
__ bind(skip_register_finalizer);
}
+ // Explicitly reset last_sp, for handling special case in TemplateInterpreter::deopt_reexecute_entry
+#ifdef ASSERT
+ if (state == vtos) {
+ __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+ }
+#endif
+
// Narrow result if state is itos but result type is smaller.
// Need to narrow in the return bytecode rather than in generate_return_entry
// since compiled code callers expect the result to already be narrowed.
@@ -2665,6 +2672,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
ConstantPoolCacheEntry::f1_offset())));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movptr(obj, Address(obj, mirror_offset));
+ __ resolve_oop_handle(obj);
}
}
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 99e402f8dee..6e3acc22315 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -46,7 +46,7 @@ address VM_Version::_cpuinfo_segv_addr = 0;
address VM_Version::_cpuinfo_cont_addr = 0;
static BufferBlob* stub_blob;
-static const int stub_size = 1000;
+static const int stub_size = 1100;
extern "C" {
typedef void (*get_cpu_info_stub_t)(void*);
@@ -70,7 +70,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
- Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
+ Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -267,14 +267,30 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
__ jcc(Assembler::belowEqual, done);
__ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
- __ jccb(Assembler::belowEqual, ext_cpuid1);
+ __ jcc(Assembler::belowEqual, ext_cpuid1);
__ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
__ jccb(Assembler::belowEqual, ext_cpuid5);
__ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
__ jccb(Assembler::belowEqual, ext_cpuid7);
+ __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
+ __ jccb(Assembler::belowEqual, ext_cpuid8);
+ __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
+ __ jccb(Assembler::below, ext_cpuid8);
+ //
+ // Extended cpuid(0x8000001E)
+ //
+ __ movl(rax, 0x8000001E);
+ __ cpuid();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rbx);
+ __ movl(Address(rsi, 8), rcx);
+ __ movl(Address(rsi,12), rdx);
+
//
// Extended cpuid(0x80000008)
//
+ __ bind(ext_cpuid8);
__ movl(rax, 0x80000008);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@@ -1109,11 +1125,27 @@ void VM_Version::get_processor_features() {
}
#ifdef COMPILER2
- if (MaxVectorSize > 16) {
- // Limit vectors size to 16 bytes on current AMD cpus.
+ if (cpu_family() < 0x17 && MaxVectorSize > 16) {
+ // Limit vector size to 16 bytes on AMD cpus < 17h.
FLAG_SET_DEFAULT(MaxVectorSize, 16);
}
#endif // COMPILER2
+
+ // Some defaults for AMD family 17h
+ if ( cpu_family() == 0x17 ) {
+ // On family 17h processors use XMM and UnalignedLoadStores for Array Copy
+ if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+ FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
+ }
+ if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+ FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
+ }
+#ifdef COMPILER2
+ if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+ FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+ }
+#endif
+ }
}
if( is_intel() ) { // Intel cpus specific settings
diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp
index 23c2c7c195c..0a3b53a5271 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp
@@ -228,6 +228,15 @@ class VM_Version : public Abstract_VM_Version {
} bits;
};
+ union ExtCpuid1EEbx {
+ uint32_t value;
+ struct {
+ uint32_t : 8,
+ threads_per_core : 8,
+ : 16;
+ } bits;
+ };
+
union XemXcr0Eax {
uint32_t value;
struct {
@@ -398,6 +407,12 @@ protected:
ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved
+ // cpuid function 0x8000001E // AMD 17h
+ uint32_t ext_cpuid1E_eax;
+ ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
+ uint32_t ext_cpuid1E_ecx;
+ uint32_t ext_cpuid1E_edx; // unused currently
+
// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
XemXcr0Eax xem_xcr0_eax;
uint32_t xem_xcr0_edx; // reserved
@@ -505,6 +520,14 @@ protected:
result |= CPU_CLMUL;
if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
result |= CPU_RTM;
+ if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+ result |= CPU_ADX;
+ if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+ result |= CPU_BMI2;
+ if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+ result |= CPU_SHA;
+ if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
+ result |= CPU_FMA;
// AMD features.
if (is_amd()) {
@@ -518,16 +541,8 @@ protected:
}
// Intel features.
if(is_intel()) {
- if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
- result |= CPU_ADX;
- if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
- result |= CPU_BMI2;
- if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
- result |= CPU_SHA;
if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
result |= CPU_LZCNT;
- if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
- result |= CPU_FMA;
// for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
result |= CPU_3DNOW_PREFETCH;
@@ -590,6 +605,7 @@ public:
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+ static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@@ -673,8 +689,12 @@ public:
if (is_intel() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
- result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
- cores_per_cpu();
+ if (cpu_family() >= 0x17) {
+ result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
+ } else {
+ result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+ cores_per_cpu();
+ }
}
return (result == 0 ? 1 : result);
}
diff --git a/src/hotspot/cpu/zero/cppInterpreter_zero.cpp b/src/hotspot/cpu/zero/cppInterpreter_zero.cpp
index 5c5a12a7085..4d6a9425395 100644
--- a/src/hotspot/cpu/zero/cppInterpreter_zero.cpp
+++ b/src/hotspot/cpu/zero/cppInterpreter_zero.cpp
@@ -50,9 +50,6 @@
#include "stack_zero.inline.hpp"
#include "utilities/debug.hpp"
#include "utilities/macros.hpp"
-#ifdef SHARK
-#include "shark/shark_globals.hpp"
-#endif
#ifdef CC_INTERP
@@ -276,7 +273,7 @@ int CppInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) {
markOop disp = lockee->mark()->set_unlocked();
monitor->lock()->set_displaced_header(disp);
- if (Atomic::cmpxchg_ptr(monitor, lockee->mark_addr(), disp) != disp) {
+ if (Atomic::cmpxchg((markOop)monitor, lockee->mark_addr(), disp) != disp) {
if (thread->is_lock_owned((address) disp->clear_lock_bits())) {
monitor->lock()->set_displaced_header(NULL);
}
@@ -420,7 +417,8 @@ int CppInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) {
monitor->set_obj(NULL);
if (header != NULL) {
- if (Atomic::cmpxchg_ptr(header, rcvr->mark_addr(), lock) != lock) {
+ markOop old_header = markOopDesc::encode(lock);
+ if (rcvr->cas_set_mark(header, old_header) != old_header) {
monitor->set_obj(rcvr); {
HandleMark hm(thread);
CALL_VM_NOCHECK(InterpreterRuntime::monitorexit(thread, monitor));
diff --git a/src/hotspot/cpu/zero/frame_zero.cpp b/src/hotspot/cpu/zero/frame_zero.cpp
index ad4887fb4d6..24a1ea99eb4 100644
--- a/src/hotspot/cpu/zero/frame_zero.cpp
+++ b/src/hotspot/cpu/zero/frame_zero.cpp
@@ -71,7 +71,6 @@ frame frame::sender_for_entry_frame(RegisterMap *map) const {
frame frame::sender_for_nonentry_frame(RegisterMap *map) const {
assert(zeroframe()->is_interpreter_frame() ||
- zeroframe()->is_shark_frame() ||
zeroframe()->is_fake_stub_frame(), "wrong type of frame");
return frame(zeroframe()->next(), sender_sp());
}
@@ -101,8 +100,6 @@ void frame::patch_pc(Thread* thread, address pc) {
if (pc != NULL) {
_cb = CodeCache::find_blob(pc);
- SharkFrame* sharkframe = zeroframe()->as_shark_frame();
- sharkframe->set_pc(pc);
_pc = pc;
_deopt_state = is_deoptimized;
@@ -233,8 +230,6 @@ void ZeroFrame::identify_word(int frame_index,
strncpy(valuebuf, "ENTRY_FRAME", buflen);
else if (is_interpreter_frame())
strncpy(valuebuf, "INTERPRETER_FRAME", buflen);
- else if (is_shark_frame())
- strncpy(valuebuf, "SHARK_FRAME", buflen);
else if (is_fake_stub_frame())
strncpy(valuebuf, "FAKE_STUB_FRAME", buflen);
break;
@@ -248,10 +243,6 @@ void ZeroFrame::identify_word(int frame_index,
as_interpreter_frame()->identify_word(
frame_index, offset, fieldbuf, valuebuf, buflen);
}
- else if (is_shark_frame()) {
- as_shark_frame()->identify_word(
- frame_index, offset, fieldbuf, valuebuf, buflen);
- }
else if (is_fake_stub_frame()) {
as_fake_stub_frame()->identify_word(
frame_index, offset, fieldbuf, valuebuf, buflen);
@@ -350,50 +341,6 @@ void InterpreterFrame::identify_word(int frame_index,
fieldbuf, buflen);
}
-void SharkFrame::identify_word(int frame_index,
- int offset,
- char* fieldbuf,
- char* valuebuf,
- int buflen) const {
- // Fixed part
- switch (offset) {
- case pc_off:
- strncpy(fieldbuf, "pc", buflen);
- if (method()->is_method()) {
- CompiledMethod *code = method()->code();
- if (code && code->pc_desc_at(pc())) {
- SimpleScopeDesc ssd(code, pc());
- snprintf(valuebuf, buflen, PTR_FORMAT " (bci %d)",
- (intptr_t) pc(), ssd.bci());
- }
- }
- return;
-
- case unextended_sp_off:
- strncpy(fieldbuf, "unextended_sp", buflen);
- return;
-
- case method_off:
- strncpy(fieldbuf, "method", buflen);
- if (method()->is_method()) {
- method()->name_and_sig_as_C_string(valuebuf, buflen);
- }
- return;
-
- case oop_tmp_off:
- strncpy(fieldbuf, "oop_tmp", buflen);
- return;
- }
-
- // Variable part
- if (method()->is_method()) {
- identify_vp_word(frame_index, addr_of_word(offset),
- addr_of_word(header_words + 1),
- unextended_sp() + method()->max_stack(),
- fieldbuf, buflen);
- }
-}
-
void ZeroFrame::identify_vp_word(int frame_index,
intptr_t* addr,
intptr_t* monitor_base,
diff --git a/src/hotspot/cpu/zero/frame_zero.hpp b/src/hotspot/cpu/zero/frame_zero.hpp
index 2f1c931c690..23773c12db0 100644
--- a/src/hotspot/cpu/zero/frame_zero.hpp
+++ b/src/hotspot/cpu/zero/frame_zero.hpp
@@ -62,9 +62,6 @@
const InterpreterFrame *zero_interpreterframe() const {
return zeroframe()->as_interpreter_frame();
}
- const SharkFrame *zero_sharkframe() const {
- return zeroframe()->as_shark_frame();
- }
public:
bool is_fake_stub_frame() const;
diff --git a/src/hotspot/cpu/zero/frame_zero.inline.hpp b/src/hotspot/cpu/zero/frame_zero.inline.hpp
index 1ecd9db38a8..498840724b7 100644
--- a/src/hotspot/cpu/zero/frame_zero.inline.hpp
+++ b/src/hotspot/cpu/zero/frame_zero.inline.hpp
@@ -56,18 +56,6 @@ inline frame::frame(ZeroFrame* zf, intptr_t* sp) {
_deopt_state = not_deoptimized;
break;
- case ZeroFrame::SHARK_FRAME: {
- _pc = zero_sharkframe()->pc();
- _cb = CodeCache::find_blob_unsafe(pc());
- address original_pc = CompiledMethod::get_deopt_original_pc(this);
- if (original_pc != NULL) {
- _pc = original_pc;
- _deopt_state = is_deoptimized;
- } else {
- _deopt_state = not_deoptimized;
- }
- break;
- }
case ZeroFrame::FAKE_STUB_FRAME:
_pc = NULL;
_cb = NULL;
@@ -177,10 +165,7 @@ inline intptr_t* frame::entry_frame_argument_at(int offset) const {
}
inline intptr_t* frame::unextended_sp() const {
- if (zeroframe()->is_shark_frame())
- return zero_sharkframe()->unextended_sp();
- else
- return (intptr_t *) -1;
+ return (intptr_t *) -1;
}
#endif // CPU_ZERO_VM_FRAME_ZERO_INLINE_HPP
diff --git a/src/hotspot/cpu/zero/icache_zero.hpp b/src/hotspot/cpu/zero/icache_zero.hpp
index 2383d211e2e..eefe552f78d 100644
--- a/src/hotspot/cpu/zero/icache_zero.hpp
+++ b/src/hotspot/cpu/zero/icache_zero.hpp
@@ -29,7 +29,7 @@
// Interface for updating the instruction cache. Whenever the VM
// modifies code, part of the processor instruction cache potentially
// has to be flushed. This implementation is empty: Zero never deals
-// with code, and LLVM handles cache flushing for Shark.
+// with code.
class ICache : public AbstractICache {
public:
diff --git a/src/hotspot/cpu/zero/jniTypes_zero.hpp b/src/hotspot/cpu/zero/jniTypes_zero.hpp
index 766b5e1d6bc..ae464c77aae 100644
--- a/src/hotspot/cpu/zero/jniTypes_zero.hpp
+++ b/src/hotspot/cpu/zero/jniTypes_zero.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,9 +25,9 @@
#ifndef CPU_ZERO_VM_JNITYPES_ZERO_HPP
#define CPU_ZERO_VM_JNITYPES_ZERO_HPP
+#include "jni.h"
#include "memory/allocation.hpp"
#include "oops/oop.hpp"
-#include "prims/jni.h"
// This file holds platform-dependent routines used to write primitive jni
// types to the array of arguments passed into JavaCalls::call
diff --git a/src/hotspot/cpu/zero/nativeInst_zero.cpp b/src/hotspot/cpu/zero/nativeInst_zero.cpp
index 144683f2b4f..c60caa507c7 100644
--- a/src/hotspot/cpu/zero/nativeInst_zero.cpp
+++ b/src/hotspot/cpu/zero/nativeInst_zero.cpp
@@ -42,11 +42,6 @@
// insert a jump to SharedRuntime::get_handle_wrong_method_stub()
// (dest) at the start of a compiled method (verified_entry) to avoid
// a race where a method is invoked while being made non-entrant.
-//
-// In Shark, verified_entry is a pointer to a SharkEntry. We can
-// handle this simply by changing it's entry point to point at the
-// interpreter. This only works because the interpreter and Shark
-// calling conventions are the same.
void NativeJump::patch_verified_entry(address entry,
address verified_entry,
diff --git a/src/hotspot/cpu/zero/relocInfo_zero.cpp b/src/hotspot/cpu/zero/relocInfo_zero.cpp
index 82fa13da684..8482e53d37b 100644
--- a/src/hotspot/cpu/zero/relocInfo_zero.cpp
+++ b/src/hotspot/cpu/zero/relocInfo_zero.cpp
@@ -50,7 +50,7 @@ address Relocation::pd_get_address_from_code() {
}
address* Relocation::pd_address_in_code() {
- // Relocations in Shark are just stored directly
+ ShouldNotCallThis();
return (address *) addr();
}
diff --git a/src/hotspot/cpu/zero/sharedRuntime_zero.cpp b/src/hotspot/cpu/zero/sharedRuntime_zero.cpp
index 27b96b57bec..e9916c429b1 100644
--- a/src/hotspot/cpu/zero/sharedRuntime_zero.cpp
+++ b/src/hotspot/cpu/zero/sharedRuntime_zero.cpp
@@ -41,11 +41,6 @@
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
-#ifdef SHARK
-#include "compiler/compileBroker.hpp"
-#include "shark/sharkCompiler.hpp"
-#endif
-
static address zero_null_code_stub() {
@@ -80,16 +75,8 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
BasicType *sig_bt,
VMRegPair *regs,
BasicType ret_type) {
-#ifdef SHARK
- return SharkCompiler::compiler()->generate_native_wrapper(masm,
- method,
- compile_id,
- sig_bt,
- ret_type);
-#else
ShouldNotCallThis();
return NULL;
-#endif // SHARK
}
int Deoptimization::last_frame_adjust(int callee_parameters,
diff --git a/src/hotspot/cpu/zero/sharkFrame_zero.hpp b/src/hotspot/cpu/zero/sharkFrame_zero.hpp
deleted file mode 100644
index ff26fbccf86..00000000000
--- a/src/hotspot/cpu/zero/sharkFrame_zero.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2008, 2009 Red Hat, Inc.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef CPU_ZERO_VM_SHARKFRAME_ZERO_HPP
-#define CPU_ZERO_VM_SHARKFRAME_ZERO_HPP
-
-#include "oops/method.hpp"
-#include "stack_zero.hpp"
-
-// | ... |
-// +--------------------+ ------------------
-// | stack slot n-1 | low addresses
-// | ... |
-// | stack slot 0 |
-// | monitor m-1 |
-// | ... |
-// | monitor 0 |
-// | oop_tmp |
-// | method |
-// | unextended_sp |
-// | pc |
-// | frame_type |
-// | next_frame | high addresses
-// +--------------------+ ------------------
-// | ... |
-
-class SharkFrame : public ZeroFrame {
- friend class SharkStack;
-
- private:
- SharkFrame() : ZeroFrame() {
- ShouldNotCallThis();
- }
-
- protected:
- enum Layout {
- pc_off = jf_header_words,
- unextended_sp_off,
- method_off,
- oop_tmp_off,
- header_words
- };
-
- public:
- address pc() const {
- return (address) value_of_word(pc_off);
- }
-
- void set_pc(address pc) const {
- *((address*) addr_of_word(pc_off)) = pc;
- }
-
- intptr_t* unextended_sp() const {
- return (intptr_t *) value_of_word(unextended_sp_off);
- }
-
- Method* method() const {
- return (Method*) value_of_word(method_off);
- }
-
- public:
- void identify_word(int frame_index,
- int offset,
- char* fieldbuf,
- char* valuebuf,
- int buflen) const;
-};
-
-#endif // CPU_ZERO_VM_SHARKFRAME_ZERO_HPP
diff --git a/src/hotspot/cpu/zero/shark_globals_zero.hpp b/src/hotspot/cpu/zero/shark_globals_zero.hpp
deleted file mode 100644
index 9d478114520..00000000000
--- a/src/hotspot/cpu/zero/shark_globals_zero.hpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2008, 2009, 2010 Red Hat, Inc.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef CPU_ZERO_VM_SHARK_GLOBALS_ZERO_HPP
-#define CPU_ZERO_VM_SHARK_GLOBALS_ZERO_HPP
-
-// Set the default values for platform dependent flags used by the
-// Shark compiler. See globals.hpp for details of what they do.
-
-define_pd_global(bool, BackgroundCompilation, true );
-define_pd_global(bool, UseTLAB, true );
-define_pd_global(bool, ResizeTLAB, true );
-define_pd_global(bool, InlineIntrinsics, false);
-define_pd_global(bool, PreferInterpreterNativeStubs, false);
-define_pd_global(bool, ProfileTraps, false);
-define_pd_global(bool, UseOnStackReplacement, true );
-define_pd_global(bool, TieredCompilation, false);
-
-define_pd_global(intx, CompileThreshold, 1500);
-define_pd_global(intx, Tier2CompileThreshold, 1500);
-define_pd_global(intx, Tier3CompileThreshold, 2500);
-define_pd_global(intx, Tier4CompileThreshold, 4500);
-
-define_pd_global(intx, Tier2BackEdgeThreshold, 100000);
-define_pd_global(intx, Tier3BackEdgeThreshold, 100000);
-define_pd_global(intx, Tier4BackEdgeThreshold, 100000);
-
-define_pd_global(intx, OnStackReplacePercentage, 933 );
-define_pd_global(intx, FreqInlineSize, 325 );
-define_pd_global(uintx, NewRatio, 12 );
-define_pd_global(size_t, NewSizeThreadIncrease, 4*K );
-define_pd_global(intx, InitialCodeCacheSize, 160*K);
-define_pd_global(intx, ReservedCodeCacheSize, 32*M );
-define_pd_global(intx, NonProfiledCodeHeapSize, 13*M );
-define_pd_global(intx, ProfiledCodeHeapSize, 14*M );
-define_pd_global(intx, NonNMethodCodeHeapSize, 5*M );
-define_pd_global(bool, ProfileInterpreter, false);
-define_pd_global(intx, CodeCacheExpansionSize, 32*K );
-define_pd_global(uintx, CodeCacheMinBlockLength, 1 );
-define_pd_global(uintx, CodeCacheMinimumUseSpace, 200*K);
-
-define_pd_global(size_t, MetaspaceSize, 12*M );
-define_pd_global(bool, NeverActAsServerClassMachine, true );
-define_pd_global(uint64_t, MaxRAM, 1ULL*G);
-define_pd_global(bool, CICompileOSR, true );
-
-#endif // CPU_ZERO_VM_SHARK_GLOBALS_ZERO_HPP
diff --git a/src/hotspot/cpu/zero/stack_zero.cpp b/src/hotspot/cpu/zero/stack_zero.cpp
index a9bf5309921..924f21c302b 100644
--- a/src/hotspot/cpu/zero/stack_zero.cpp
+++ b/src/hotspot/cpu/zero/stack_zero.cpp
@@ -52,9 +52,6 @@ void ZeroStack::handle_overflow(TRAPS) {
intptr_t *sp = thread->zero_stack()->sp();
ZeroFrame *frame = thread->top_zero_frame();
while (frame) {
- if (frame->is_shark_frame())
- break;
-
if (frame->is_interpreter_frame()) {
interpreterState istate =
frame->as_interpreter_frame()->interpreter_state();
diff --git a/src/hotspot/cpu/zero/stack_zero.hpp b/src/hotspot/cpu/zero/stack_zero.hpp
index ca95c55a2b6..b610f45ac32 100644
--- a/src/hotspot/cpu/zero/stack_zero.hpp
+++ b/src/hotspot/cpu/zero/stack_zero.hpp
@@ -121,7 +121,6 @@ class ZeroStack {
class EntryFrame;
class InterpreterFrame;
-class SharkFrame;
class FakeStubFrame;
//
@@ -151,7 +150,6 @@ class ZeroFrame {
enum FrameType {
ENTRY_FRAME = 1,
INTERPRETER_FRAME,
- SHARK_FRAME,
FAKE_STUB_FRAME
};
@@ -180,9 +178,6 @@ class ZeroFrame {
bool is_interpreter_frame() const {
return type() == INTERPRETER_FRAME;
}
- bool is_shark_frame() const {
- return type() == SHARK_FRAME;
- }
bool is_fake_stub_frame() const {
return type() == FAKE_STUB_FRAME;
}
@@ -196,10 +191,6 @@ class ZeroFrame {
assert(is_interpreter_frame(), "should be");
return (InterpreterFrame *) this;
}
- SharkFrame *as_shark_frame() const {
- assert(is_shark_frame(), "should be");
- return (SharkFrame *) this;
- }
FakeStubFrame *as_fake_stub_frame() const {
assert(is_fake_stub_frame(), "should be");
return (FakeStubFrame *) this;
diff --git a/src/hotspot/cpu/zero/stack_zero.inline.hpp b/src/hotspot/cpu/zero/stack_zero.inline.hpp
index 02d12e8e96d..3398df72486 100644
--- a/src/hotspot/cpu/zero/stack_zero.inline.hpp
+++ b/src/hotspot/cpu/zero/stack_zero.inline.hpp
@@ -29,7 +29,6 @@
#include "runtime/thread.hpp"
#include "stack_zero.hpp"
-// This function should match SharkStack::CreateStackOverflowCheck
inline void ZeroStack::overflow_check(int required_words, TRAPS) {
// Check the Zero stack
if (available_words() < required_words) {
diff --git a/src/hotspot/cpu/zero/stubGenerator_zero.cpp b/src/hotspot/cpu/zero/stubGenerator_zero.cpp
index 9d64fb99e94..748ad405293 100644
--- a/src/hotspot/cpu/zero/stubGenerator_zero.cpp
+++ b/src/hotspot/cpu/zero/stubGenerator_zero.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2010, 2015 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -253,9 +253,8 @@ class StubGenerator: public StubCodeGenerator {
// atomic calls
StubRoutines::_atomic_xchg_entry = ShouldNotCallThisStub();
- StubRoutines::_atomic_xchg_ptr_entry = ShouldNotCallThisStub();
+ StubRoutines::_atomic_xchg_long_entry = ShouldNotCallThisStub();
StubRoutines::_atomic_cmpxchg_entry = ShouldNotCallThisStub();
- StubRoutines::_atomic_cmpxchg_ptr_entry = ShouldNotCallThisStub();
StubRoutines::_atomic_cmpxchg_byte_entry = ShouldNotCallThisStub();
StubRoutines::_atomic_cmpxchg_long_entry = ShouldNotCallThisStub();
StubRoutines::_atomic_add_entry = ShouldNotCallThisStub();
diff --git a/src/hotspot/os/aix/decoder_aix.hpp b/src/hotspot/os/aix/decoder_aix.hpp
index 50b26b5193d..0389852e4cb 100644
--- a/src/hotspot/os/aix/decoder_aix.hpp
+++ b/src/hotspot/os/aix/decoder_aix.hpp
@@ -34,8 +34,6 @@ class AIXDecoder: public AbstractDecoder {
}
virtual ~AIXDecoder() {}
- virtual bool can_decode_C_frame_in_vm() const { return true; }
-
virtual bool demangle(const char* symbol, char* buf, int buflen) { return false; } // use AixSymbols::get_function_name to demangle
virtual bool decode(address addr, char* buf, int buflen, int* offset, const char* modulepath, bool demangle) {
diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp
index 68a206afd14..eaf7a4f2ae9 100644
--- a/src/hotspot/os/aix/os_aix.cpp
+++ b/src/hotspot/os/aix/os_aix.cpp
@@ -770,8 +770,15 @@ static void *thread_native_entry(Thread *thread) {
const pthread_t pthread_id = ::pthread_self();
const tid_t kernel_thread_id = ::thread_self();
- log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT ").",
- os::current_thread_id(), (uintx) kernel_thread_id);
+ LogTarget(Info, os, thread) lt;
+ if (lt.is_enabled()) {
+ address low_address = thread->stack_end();
+ address high_address = thread->stack_base();
+ lt.print("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT
+ ", stack [" PTR_FORMAT " - " PTR_FORMAT " (" SIZE_FORMAT "k using %uk pages)).",
+ os::current_thread_id(), (uintx) kernel_thread_id, low_address, high_address,
+ (high_address - low_address) / K, os::Aix::query_pagesize(low_address) / K);
+ }
// Normally, pthread stacks on AIX live in the data segment (are allocated with malloc()
// by the pthread library). In rare cases, this may not be the case, e.g. when third-party
@@ -864,6 +871,14 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
// Calculate stack size if it's not specified by caller.
size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);
+ // JDK-8187028: It was observed that on some configurations (4K backed thread stacks)
+ // the real thread stack size may be smaller than the requested stack size, by as much as 64K.
+ // This very much looks like a pthread lib error. As a workaround, increase the stack size
+ // by 64K for small thread stacks (arbitrarily chosen to be < 4MB)
+ if (stack_size < 4096 * K) {
+ stack_size += 64 * K;
+ }
+
// On Aix, pthread_attr_setstacksize fails with huge values and leaves the
// thread size in attr unchanged. If this is the minimal stack size as set
// by pthread_attr_init this leads to crashes after thread creation. E.g. the
@@ -874,8 +889,12 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
stack_size / K);
}
- // Configure libc guard page.
- ret = pthread_attr_setguardsize(&attr, os::Aix::default_guard_size(thr_type));
+ // Save some cycles and a page by disabling OS guard pages where we have our own
+ // VM guard pages (in java threads). For other threads, keep system default guard
+ // pages in place.
+ if (thr_type == java_thread || thr_type == compiler_thread) {
+ ret = pthread_attr_setguardsize(&attr, 0);
+ }
pthread_t tid = 0;
if (ret == 0) {
@@ -3004,19 +3023,6 @@ bool os::Aix::chained_handler(int sig, siginfo_t* siginfo, void* context) {
return chained;
}
-size_t os::Aix::default_guard_size(os::ThreadType thr_type) {
- // Creating guard page is very expensive. Java thread has HotSpot
- // guard pages, only enable glibc guard page for non-Java threads.
- // (Remember: compiler thread is a Java thread, too!)
- //
- // Aix can have different page sizes for stack (4K) and heap (64K).
- // As Hotspot knows only one page size, we assume the stack has
- // the same page size as the heap. Returning page_size() here can
- // cause 16 guard pages which we want to avoid. Thus we return 4K
- // which will be rounded to the real page size by the OS.
- return ((thr_type == java_thread || thr_type == compiler_thread) ? 0 : 4 * K);
-}
-
struct sigaction* os::Aix::get_preinstalled_handler(int sig) {
if (sigismember(&sigs, sig)) {
return &sigact[sig];
@@ -3443,8 +3449,6 @@ void os::init(void) {
init_random(1234567);
- ThreadCritical::initialize();
-
// Main_thread points to the aboriginal thread.
Aix::_main_thread = pthread_self();
diff --git a/src/hotspot/os/aix/os_aix.hpp b/src/hotspot/os/aix/os_aix.hpp
index 95a32704195..65bbe9186dc 100644
--- a/src/hotspot/os/aix/os_aix.hpp
+++ b/src/hotspot/os/aix/os_aix.hpp
@@ -139,9 +139,6 @@ class Aix {
// libpthread version string
static void libpthread_init();
- // Return default libc guard size for the specified thread type.
- static size_t default_guard_size(os::ThreadType thr_type);
-
// Function returns true if we run on OS/400 (pase), false if we run
// on AIX.
static bool on_pase() {
diff --git a/src/hotspot/os/aix/threadCritical_aix.cpp b/src/hotspot/os/aix/threadCritical_aix.cpp
index a5d893ba9f1..cd25cb68dc4 100644
--- a/src/hotspot/os/aix/threadCritical_aix.cpp
+++ b/src/hotspot/os/aix/threadCritical_aix.cpp
@@ -38,12 +38,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;
-void ThreadCritical::initialize() {
-}
-
-void ThreadCritical::release() {
-}
-
ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {
diff --git a/src/hotspot/os/bsd/decoder_machO.hpp b/src/hotspot/os/bsd/decoder_machO.hpp
index 7872d6da5a9..d15dc738d34 100644
--- a/src/hotspot/os/bsd/decoder_machO.hpp
+++ b/src/hotspot/os/bsd/decoder_machO.hpp
@@ -35,9 +35,6 @@ class MachODecoder : public AbstractDecoder {
public:
MachODecoder() { }
virtual ~MachODecoder() { }
- virtual bool can_decode_C_frame_in_vm() const {
- return true;
- }
virtual bool demangle(const char* symbol, char* buf, int buflen);
virtual bool decode(address pc, char* buf, int buflen, int* offset,
const void* base);
diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp
index ef83bf31130..bfa7811f8da 100644
--- a/src/hotspot/os/bsd/os_bsd.cpp
+++ b/src/hotspot/os/bsd/os_bsd.cpp
@@ -3353,8 +3353,6 @@ void os::init(void) {
init_random(1234567);
- ThreadCritical::initialize();
-
Bsd::set_page_size(getpagesize());
if (Bsd::page_size() == -1) {
fatal("os_bsd.cpp: os::init: sysconf failed (%s)", os::strerror(errno));
diff --git a/src/hotspot/os/bsd/threadCritical_bsd.cpp b/src/hotspot/os/bsd/threadCritical_bsd.cpp
index 7cac3ca228b..71c51df599d 100644
--- a/src/hotspot/os/bsd/threadCritical_bsd.cpp
+++ b/src/hotspot/os/bsd/threadCritical_bsd.cpp
@@ -37,12 +37,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;
-void ThreadCritical::initialize() {
-}
-
-void ThreadCritical::release() {
-}
-
ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index e6ebec3e53e..5eabc870287 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -4768,8 +4768,6 @@ void os::init(void) {
init_random(1234567);
- ThreadCritical::initialize();
-
Linux::set_page_size(sysconf(_SC_PAGESIZE));
if (Linux::page_size() == -1) {
fatal("os_linux.cpp: os::init: sysconf failed (%s)",
diff --git a/src/hotspot/os/linux/os_linux.inline.hpp b/src/hotspot/os/linux/os_linux.inline.hpp
index a665e4c69c6..cf00c6a4621 100644
--- a/src/hotspot/os/linux/os_linux.inline.hpp
+++ b/src/hotspot/os/linux/os_linux.inline.hpp
@@ -98,6 +98,11 @@ inline int os::ftruncate(int fd, jlong length) {
inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
{
+// readdir_r has been deprecated since glibc 2.24.
+// See https://sourceware.org/bugzilla/show_bug.cgi?id=19056 for more details.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
dirent* p;
int status;
assert(dirp != NULL, "just checking");
@@ -111,6 +116,8 @@ inline struct dirent* os::readdir(DIR* dirp, dirent *dbuf)
return NULL;
} else
return p;
+
+#pragma GCC diagnostic pop
}
inline int os::closedir(DIR *dirp) {
diff --git a/src/hotspot/os/linux/threadCritical_linux.cpp b/src/hotspot/os/linux/threadCritical_linux.cpp
index 7cac3ca228b..71c51df599d 100644
--- a/src/hotspot/os/linux/threadCritical_linux.cpp
+++ b/src/hotspot/os/linux/threadCritical_linux.cpp
@@ -37,12 +37,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;
-void ThreadCritical::initialize() {
-}
-
-void ThreadCritical::release() {
-}
-
ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {
diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp
index 554238313f8..422f7fcac3c 100644
--- a/src/hotspot/os/posix/os_posix.cpp
+++ b/src/hotspot/os/posix/os_posix.cpp
@@ -1770,6 +1770,12 @@ int os::PlatformEvent::park(jlong millis) {
if (v == 0) { // Do this the hard way by blocking ...
struct timespec abst;
+ // We have to watch for overflow when converting millis to nanos,
+ // but if millis is that large then we will end up limiting to
+ // MAX_SECS anyway, so just do that here.
+ if (millis / MILLIUNITS > MAX_SECS) {
+ millis = jlong(MAX_SECS) * MILLIUNITS;
+ }
to_abstime(&abst, millis * (NANOUNITS / MILLIUNITS), false);
int ret = OS_TIMEOUT;
diff --git a/src/hotspot/os/solaris/os_solaris.cpp b/src/hotspot/os/solaris/os_solaris.cpp
index 065fcb4e602..814589d375d 100644
--- a/src/hotspot/os/solaris/os_solaris.cpp
+++ b/src/hotspot/os/solaris/os_solaris.cpp
@@ -4076,6 +4076,7 @@ int_fnP_cond_tP os::Solaris::_cond_broadcast;
int_fnP_cond_tP_i_vP os::Solaris::_cond_init;
int_fnP_cond_tP os::Solaris::_cond_destroy;
int os::Solaris::_cond_scope = USYNC_THREAD;
+bool os::Solaris::_synchronization_initialized;
void os::Solaris::synchronization_init() {
if (UseLWPSynchronization) {
@@ -4125,6 +4126,7 @@ void os::Solaris::synchronization_init() {
os::Solaris::set_cond_destroy(::cond_destroy);
}
}
+ _synchronization_initialized = true;
}
bool os::Solaris::liblgrp_init() {
@@ -4198,9 +4200,6 @@ void os::init(void) {
dladdr1_func = CAST_TO_FN_PTR(dladdr1_func_type, dlsym(hdl, "dladdr1"));
}
- // (Solaris only) this switches to calls that actually do locking.
- ThreadCritical::initialize();
-
main_thread = thr_self();
// dynamic lookup of functions that may not be available in our lowest
diff --git a/src/hotspot/os/solaris/os_solaris.hpp b/src/hotspot/os/solaris/os_solaris.hpp
index 56305846c58..c5fe31847f0 100644
--- a/src/hotspot/os/solaris/os_solaris.hpp
+++ b/src/hotspot/os/solaris/os_solaris.hpp
@@ -65,6 +65,8 @@ class Solaris {
static int_fnP_cond_tP _cond_destroy;
static int _cond_scope;
+ static bool _synchronization_initialized;
+
typedef uintptr_t lgrp_cookie_t;
typedef id_t lgrp_id_t;
typedef int lgrp_rsrc_t;
@@ -227,6 +229,8 @@ class Solaris {
static void set_cond_destroy(int_fnP_cond_tP func) { _cond_destroy = func; }
static void set_cond_scope(int scope) { _cond_scope = scope; }
+ static bool synchronization_initialized() { return _synchronization_initialized; }
+
static void set_lgrp_home(lgrp_home_func_t func) { _lgrp_home = func; }
static void set_lgrp_init(lgrp_init_func_t func) { _lgrp_init = func; }
static void set_lgrp_fini(lgrp_fini_func_t func) { _lgrp_fini = func; }
diff --git a/src/hotspot/os/solaris/threadCritical_solaris.cpp b/src/hotspot/os/solaris/threadCritical_solaris.cpp
index 53bd865b592..bb2c9e7e875 100644
--- a/src/hotspot/os/solaris/threadCritical_solaris.cpp
+++ b/src/hotspot/os/solaris/threadCritical_solaris.cpp
@@ -42,10 +42,9 @@
static mutex_t global_mut;
static thread_t global_mut_owner = -1;
static int global_mut_count = 0;
-static bool initialized = false;
ThreadCritical::ThreadCritical() {
- if (initialized) {
+ if (os::Solaris::synchronization_initialized()) {
thread_t owner = thr_self();
if (global_mut_owner != owner) {
if (os::Solaris::mutex_lock(&global_mut))
@@ -62,7 +61,7 @@ ThreadCritical::ThreadCritical() {
}
ThreadCritical::~ThreadCritical() {
- if (initialized) {
+ if (os::Solaris::synchronization_initialized()) {
assert(global_mut_owner == thr_self(), "must have correct owner");
assert(global_mut_count > 0, "must have correct count");
--global_mut_count;
@@ -75,12 +74,3 @@ ThreadCritical::~ThreadCritical() {
assert (Threads::number_of_threads() == 0, "valid only during initialization");
}
}
-
-void ThreadCritical::initialize() {
- // This method is called at the end of os::init(). Until
- // then, we don't do real locking.
- initialized = true;
-}
-
-void ThreadCritical::release() {
-}
diff --git a/src/hotspot/os/windows/decoder_windows.cpp b/src/hotspot/os/windows/decoder_windows.cpp
index 9e43367d494..ecb4c67934f 100644
--- a/src/hotspot/os/windows/decoder_windows.cpp
+++ b/src/hotspot/os/windows/decoder_windows.cpp
@@ -23,136 +23,28 @@
*/
#include "precompiled.hpp"
-#include "prims/jvm.h"
-#include "runtime/arguments.hpp"
-#include "runtime/os.hpp"
-#include "decoder_windows.hpp"
+#include "utilities/decoder.hpp"
+#include "symbolengine.hpp"
#include "windbghelp.hpp"
-WindowsDecoder::WindowsDecoder() {
- _can_decode_in_vm = true;
- _decoder_status = no_error;
- initialize();
+bool Decoder::decode(address addr, char* buf, int buflen, int* offset, const char* modulepath, bool demangle) {
+ return SymbolEngine::decode(addr, buf, buflen, offset, demangle);
}
-void WindowsDecoder::initialize() {
- if (!has_error()) {
- HANDLE hProcess = ::GetCurrentProcess();
- WindowsDbgHelp::symSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_EXACT_SYMBOLS);
- if (!WindowsDbgHelp::symInitialize(hProcess, NULL, TRUE)) {
- _decoder_status = helper_init_error;
- return;
- }
-
- // set pdb search paths
- char paths[MAX_PATH];
- int len = sizeof(paths);
- if (!WindowsDbgHelp::symGetSearchPath(hProcess, paths, len)) {
- paths[0] = '\0';
- } else {
- // available spaces in path buffer
- len -= (int)strlen(paths);
- }
-
- char tmp_path[MAX_PATH];
- DWORD dwSize;
- HMODULE hJVM = ::GetModuleHandle("jvm.dll");
- tmp_path[0] = '\0';
- // append the path where jvm.dll is located
- if (hJVM != NULL && (dwSize = ::GetModuleFileName(hJVM, tmp_path, sizeof(tmp_path))) > 0) {
- while (dwSize > 0 && tmp_path[dwSize] != '\\') {
- dwSize --;
- }
-
- tmp_path[dwSize] = '\0';
-
- if (dwSize > 0 && len > (int)dwSize + 1) {
- strncat(paths, os::path_separator(), 1);
- strncat(paths, tmp_path, dwSize);
- len -= dwSize + 1;
- }
- }
-
- // append $JRE/bin. Arguments::get_java_home actually returns $JRE
- // path
- char *p = Arguments::get_java_home();
- assert(p != NULL, "empty java home");
- size_t java_home_len = strlen(p);
- if (len > (int)java_home_len + 5) {
- strncat(paths, os::path_separator(), 1);
- strncat(paths, p, java_home_len);
- strncat(paths, "\\bin", 4);
- len -= (int)(java_home_len + 5);
- }
-
- // append $JDK/bin path if it exists
- assert(java_home_len < MAX_PATH, "Invalid path length");
- // assume $JRE is under $JDK, construct $JDK/bin path and
- // see if it exists or not
- if (strncmp(&p[java_home_len - 3], "jre", 3) == 0) {
- strncpy(tmp_path, p, java_home_len - 3);
- tmp_path[java_home_len - 3] = '\0';
- strncat(tmp_path, "bin", 3);
-
- // if the directory exists
- DWORD dwAttrib = GetFileAttributes(tmp_path);
- if (dwAttrib != INVALID_FILE_ATTRIBUTES &&
- (dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) {
- // tmp_path should have the same length as java_home_len, since we only
- // replaced 'jre' with 'bin'
- if (len > (int)java_home_len + 1) {
- strncat(paths, os::path_separator(), 1);
- strncat(paths, tmp_path, java_home_len);
- }
- }
- }
-
- WindowsDbgHelp::symSetSearchPath(hProcess, paths);
-
- // find out if jvm.dll contains private symbols, by decoding
- // current function and comparing the result
- address addr = (address)Decoder::demangle;
- char buf[MAX_PATH];
- if (decode(addr, buf, sizeof(buf), NULL, NULL, true /* demangle */)) {
- _can_decode_in_vm = !strcmp(buf, "Decoder::demangle");
- }
- }
+bool Decoder::decode(address addr, char* buf, int buflen, int* offset, const void* base) {
+ return SymbolEngine::decode(addr, buf, buflen, offset, true);
}
-void WindowsDecoder::uninitialize() {}
-
-bool WindowsDecoder::can_decode_C_frame_in_vm() const {
- return (!has_error() && _can_decode_in_vm);
+bool Decoder::get_source_info(address pc, char* buf, size_t buflen, int* line) {
+ return SymbolEngine::get_source_info(pc, buf, buflen, line);
}
-
-bool WindowsDecoder::decode(address addr, char *buf, int buflen, int* offset, const char* modulepath, bool demangle_name) {
- if (!has_error()) {
- PIMAGEHLP_SYMBOL64 pSymbol;
- char symbolInfo[MAX_PATH + sizeof(IMAGEHLP_SYMBOL64)];
- pSymbol = (PIMAGEHLP_SYMBOL64)symbolInfo;
- pSymbol->MaxNameLength = MAX_PATH;
- pSymbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
- DWORD64 displacement;
- if (WindowsDbgHelp::symGetSymFromAddr64(::GetCurrentProcess(), (DWORD64)addr, &displacement, pSymbol)) {
- if (buf != NULL) {
- if (!(demangle_name && demangle(pSymbol->Name, buf, buflen))) {
- jio_snprintf(buf, buflen, "%s", pSymbol->Name);
- }
- }
- if(offset != NULL) *offset = (int)displacement;
- return true;
- }
- }
- if (buf != NULL && buflen > 0) buf[0] = '\0';
- if (offset != NULL) *offset = -1;
- return false;
+bool Decoder::demangle(const char* symbol, char* buf, int buflen) {
+ return SymbolEngine::demangle(symbol, buf, buflen);
}
-bool WindowsDecoder::demangle(const char* symbol, char *buf, int buflen) {
- if (!has_error()) {
- return WindowsDbgHelp::unDecorateSymbolName(symbol, buf, buflen, UNDNAME_COMPLETE) > 0;
- }
- return false;
+void Decoder::print_state_on(outputStream* st) {
+ WindowsDbgHelp::print_state_on(st);
+ SymbolEngine::print_state_on(st);
}
diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp
index 5c464768eea..dedbc3f2e9b 100644
--- a/src/hotspot/os/windows/os_windows.cpp
+++ b/src/hotspot/os/windows/os_windows.cpp
@@ -74,6 +74,7 @@
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"
#include "utilities/vmError.hpp"
+#include "symbolengine.hpp"
#include "windbghelp.hpp"
@@ -134,6 +135,8 @@ BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID reserved) {
if (ForceTimeHighResolution) {
timeBeginPeriod(1L);
}
+ WindowsDbgHelp::pre_initialize();
+ SymbolEngine::pre_initialize();
break;
case DLL_PROCESS_DETACH:
if (ForceTimeHighResolution) {
@@ -428,7 +431,7 @@ static unsigned __stdcall thread_native_entry(Thread* thread) {
// When the VMThread gets here, the main thread may have already exited
// which frees the CodeHeap containing the Atomic::add code
if (thread != VMThread::vm_thread() && VMThread::vm_thread() != NULL) {
- Atomic::dec_ptr((intptr_t*)&os::win32::_os_thread_count);
+ Atomic::dec(&os::win32::_os_thread_count);
}
// If a thread has not deleted itself ("delete this") as part of its
@@ -634,7 +637,7 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
return NULL;
}
- Atomic::inc_ptr((intptr_t*)&os::win32::_os_thread_count);
+ Atomic::inc(&os::win32::_os_thread_count);
// Store info on the Win32 thread into the OSThread
osthread->set_thread_handle(thread_handle);
@@ -1319,6 +1322,8 @@ static int _print_module(const char* fname, address base_address,
void * os::dll_load(const char *name, char *ebuf, int ebuflen) {
void * result = LoadLibrary(name);
if (result != NULL) {
+ // Recalculate pdb search path if a DLL was loaded successfully.
+ SymbolEngine::recalc_search_path();
return result;
}
@@ -4032,6 +4037,8 @@ jint os::init_2(void) {
return JNI_ERR;
}
+ SymbolEngine::recalc_search_path();
+
return JNI_OK;
}
diff --git a/src/hotspot/os/windows/symbolengine.cpp b/src/hotspot/os/windows/symbolengine.cpp
new file mode 100644
index 00000000000..7f816dc365e
--- /dev/null
+++ b/src/hotspot/os/windows/symbolengine.cpp
@@ -0,0 +1,641 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "utilities/globalDefinitions.hpp"
+#include "symbolengine.hpp"
+#include "utilities/debug.hpp"
+#include "windbghelp.hpp"
+
+#include
+
+#include
+#include
+
+
+
+// This code may be invoked normally but also as part of error reporting.
+// In the latter case, we may run under tight memory constraints (native oom),
+// in a stack overflow situation, or with a corrupted C heap. We may also
+// run very early before VM initialization or very late when C exit handlers
+// run. In all these cases, callstacks would still be nice, so let's be robust.
+//
+// We need a number of buffers - for the pdb search path, module handle
+// lists, for demangled symbols, etc.
+//
+// These buffers, while typically small, may need to be large for corner
+// cases (e.g. templatized C++ symbols, or many DLLs loaded). Where do we
+// allocate them?
+//
+// We may be in error handling for a stack overflow, so let's not put them on
+// the stack.
+//
+// Dynamically allocating them may fail if we are handling a native OOM. It
+// is also a bit dangerous, as the C heap may be corrupted already.
+//
+// That leaves pre-allocating them globally, which is safe and should always
+// work (if we synchronize access) but incurs an undesirable footprint for
+// non-error cases.
+//
+// We follow a two-way strategy: Allocate the buffers on the C heap in a
+// reasonably large size. Failing that, fall back to static preallocated
+// buffers. The size of the latter is large enough to handle common scenarios
+// but small enough not to drive up the footprint too much (several kb).
+//
+// We keep these buffers around once allocated, for subsequent requests. This
+// means that by running the initialization early at a safe time - before
+// any error happens - buffers can be pre-allocated. This increases the chance
+// of useful callstacks in error scenarios in exchange for some cycles spent
+// at startup. This behavior can be controlled with -XX:+InitializeDbgHelpEarly
+// and is off by default.
+
+///////
+
+// A simple buffer which attempts to allocate an optimal size but will
+// fall back to a static minimally sized array on allocation error.
+template
+class SimpleBufferWithFallback {
+ T _fallback_buffer[MINIMAL_CAPACITY];
+ T* _p;
+ int _capacity;
+
+ // A sentinel at the end of the buffer to catch overflows.
+ void imprint_sentinel() {
+ assert(_p && _capacity > 0, "Buffer must be allocated");
+ _p[_capacity - 1] = (T)'X';
+ _capacity --;
+ }
+
+public:
+
+ SimpleBufferWithFallback ()
+ : _p(NULL), _capacity(0)
+ {}
+
+ // Note: no destructor because these buffers should, once
+ // allocated, live until process end.
+ // ~SimpleBufferWithFallback()
+
+ // Note: We use raw ::malloc/::free here instead of os::malloc()/os::free
+ // to prevent circularities or secondary crashes during error reporting.
+ virtual void initialize () {
+ assert(_p == NULL && _capacity == 0, "Only call once.");
+ const size_t bytes = OPTIMAL_CAPACITY * sizeof(T);
+ T* q = (T*) ::malloc(bytes);
+ if (q != NULL) {
+ _p = q;
+ _capacity = OPTIMAL_CAPACITY;
+ } else {
+ _p = _fallback_buffer;
+ _capacity = (int)(sizeof(_fallback_buffer) / sizeof(T));
+ }
+ _p[0] = '\0';
+ imprint_sentinel();
+ }
+
+ // We need a way to reset the buffer to fallback size for one special
+ // case, where two buffers need to be of identical capacity.
+ void reset_to_fallback_capacity() {
+ if (_p != _fallback_buffer) {
+ ::free(_p);
+ }
+ _p = _fallback_buffer;
+ _capacity = (int)(sizeof(_fallback_buffer) / sizeof(T));
+ _p[0] = '\0';
+ imprint_sentinel();
+ }
+
+ T* ptr() { return _p; }
+ const T* ptr() const { return _p; }
+ int capacity() const { return _capacity; }
+
+#ifdef ASSERT
+ void check() const {
+ assert(_p[_capacity] == (T)'X', "sentinel lost");
+ }
+#else
+ void check() const {}
+#endif
+
+};
+
+////
+
+// ModuleHandleArray: a list holding module handles. Needs to be large enough
+// to hold one handle per loaded DLL.
+// Note: a standard OpenJDK loads normally ~30 libraries, including system
+// libraries, without third party libraries.
+
+typedef SimpleBufferWithFallback ModuleHandleArrayBase;
+
+class ModuleHandleArray : public ModuleHandleArrayBase {
+
+ int _num; // Number of handles in this array (may be < capacity).
+
+public:
+
+ void initialize() {
+ ModuleHandleArrayBase::initialize();
+ _num = 0;
+ }
+
+ int num() const { return _num; }
+ void set_num(int n) {
+ assert(n <= capacity(), "Too large");
+ _num = n;
+ }
+
+ // Compare with another list; returns true if all handles are equal (incl.
+ // sort order)
+ bool equals(const ModuleHandleArray& other) const {
+ if (_num != other._num) {
+ return false;
+ }
+ if (::memcmp(ptr(), other.ptr(), _num * sizeof(HMODULE)) != 0) {
+ return false;
+ }
+ return true;
+ }
+
+ // Copy content from other list.
+ void copy_content_from(ModuleHandleArray& other) {
+ assert(capacity() == other.capacity(), "Different capacities.");
+ memcpy(ptr(), other.ptr(), other._num * sizeof(HMODULE));
+ _num = other._num;
+ }
+
+};
+
+////
+
+// PathBuffer: a buffer to hold and work with a pdb search PATH - a concatenation
+// of multiple directories separated by ';'.
+// A single directory name can be (NTFS) as long as 32K, but in reality is
+// seldom larger than the (historical) MAX_PATH of 260.
+
+#define MINIMUM_PDB_PATH_LENGTH MAX_PATH * 4
+#define OPTIMAL_PDB_PATH_LENGTH MAX_PATH * 64
+
+typedef SimpleBufferWithFallback PathBufferBase;
+
+class PathBuffer: public PathBufferBase {
+public:
+
+ // Search PDB path for a directory. Search is case insensitive. Returns
+ // true if directory was found in the path, false otherwise.
+ bool contains_directory(const char* directory) {
+ if (ptr() == NULL) {
+ return false;
+ }
+ const size_t len = strlen(directory);
+ if (len == 0) {
+ return false;
+ }
+ char* p = ptr();
+ for(;;) {
+ char* q = strchr(p, ';');
+ if (q != NULL) {
+ if (len == (q - p)) {
+ if (strnicmp(p, directory, len) == 0) {
+ return true;
+ }
+ }
+ p = q + 1;
+ } else {
+ // tail
+ return stricmp(p, directory) == 0 ? true : false;
+ }
+ }
+ return false;
+ }
+
+ // Appends the given directory to the path. Returns false if internal
+ // buffer size was not sufficient.
+ bool append_directory(const char* directory) {
+ const size_t len = strlen(directory);
+ if (len == 0) {
+ return false;
+ }
+ char* p = ptr();
+ const size_t len_now = strlen(p);
+ const size_t needs_capacity = len_now + 1 + len + 1; // xxx;yy\0
+ if (needs_capacity > (size_t)capacity()) {
+ return false; // OOM
+ }
+ if (len_now > 0) { // Not the first path element.
+ p += len_now;
+ *p = ';';
+ p ++;
+ }
+ strcpy(p, directory);
+ return true;
+ }
+
+};
+
+// A simple buffer to hold one single file name. A file name can be (NTFS) as
+// long as 32K, but in reality is seldom larger than MAX_PATH.
+typedef SimpleBufferWithFallback FileNameBuffer;
+
+// A buffer to hold a C++ symbol. Usually small, but symbols may be larger for
+// templates.
+#define MINIMUM_SYMBOL_NAME_LEN 128
+#define OPTIMAL_SYMBOL_NAME_LEN 1024
+
+typedef SimpleBufferWithFallback SymbolBuffer;
+
+static struct {
+
+ // Two buffers to hold lists of loaded module handles, kept across invocations of
+ // SymbolEngine::recalc_search_path().
+ ModuleHandleArray loaded_modules;
+ ModuleHandleArray last_loaded_modules;
+ // Buffer to retrieve and assemble the pdb search path.
+ PathBuffer search_path;
+ // Buffer to retrieve directory names for loaded modules.
+ FileNameBuffer dir_name;
+ // Buffer to retrieve decoded symbol information (in SymbolEngine::decode)
+ SymbolBuffer decode_buffer;
+
+ void initialize() {
+ search_path.initialize();
+ dir_name.initialize();
+ decode_buffer.initialize();
+
+ loaded_modules.initialize();
+ last_loaded_modules.initialize();
+
+ // Note: both module lists must have the same capacity. If one allocation
+ // did fail, let them both fall back to the fallback size.
+ if (loaded_modules.capacity() != last_loaded_modules.capacity()) {
+ loaded_modules.reset_to_fallback_capacity();
+ last_loaded_modules.reset_to_fallback_capacity();
+ }
+
+ assert(search_path.capacity() > 0 && dir_name.capacity() > 0 &&
+ decode_buffer.capacity() > 0 && loaded_modules.capacity() > 0 &&
+ last_loaded_modules.capacity() > 0, "Init error.");
+ }
+
+} g_buffers;
+
+
+// Scan the loaded modules.
+//
+// For each loaded module, add the directory it is located in to the pdb search
+// path, but avoid duplicates. Prior search path content is preserved.
+//
+// If p_search_path_was_updated is not NULL, points to a bool which, upon
+// successful return from the function, contains true if the search path
+// was updated, false if no update was needed because no new DLLs were
+// loaded or unloaded.
+//
+// Returns true for success, false for error.
+static bool recalc_search_path_locked(bool* p_search_path_was_updated) {
+
+ if (p_search_path_was_updated) {
+ *p_search_path_was_updated = false;
+ }
+
+ HANDLE hProcess = ::GetCurrentProcess();
+
+ BOOL success = false;
+
+ // 1) Retrieve current set search path.
+ // (PDB search path is a global setting and someone might have modified
+ // it, so take care not to remove directories, just to add our own).
+
+ if (!WindowsDbgHelp::symGetSearchPath(hProcess, g_buffers.search_path.ptr(),
+ (int)g_buffers.search_path.capacity())) {
+ return false;
+ }
+ DEBUG_ONLY(g_buffers.search_path.check();)
+
+ // 2) Retrieve list of modules handles of all currently loaded modules.
+ DWORD bytes_needed = 0;
+ const DWORD buffer_capacity_bytes = (DWORD)g_buffers.loaded_modules.capacity() * sizeof(HMODULE);
+ success = ::EnumProcessModules(hProcess, g_buffers.loaded_modules.ptr(),
+ buffer_capacity_bytes, &bytes_needed);
+ DEBUG_ONLY(g_buffers.loaded_modules.check();)
+
+ // Note: EnumProcessModules is sloppily defined in terms of whether a
+ // too-small output buffer counts as error. Will it truncate but still
+ // return TRUE? Nobody knows and the manpage is not telling. So we count
+ // truncation as an error, disregarding the return value.
+ if (!success || bytes_needed > buffer_capacity_bytes) {
+ return false;
+ } else {
+ const int num_modules = bytes_needed / sizeof(HMODULE);
+ g_buffers.loaded_modules.set_num(num_modules);
+ }
+
+ // Compare the list of module handles with the last list. If the lists are
+ // identical, no additional dlls were loaded and we can stop.
+ if (g_buffers.loaded_modules.equals(g_buffers.last_loaded_modules)) {
+ return true;
+ } else {
+ // Remember the new set of module handles and continue.
+ g_buffers.last_loaded_modules.copy_content_from(g_buffers.loaded_modules);
+ }
+
+ // 3) For each loaded module: retrieve directory from which it was loaded.
+ // Add directory to search path (but avoid duplicates).
+
+ bool did_modify_searchpath = false;
+
+ for (int i = 0; i < (int)g_buffers.loaded_modules.num(); i ++) {
+
+ const HMODULE hMod = g_buffers.loaded_modules.ptr()[i];
+ char* const filebuffer = g_buffers.dir_name.ptr();
+ const int file_buffer_capacity = g_buffers.dir_name.capacity();
+ const int len_returned = (int)::GetModuleFileName(hMod, filebuffer, (DWORD)file_buffer_capacity);
+ DEBUG_ONLY(g_buffers.dir_name.check();)
+ if (len_returned == 0) {
+ // Error. This is suspicious - this may happen if a module has just been
+ // unloaded concurrently between our calls to EnumProcessModules and
+ // GetModuleFileName, but probably just indicates a coding error.
+ assert(false, "GetModuleFileName failed (%u)", ::GetLastError());
+ } else if (len_returned == file_buffer_capacity) {
+ // Truncation. Just skip this module and continue with the next module.
+ continue;
+ }
+
+ // Cut file name part off.
+ char* last_slash = ::strrchr(filebuffer, '\\');
+ if (last_slash == NULL) {
+ last_slash = ::strrchr(filebuffer, '/');
+ }
+ if (last_slash) {
+ *last_slash = '\0';
+ }
+
+ // If this is already part of the search path, ignore it, otherwise
+ // append to search path.
+ if (!g_buffers.search_path.contains_directory(filebuffer)) {
+ if (!g_buffers.search_path.append_directory(filebuffer)) {
+ return false; // oom
+ }
+ DEBUG_ONLY(g_buffers.search_path.check();)
+ did_modify_searchpath = true;
+ }
+
+ } // for each loaded module.
+
+ // If we did not modify the search path, nothing further needs to be done.
+ if (!did_modify_searchpath) {
+ return true;
+ }
+
+ // Set the search path to its new value.
+ if (!WindowsDbgHelp::symSetSearchPath(hProcess, g_buffers.search_path.ptr())) {
+ return false;
+ }
+
+ if (p_search_path_was_updated) {
+ *p_search_path_was_updated = true;
+ }
+
+ return true;
+
+}
+
+static bool demangle_locked(const char* symbol, char *buf, int buflen) {
+
+ return WindowsDbgHelp::unDecorateSymbolName(symbol, buf, buflen, UNDNAME_COMPLETE) > 0;
+
+}
+
+static bool decode_locked(const void* addr, char* buf, int buflen, int* offset, bool do_demangle) {
+
+ assert(g_buffers.decode_buffer.capacity() >= (sizeof(IMAGEHLP_SYMBOL64) + MINIMUM_SYMBOL_NAME_LEN),
+ "Decode buffer too small.");
+ assert(buf != NULL && buflen > 0 && offset != NULL, "invalid output buffer.");
+
+ DWORD64 displacement;
+ PIMAGEHLP_SYMBOL64 pSymbol = NULL;
+ bool success = false;
+
+ pSymbol = (PIMAGEHLP_SYMBOL64) g_buffers.decode_buffer.ptr();
+ pSymbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
+ pSymbol->MaxNameLength = (DWORD)(g_buffers.decode_buffer.capacity() - sizeof(IMAGEHLP_SYMBOL64) - 1);
+
+ // It is unclear how SymGetSymFromAddr64 handles truncation. Experiments
+ // show it will return TRUE but not zero terminate (which is a really bad
+ // combination). Let's be super careful.
+ ::memset(pSymbol->Name, 0, pSymbol->MaxNameLength); // To catch truncation.
+
+ if (WindowsDbgHelp::symGetSymFromAddr64(::GetCurrentProcess(), (DWORD64)addr, &displacement, pSymbol)) {
+ success = true;
+ if (pSymbol->Name[pSymbol->MaxNameLength - 1] != '\0') {
+ // Symbol was truncated. Do not attempt to demangle. Instead, zero terminate the
+ // truncated string. We still return success - the truncated string may still
+ // be usable for the caller.
+ pSymbol->Name[pSymbol->MaxNameLength - 1] = '\0';
+ do_demangle = false;
+ }
+
+ // Attempt to demangle.
+ if (do_demangle && demangle_locked(pSymbol->Name, buf, buflen)) {
+ // ok.
+ } else {
+ ::strncpy(buf, pSymbol->Name, buflen - 1);
+ }
+ buf[buflen - 1] = '\0';
+
+ *offset = (int)displacement;
+ }
+
+ DEBUG_ONLY(g_buffers.decode_buffer.check();)
+
+ return success;
+}
+
+static enum {
+ state_uninitialized = 0,
+ state_ready = 1,
+ state_error = 2
+} g_state = state_uninitialized;
+
+static void initialize() {
+
+ assert(g_state == state_uninitialized, "wrong sequence");
+ g_state = state_error;
+
+ // 1) Initialize buffers.
+ g_buffers.initialize();
+
+ // 2) Call SymInitialize
+ HANDLE hProcess = ::GetCurrentProcess();
+ WindowsDbgHelp::symSetOptions(SYMOPT_FAIL_CRITICAL_ERRORS | SYMOPT_DEFERRED_LOADS |
+ SYMOPT_EXACT_SYMBOLS | SYMOPT_LOAD_LINES);
+ if (!WindowsDbgHelp::symInitialize(hProcess, NULL, TRUE)) {
+ return;
+ }
+
+ // Note: we ignore any errors from this point on. The symbol engine may be
+ // usable enough.
+ g_state = state_ready;
+
+ (void)recalc_search_path_locked(NULL);
+
+}
+
+///////////////////// External functions //////////////////////////
+
+// All outside facing functions are synchronized. Also, we run
+// initialization on first touch.
+
+static CRITICAL_SECTION g_cs;
+
+namespace { // Do not export.
+ class SymbolEngineEntry {
+ public:
+ SymbolEngineEntry() {
+ ::EnterCriticalSection(&g_cs);
+ if (g_state == state_uninitialized) {
+ initialize();
+ }
+ }
+ ~SymbolEngineEntry() {
+ ::LeaveCriticalSection(&g_cs);
+ }
+ };
+}
+
+// Called at DLL_PROCESS_ATTACH.
+void SymbolEngine::pre_initialize() {
+ ::InitializeCriticalSection(&g_cs);
+}
+
+bool SymbolEngine::decode(const void* addr, char* buf, int buflen, int* offset, bool do_demangle) {
+
+ assert(buf != NULL && buflen > 0 && offset != NULL, "Argument error");
+ buf[0] = '\0';
+ *offset = -1;
+
+ if (addr == NULL) {
+ return false;
+ }
+
+ SymbolEngineEntry entry_guard;
+
+ // Try decoding the symbol once. If we fail, attempt to rebuild the
+ // symbol search path - maybe the pc points to a dll whose pdb file is
+ // outside our search path. Then attempt the decode again.
+ bool success = decode_locked(addr, buf, buflen, offset, do_demangle);
+ if (!success) {
+ bool did_update_search_path = false;
+ if (recalc_search_path_locked(&did_update_search_path)) {
+ if (did_update_search_path) {
+ success = decode_locked(addr, buf, buflen, offset, do_demangle);
+ }
+ }
+ }
+
+ return success;
+
+}
+
+bool SymbolEngine::demangle(const char* symbol, char *buf, int buflen) {
+
+ SymbolEngineEntry entry_guard;
+
+ return demangle_locked(symbol, buf, buflen);
+
+}
+
+bool SymbolEngine::recalc_search_path(bool* p_search_path_was_updated) {
+
+ SymbolEngineEntry entry_guard;
+
+ return recalc_search_path_locked(p_search_path_was_updated);
+
+}
+
+bool SymbolEngine::get_source_info(const void* addr, char* buf, size_t buflen,
+ int* line_no)
+{
+ assert(buf != NULL && buflen > 0 && line_no != NULL, "Argument error");
+ buf[0] = '\0';
+ *line_no = -1;
+
+ if (addr == NULL) {
+ return false;
+ }
+
+ SymbolEngineEntry entry_guard;
+
+ IMAGEHLP_LINE64 lineinfo;
+ memset(&lineinfo, 0, sizeof(lineinfo));
+ lineinfo.SizeOfStruct = sizeof(lineinfo);
+ DWORD displacement;
+ if (WindowsDbgHelp::symGetLineFromAddr64(::GetCurrentProcess(), (DWORD64)addr,
+ &displacement, &lineinfo)) {
+ if (buf != NULL && buflen > 0 && lineinfo.FileName != NULL) {
+ // We only return the file name, not the whole path.
+ char* p = lineinfo.FileName;
+ char* q = strrchr(lineinfo.FileName, '\\');
+ if (q) {
+ p = q + 1;
+ }
+ ::strncpy(buf, p, buflen - 1);
+ buf[buflen - 1] = '\0';
+ }
+ if (line_no != 0) {
+ *line_no = lineinfo.LineNumber;
+ }
+ return true;
+ }
+ return false;
+}
+
+// Print one liner describing the symbol engine state (initialization status
+// and, if initialized successfully, the sym options and the pdb search path)
+void SymbolEngine::print_state_on(outputStream* st) {
+
+ SymbolEngineEntry entry_guard;
+
+ st->print("symbol engine: ");
+
+ if (g_state == state_uninitialized) {
+ st->print("uninitialized.");
+ } else if (g_state == state_error) {
+ st->print("initialization error.");
+ } else {
+ st->print("initialized successfully");
+ st->print(" - sym options: 0x%X", WindowsDbgHelp::symGetOptions());
+ st->print(" - pdb path: ");
+ if (WindowsDbgHelp::symGetSearchPath(::GetCurrentProcess(),
+ g_buffers.search_path.ptr(),
+ (int)g_buffers.search_path.capacity())) {
+ st->print_raw(g_buffers.search_path.ptr());
+ } else {
+ st->print_raw("(cannot be retrieved)");
+ }
+ }
+ st->cr();
+
+}
diff --git a/src/hotspot/os/windows/symbolengine.hpp b/src/hotspot/os/windows/symbolengine.hpp
new file mode 100644
index 00000000000..c01bd9fc93c
--- /dev/null
+++ b/src/hotspot/os/windows/symbolengine.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, SAP SE. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_WINDOWS_VM_SYMBOLENGINE_HPP
+#define OS_WINDOWS_VM_SYMBOLENGINE_HPP
+
+class outputStream;
+
+namespace SymbolEngine {
+
+ bool decode(const void* addr, char* buf, int buflen, int* offset, bool do_demangle);
+
+ bool demangle(const char* symbol, char *buf, int buflen);
+
+ // Given an address, attempts to retrieve the source file name (without its directory) and the line number; returns true on success.
+ bool get_source_info(const void* addr, char* filename, size_t filename_len,
+ int* line_no);
+
+ // Scan the loaded modules. Add all directories for all loaded modules
+ // to the current search path, unless they are already part of the search
+ // path. Prior search path content is preserved, directories are only
+ // added, never removed.
+ // If p_search_path_was_updated is not NULL, points to a bool which, upon
+ // successful return from the function, contains true if the search path
+ // was updated, false if no update was needed because no new DLLs were
+ // loaded or unloaded.
+ // Returns true for success, false for error.
+ bool recalc_search_path(bool* p_search_path_was_updated = NULL);
+
+ // Print one liner describing state (uninitialized, initialization error, or
+ // initialized - then followed by the symbol options and the pdb search path)
+ void print_state_on(outputStream* st);
+
+ // Call at DLL_PROCESS_ATTACH.
+ void pre_initialize();
+
+};
+
+#endif // #ifndef OS_WINDOWS_VM_SYMBOLENGINE_HPP
+
+
diff --git a/src/hotspot/os/windows/threadCritical_windows.cpp b/src/hotspot/os/windows/threadCritical_windows.cpp
index b432f7bb078..66f19e91fc0 100644
--- a/src/hotspot/os/windows/threadCritical_windows.cpp
+++ b/src/hotspot/os/windows/threadCritical_windows.cpp
@@ -51,16 +51,6 @@ static DWORD lock_owner = -1;
// and found them ~30 times slower than the critical region code.
//
-void ThreadCritical::initialize() {
-}
-
-void ThreadCritical::release() {
- assert(lock_owner == -1, "Mutex being deleted while owned.");
- assert(lock_count == -1, "Mutex being deleted while recursively locked");
- assert(lock_event != NULL, "Sanity check");
- CloseHandle(lock_event);
-}
-
ThreadCritical::ThreadCritical() {
DWORD current_thread = GetCurrentThreadId();
diff --git a/src/hotspot/os/windows/windbghelp.cpp b/src/hotspot/os/windows/windbghelp.cpp
index f7119583ed8..e55ecc1d5b3 100644
--- a/src/hotspot/os/windows/windbghelp.cpp
+++ b/src/hotspot/os/windows/windbghelp.cpp
@@ -116,38 +116,36 @@ static void initialize() {
}
+
///////////////////// External functions //////////////////////////
// All outside facing functions are synchronized. Also, we run
// initialization on first touch.
+static CRITICAL_SECTION g_cs;
-// Call InitializeCriticalSection as early as possible.
-class CritSect {
- CRITICAL_SECTION cs;
-public:
- CritSect() { ::InitializeCriticalSection(&cs); }
- void enter() { ::EnterCriticalSection(&cs); }
- void leave() { ::LeaveCriticalSection(&cs); }
-};
-
-static CritSect g_cs;
-
-class EntryGuard {
-public:
- EntryGuard() {
- g_cs.enter();
- if (g_state == state_uninitialized) {
- initialize();
+namespace { // Do not export. Guard object: enters g_cs on construction (running initialize() while still uninitialized) and leaves g_cs on destruction.
+ class WindowsDbgHelpEntry {
+ public:
+ WindowsDbgHelpEntry() {
+ ::EnterCriticalSection(&g_cs);
+ if (g_state == state_uninitialized) {
+ initialize();
+ }
+ }
- }
- ~EntryGuard() {
- g_cs.leave();
- }
-};
+ ~WindowsDbgHelpEntry() {
+ ::LeaveCriticalSection(&g_cs);
+ }
+ };
+}
+
+// Called at DLL_PROCESS_ATTACH. Sets up g_cs, the critical section every WindowsDbgHelpEntry enters, so this must run before any other WindowsDbgHelp function.
+void WindowsDbgHelp::pre_initialize() {
+ ::InitializeCriticalSection(&g_cs);
+}
DWORD WindowsDbgHelp::symSetOptions(DWORD arg) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_SymSetOptions != NULL) {
return g_pfn_SymSetOptions(arg);
}
@@ -155,7 +153,7 @@ DWORD WindowsDbgHelp::symSetOptions(DWORD arg) {
}
DWORD WindowsDbgHelp::symGetOptions(void) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_SymGetOptions != NULL) {
return g_pfn_SymGetOptions();
}
@@ -163,7 +161,7 @@ DWORD WindowsDbgHelp::symGetOptions(void) {
}
BOOL WindowsDbgHelp::symInitialize(HANDLE hProcess, PCTSTR UserSearchPath, BOOL fInvadeProcess) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_SymInitialize != NULL) {
return g_pfn_SymInitialize(hProcess, UserSearchPath, fInvadeProcess);
}
@@ -172,7 +170,7 @@ BOOL WindowsDbgHelp::symInitialize(HANDLE hProcess, PCTSTR UserSearchPath, BOOL
BOOL WindowsDbgHelp::symGetSymFromAddr64(HANDLE hProcess, DWORD64 the_address,
PDWORD64 Displacement, PIMAGEHLP_SYMBOL64 Symbol) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_SymGetSymFromAddr64 != NULL) {
return g_pfn_SymGetSymFromAddr64(hProcess, the_address, Displacement, Symbol);
}
@@ -181,7 +179,7 @@ BOOL WindowsDbgHelp::symGetSymFromAddr64(HANDLE hProcess, DWORD64 the_address,
DWORD WindowsDbgHelp::unDecorateSymbolName(const char* DecoratedName, char* UnDecoratedName,
DWORD UndecoratedLength, DWORD Flags) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_UnDecorateSymbolName != NULL) {
return g_pfn_UnDecorateSymbolName(DecoratedName, UnDecoratedName, UndecoratedLength, Flags);
}
@@ -192,7 +190,7 @@ DWORD WindowsDbgHelp::unDecorateSymbolName(const char* DecoratedName, char* UnDe
}
BOOL WindowsDbgHelp::symSetSearchPath(HANDLE hProcess, PCTSTR SearchPath) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_SymSetSearchPath != NULL) {
return g_pfn_SymSetSearchPath(hProcess, SearchPath);
}
@@ -200,7 +198,7 @@ BOOL WindowsDbgHelp::symSetSearchPath(HANDLE hProcess, PCTSTR SearchPath) {
}
BOOL WindowsDbgHelp::symGetSearchPath(HANDLE hProcess, PTSTR SearchPath, int SearchPathLength) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_SymGetSearchPath != NULL) {
return g_pfn_SymGetSearchPath(hProcess, SearchPath, SearchPathLength);
}
@@ -212,7 +210,7 @@ BOOL WindowsDbgHelp::stackWalk64(DWORD MachineType,
HANDLE hThread,
LPSTACKFRAME64 StackFrame,
PVOID ContextRecord) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_StackWalk64 != NULL) {
return g_pfn_StackWalk64(MachineType, hProcess, hThread, StackFrame,
ContextRecord,
@@ -226,7 +224,7 @@ BOOL WindowsDbgHelp::stackWalk64(DWORD MachineType,
}
PVOID WindowsDbgHelp::symFunctionTableAccess64(HANDLE hProcess, DWORD64 AddrBase) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_SymFunctionTableAccess64 != NULL) {
return g_pfn_SymFunctionTableAccess64(hProcess, AddrBase);
}
@@ -234,7 +232,7 @@ PVOID WindowsDbgHelp::symFunctionTableAccess64(HANDLE hProcess, DWORD64 AddrBase
}
DWORD64 WindowsDbgHelp::symGetModuleBase64(HANDLE hProcess, DWORD64 dwAddr) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_SymGetModuleBase64 != NULL) {
return g_pfn_SymGetModuleBase64(hProcess, dwAddr);
}
@@ -245,7 +243,7 @@ BOOL WindowsDbgHelp::miniDumpWriteDump(HANDLE hProcess, DWORD ProcessId, HANDLE
MINIDUMP_TYPE DumpType, PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam,
PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam,
PMINIDUMP_CALLBACK_INFORMATION CallbackParam) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_MiniDumpWriteDump != NULL) {
return g_pfn_MiniDumpWriteDump(hProcess, ProcessId, hFile, DumpType,
ExceptionParam, UserStreamParam, CallbackParam);
@@ -255,7 +253,7 @@ BOOL WindowsDbgHelp::miniDumpWriteDump(HANDLE hProcess, DWORD ProcessId, HANDLE
BOOL WindowsDbgHelp::symGetLineFromAddr64(HANDLE hProcess, DWORD64 dwAddr,
PDWORD pdwDisplacement, PIMAGEHLP_LINE64 Line) {
- EntryGuard entry_guard;
+ WindowsDbgHelpEntry entry_guard;
if (g_pfn_SymGetLineFromAddr64 != NULL) {
return g_pfn_SymGetLineFromAddr64(hProcess, dwAddr, pdwDisplacement, Line);
}
diff --git a/src/hotspot/os/windows/windbghelp.hpp b/src/hotspot/os/windows/windbghelp.hpp
index 1aaa9e4965a..97dddbc9aa5 100644
--- a/src/hotspot/os/windows/windbghelp.hpp
+++ b/src/hotspot/os/windows/windbghelp.hpp
@@ -66,6 +66,9 @@ namespace WindowsDbgHelp {
// missing - if any, and the dbhelp API version)
void print_state_on(outputStream* st);
+ // Call at DLL_PROCESS_ATTACH.
+ void pre_initialize();
+
};
diff --git a/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp b/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp
index cbf54b82c0b..ec124e8e2a6 100644
--- a/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp
+++ b/src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp
@@ -34,22 +34,6 @@
// Implementation of class atomic
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
//
// machine barrier instructions:
//
@@ -148,90 +132,15 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return result;
}
-
-inline void Atomic::inc (volatile jint* dest) {
-
- unsigned int temp;
-
- __asm__ __volatile__ (
- strasm_nobarrier
- "1: lwarx %0, 0, %2 \n"
- " addic %0, %0, 1 \n"
- " stwcx. %0, 0, %2 \n"
- " bne- 1b \n"
- strasm_nobarrier
- : /*%0*/"=&r" (temp), "=m" (*dest)
- : /*%2*/"r" (dest), "m" (*dest)
- : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-
- long temp;
-
- __asm__ __volatile__ (
- strasm_nobarrier
- "1: ldarx %0, 0, %2 \n"
- " addic %0, %0, 1 \n"
- " stdcx. %0, 0, %2 \n"
- " bne- 1b \n"
- strasm_nobarrier
- : /*%0*/"=&r" (temp), "=m" (*dest)
- : /*%2*/"r" (dest), "m" (*dest)
- : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
- inc_ptr((volatile intptr_t*)dest);
-}
-
-
-inline void Atomic::dec (volatile jint* dest) {
-
- unsigned int temp;
-
- __asm__ __volatile__ (
- strasm_nobarrier
- "1: lwarx %0, 0, %2 \n"
- " addic %0, %0, -1 \n"
- " stwcx. %0, 0, %2 \n"
- " bne- 1b \n"
- strasm_nobarrier
- : /*%0*/"=&r" (temp), "=m" (*dest)
- : /*%2*/"r" (dest), "m" (*dest)
- : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-
- long temp;
-
- __asm__ __volatile__ (
- strasm_nobarrier
- "1: ldarx %0, 0, %2 \n"
- " addic %0, %0, -1 \n"
- " stdcx. %0, 0, %2 \n"
- " bne- 1b \n"
- strasm_nobarrier
- : /*%0*/"=&r" (temp), "=m" (*dest)
- : /*%2*/"r" (dest), "m" (*dest)
- : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- dec_ptr((volatile intptr_t*)dest);
-}
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
-
- // Note that xchg_ptr doesn't necessarily do an acquire
+template<>
+template
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
+ // Note that xchg doesn't necessarily do an acquire
// (see synchronizer.cpp).
- unsigned int old_value;
+ T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@@ -259,15 +168,18 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
"memory"
);
- return (jint) old_value;
+ return old_value;
}
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-
- // Note that xchg_ptr doesn't necessarily do an acquire
+template<>
+template
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ // Note that xchg doesn't necessarily do an acquire
// (see synchronizer.cpp).
- long old_value;
+ T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@@ -295,11 +207,7 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
"memory"
);
- return (intptr_t) old_value;
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
+ return old_value;
}
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
diff --git a/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.inline.hpp b/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.inline.hpp
index b6214c15aea..7e71e11d2a3 100644
--- a/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.inline.hpp
+++ b/src/hotspot/os_cpu/aix_ppc/orderAccess_aix_ppc.inline.hpp
@@ -78,16 +78,17 @@ inline void OrderAccess::acquire() { inlasm_lwsync(); }
inline void OrderAccess::release() { inlasm_lwsync(); }
inline void OrderAccess::fence() { inlasm_sync(); }
-template<> inline jbyte OrderAccess::specialized_load_acquire (const volatile jbyte* p) { register jbyte t = load(p); inlasm_acquire_reg(t); return t; }
-template<> inline jshort OrderAccess::specialized_load_acquire(const volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; }
-template<> inline jint OrderAccess::specialized_load_acquire (const volatile jint* p) { register jint t = load(p); inlasm_acquire_reg(t); return t; }
-template<> inline jlong OrderAccess::specialized_load_acquire (const volatile jlong* p) { register jlong t = load(p); inlasm_acquire_reg(t); return t; }
+// Load-acquire for every operand size: reads *p through Atomic::load and then
+// applies inlasm_acquire_reg to the loaded value before returning it.
+// NOTE(review): the template parameter lists were lost in this rendering of
+// the patch and are restored here from the OrderAccess template scheme -
+// confirm against orderAccess.hpp.
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const { register T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
+};
#undef inlasm_sync
#undef inlasm_lwsync
#undef inlasm_eieio
#undef inlasm_isync
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_AIX_OJDKPPC_VM_ORDERACCESS_AIX_PPC_INLINE_HPP
diff --git a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp b/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp
index 77528598d15..458dcf242c8 100644
--- a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp
+++ b/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp
@@ -27,19 +27,6 @@
// Implementation of class atomic
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
-
template
struct Atomic::PlatformAdd
: Atomic::FetchAndAdd >
@@ -61,25 +48,11 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
-inline void Atomic::inc (volatile jint* dest) {
- __asm__ volatile ( "lock addl $1,(%0)" :
- : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
- inc_ptr((volatile intptr_t*)dest);
-}
-
-inline void Atomic::dec (volatile jint* dest) {
- __asm__ volatile ( "lock subl $1,(%0)" :
- : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- dec_ptr((volatile intptr_t*)dest);
-}
-
-inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
+template<>
+template
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@@ -87,10 +60,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return exchange_value;
}
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
-}
-
template<>
template
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@@ -120,9 +89,6 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
}
#ifdef AMD64
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
-
template<>
template
inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) const {
@@ -136,21 +102,11 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- __asm__ __volatile__ ( "lock addq $1,(%0)"
- :
- : "r" (dest)
- : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- __asm__ __volatile__ ( "lock subq $1,(%0)"
- :
- : "r" (dest)
- : "cc", "memory");
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
+template<>
+template
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
__asm__ __volatile__ ("xchgq (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@@ -172,22 +128,8 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return exchange_value;
}
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
#else // !AMD64
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- inc((volatile jint*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- dec((volatile jint*)dest);
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
- return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
extern "C" {
// defined in bsd_x86.s
jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong, bool);
@@ -204,18 +146,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return cmpxchg_using_helper(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
}
-inline jlong Atomic::load(const volatile jlong* src) {
+// 64-bit load for the non-AMD64 build: the value is copied into a local
+// through the _Atomic_move_long helper and then converted back to T.
+// NOTE(review): the template argument lists were lost in this rendering of
+// the patch and are restored here - confirm the pointer types against the
+// _Atomic_move_long declaration.
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
   volatile jlong dest;
- _Atomic_move_long(src, &dest);
- return dest;
+  _Atomic_move_long(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
+  return PrimitiveConversions::cast<T>(dest);
 }
-inline void Atomic::store(jlong store_value, jlong* dest) {
- _Atomic_move_long((volatile jlong*)&store_value, (volatile jlong*)dest);
-}
-
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
- _Atomic_move_long((volatile jlong*)&store_value, dest);
+// 64-bit store for the non-AMD64 build: store_value is copied to *dest
+// through the _Atomic_move_long helper.
+// NOTE(review): the template argument lists were lost in this rendering of
+// the patch and are restored here - confirm the pointer types against the
+// _Atomic_move_long declaration.
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  _Atomic_move_long(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
 }
#endif // AMD64
diff --git a/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.inline.hpp b/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.inline.hpp
index 038d6f985d5..d2b0674a36d 100644
--- a/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.inline.hpp
+++ b/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -64,46 +64,57 @@ inline void OrderAccess::fence() {
}
template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jbyte* p, jbyte v) {
- __asm__ volatile ( "xchgb (%2),%0"
- : "=q" (v)
- : "0" (v), "r" (p)
- : "memory");
-}
+// RELEASE_X_FENCE store for 1-byte operands: v is written to *p with a single
+// xchgb; the previous content of *p ends up in v and is discarded.
+// (The member template header was lost in this rendering and is restored.)
+struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgb (%2),%0"
+                      : "=q" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
template<>
-inline void OrderAccess::specialized_release_store_fence(volatile jshort* p, jshort v) {
- __asm__ volatile ( "xchgw (%2),%0"
- : "=r" (v)
- : "0" (v), "r" (p)
- : "memory");
-}
+// RELEASE_X_FENCE store for 2-byte operands: v is written to *p with a single
+// xchgw; the previous content of *p ends up in v and is discarded.
+// (The member template header was lost in this rendering and is restored.)
+struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgw (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
+
template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jint* p, jint v) {
- __asm__ volatile ( "xchgl (%2),%0"
- : "=r" (v)
- : "0" (v), "r" (p)
- : "memory");
-}
+// RELEASE_X_FENCE store for 4-byte operands: v is written to *p with a single
+// xchgl; the previous content of *p ends up in v and is discarded.
+// (The member template header was lost in this rendering and is restored.)
+struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgl (%2),%0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
#ifdef AMD64
template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jlong* p, jlong v) {
- __asm__ volatile ( "xchgq (%2), %0"
- : "=r" (v)
- : "0" (v), "r" (p)
- : "memory");
-}
+// RELEASE_X_FENCE store for 8-byte operands (AMD64 only): v is written to *p
+// with a single xchgq; the previous content of *p ends up in v and is
+// discarded.
+// (The member template header was lost in this rendering and is restored.)
+struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile (  "xchgq (%2), %0"
+                      : "=r" (v)
+                      : "0" (v), "r" (p)
+                      : "memory");
+  }
+};
#endif // AMD64
-template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jfloat* p, jfloat v) {
- release_store_fence((volatile jint*)p, jint_cast(v));
-}
-template<>
-inline void OrderAccess::specialized_release_store_fence(volatile jdouble* p, jdouble v) {
- release_store_fence((volatile jlong*)p, jlong_cast(v));
-}
-
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_BSD_X86_VM_ORDERACCESS_BSD_X86_INLINE_HPP
diff --git a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp b/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
index 7fcaf785f9a..0daea3c6c4b 100644
--- a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
+++ b/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
@@ -87,7 +87,7 @@ static inline int m68k_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
-static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
+static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until success.
int prev = *ptr;
@@ -148,7 +148,7 @@ static inline int arm_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
-static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
+static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until a __kernel_cmpxchg succeeds.
int prev = *ptr;
@@ -159,20 +159,6 @@ static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
}
#endif // ARM
-inline void Atomic::store(jint store_value, volatile jint* dest) {
-#if !defined(ARM) && !defined(M68K)
- __sync_synchronize();
-#endif
- *dest = store_value;
-}
-
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) {
-#if !defined(ARM) && !defined(M68K)
- __sync_synchronize();
-#endif
- *dest = store_value;
-}
-
template
struct Atomic::PlatformAdd
: Atomic::AddAndFetch >
@@ -207,42 +193,22 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return __sync_add_and_fetch(dest, add_value);
}
-inline void Atomic::inc(volatile jint* dest) {
- add(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- add_ptr(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
- add_ptr(1, dest);
-}
-
-inline void Atomic::dec(volatile jint* dest) {
- add(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- add_ptr(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- add_ptr(-1, dest);
-}
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
+template<>
+template
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
#ifdef ARM
- return arm_lock_test_and_set(dest, exchange_value);
+ return xchg_using_helper(arm_lock_test_and_set, exchange_value, dest);
#else
#ifdef M68K
- return m68k_lock_test_and_set(dest, exchange_value);
+ return xchg_using_helper(m68k_lock_test_and_set, exchange_value, dest);
#else
// __sync_lock_test_and_set is a bizarrely named atomic exchange
// operation. Note that some platforms only support this with the
// limitation that the only valid value to store is the immediate
// constant 1. There is a test for this in JNI_CreateJavaVM().
- jint result = __sync_lock_test_and_set (dest, exchange_value);
+ T result = __sync_lock_test_and_set (dest, exchange_value);
// All atomic operations are expected to be full memory barriers
// (see atomic.hpp). However, __sync_lock_test_and_set is not
// a full memory barrier, but an acquire barrier. Hence, this added
@@ -253,24 +219,14 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
#endif // ARM
}
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
- volatile intptr_t* dest) {
-#ifdef ARM
- return arm_lock_test_and_set(dest, exchange_value);
-#else
-#ifdef M68K
- return m68k_lock_test_and_set(dest, exchange_value);
-#else
- intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
+// 8-byte atomic exchange: swaps exchange_value into *dest and returns the
+// previous content. __sync_lock_test_and_set is followed by an explicit
+// __sync_synchronize() because the builtin alone is only an acquire barrier.
+// (The member template header was lost in this rendering and is restored.)
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+                                             T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T result = __sync_lock_test_and_set (dest, exchange_value);
   __sync_synchronize();
   return result;
-#endif // M68K
-#endif // ARM
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void *) xchg_ptr((intptr_t) exchange_value,
- (volatile intptr_t*) dest);
 }
// No direct support for cmpxchg of bytes; emulate using int.
@@ -305,18 +261,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
}
-inline jlong Atomic::load(const volatile jlong* src) {
+// 8-byte load: the value is copied into a local with os::atomic_copy64 and
+// then converted back to T.
+// NOTE(review): the template argument lists were lost in this rendering of
+// the patch and are restored here - confirm the pointer types against the
+// os::atomic_copy64 declaration.
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
   volatile jlong dest;
- os::atomic_copy64(src, &dest);
- return dest;
+  os::atomic_copy64(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
+  return PrimitiveConversions::cast<T>(dest);
 }
-inline void Atomic::store(jlong store_value, jlong* dest) {
- os::atomic_copy64((volatile jlong*)&store_value, (volatile jlong*)dest);
-}
-
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
- os::atomic_copy64((volatile jlong*)&store_value, dest);
+// 8-byte store: store_value is copied to *dest with os::atomic_copy64.
+// NOTE(review): the template argument lists were lost in this rendering of
+// the patch and are restored here - confirm the pointer types against the
+// os::atomic_copy64 declaration.
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  os::atomic_copy64(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
 }
#endif // OS_CPU_BSD_ZERO_VM_ATOMIC_BSD_ZERO_HPP
diff --git a/src/hotspot/os_cpu/bsd_zero/orderAccess_bsd_zero.inline.hpp b/src/hotspot/os_cpu/bsd_zero/orderAccess_bsd_zero.inline.hpp
index fb3017ce9d1..96ea19a4a70 100644
--- a/src/hotspot/os_cpu/bsd_zero/orderAccess_bsd_zero.inline.hpp
+++ b/src/hotspot/os_cpu/bsd_zero/orderAccess_bsd_zero.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2009 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -74,6 +74,4 @@ inline void OrderAccess::acquire() { LIGHT_MEM_BARRIER; }
inline void OrderAccess::release() { LIGHT_MEM_BARRIER; }
inline void OrderAccess::fence() { FULL_MEM_BARRIER; }
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_BSD_ZERO_VM_ORDERACCESS_BSD_ZERO_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp
index 4074df4fe5a..e4076609924 100644
--- a/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/atomic_linux_aarch64.hpp
@@ -34,19 +34,6 @@
#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
-
template
struct Atomic::PlatformAdd
: Atomic::AddAndFetch >
@@ -57,39 +44,16 @@ struct Atomic::PlatformAdd
}
};
-inline void Atomic::inc(volatile jint* dest)
-{
- add(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest)
-{
- add_ptr(1, dest);
-}
-
-inline void Atomic::dec (volatile jint* dest)
-{
- add(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest)
-{
- add_ptr(-1, dest);
-}
-
-inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
-{
- jint res = __sync_lock_test_and_set (dest, exchange_value);
+// Atomic exchange for every supported operand size: swaps exchange_value
+// into *dest and returns the previous content, followed by FULL_MEM_BARRIER.
+// (Both template headers and the class template argument were lost in this
+// rendering of the patch and are restored.)
+template<size_t byte_size>
+template<typename T>
+inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
+                                                     T volatile* dest) const {
+  STATIC_ASSERT(byte_size == sizeof(T));
+  T res = __sync_lock_test_and_set(dest, exchange_value);
   FULL_MEM_BARRIER;
   return res;
 }
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest)
-{
- return (void *) xchg_ptr((intptr_t) exchange_value,
- (volatile intptr_t*) dest);
-}
-
template
template
inline T Atomic::PlatformCmpxchg::operator()(T exchange_value,
@@ -107,26 +71,4 @@ inline T Atomic::PlatformCmpxchg::operator()(T exchange_value,
}
}
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest)
-{
- add_ptr(1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest)
-{
- add_ptr(-1, dest);
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
-{
- intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
- FULL_MEM_BARRIER;
- return res;
-}
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
#endif // OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_HPP
diff --git a/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.inline.hpp b/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.inline.hpp
index dcbce021456..32164dd3058 100644
--- a/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.inline.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/orderAccess_linux_aarch64.inline.hpp
@@ -50,93 +50,28 @@ inline void OrderAccess::fence() {
FULL_MEM_BARRIER;
}
-inline jbyte OrderAccess::load_acquire(const volatile jbyte* p)
-{ jbyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jshort OrderAccess::load_acquire(const volatile jshort* p)
-{ jshort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jint OrderAccess::load_acquire(const volatile jint* p)
-{ jint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jlong OrderAccess::load_acquire(const volatile jlong* p)
-{ jlong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jubyte OrderAccess::load_acquire(const volatile jubyte* p)
-{ jubyte data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jushort OrderAccess::load_acquire(const volatile jushort* p)
-{ jushort data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline juint OrderAccess::load_acquire(const volatile juint* p)
-{ juint data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline julong OrderAccess::load_acquire(const volatile julong* p)
-{ julong data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jfloat OrderAccess::load_acquire(const volatile jfloat* p)
-{ jfloat data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline jdouble OrderAccess::load_acquire(const volatile jdouble* p)
-{ jdouble data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline intptr_t OrderAccess::load_ptr_acquire(const volatile intptr_t* p)
-{ intptr_t data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
-inline void* OrderAccess::load_ptr_acquire(const volatile void* p)
-{ void* data; __atomic_load((void* const volatile *)p, &data, __ATOMIC_ACQUIRE); return data; }
+// Platform load functor: reads *p through __atomic_load with
+// __ATOMIC_ACQUIRE ordering and returns the loaded value.
+// NOTE(review): the template parameter/argument lists look like they were
+// stripped from this text -- restore them from the upstream source before
+// applying this patch.
+template
+struct OrderAccess::PlatformOrderedLoad
+ VALUE_OBJ_CLASS_SPEC
+{
+ template
+ T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; }
+};
-inline void OrderAccess::release_store(volatile jbyte* p, jbyte v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store(volatile jshort* p, jshort v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store(volatile jint* p, jint v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store(volatile jlong* p, jlong v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store(volatile jubyte* p, jubyte v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store(volatile jushort* p, jushort v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store(volatile juint* p, juint v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store(volatile julong* p, julong v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store(volatile jfloat* p, jfloat v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store(volatile jdouble* p, jdouble v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v)
-{ __atomic_store(p, &v, __ATOMIC_RELEASE); }
-inline void OrderAccess::release_store_ptr(volatile void* p, void* v)
-{ __atomic_store((void* volatile *)p, &v, __ATOMIC_RELEASE); }
+// Platform store functor: writes v to *p through __atomic_store with
+// __ATOMIC_RELEASE ordering.
+// NOTE(review): the template parameter/argument lists look like they were
+// stripped from this text -- restore them from the upstream source before
+// applying this patch.
+template
+struct OrderAccess::PlatformOrderedStore
+ VALUE_OBJ_CLASS_SPEC
+{
+ template
+ void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); }
+};
-inline void OrderAccess::store_fence(jbyte* p, jbyte v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_fence(jshort* p, jshort v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_fence(jint* p, jint v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_fence(jlong* p, jlong v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_fence(jubyte* p, jubyte v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_fence(jushort* p, jushort v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_fence(juint* p, juint v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_fence(julong* p, julong v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_fence(jfloat* p, jfloat v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_fence(jdouble* p, jdouble v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-inline void OrderAccess::store_ptr_fence(void** p, void* v)
-{ __atomic_store(p, &v, __ATOMIC_RELAXED); fence(); }
-
-inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); }
-inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); }
-inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); }
-inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); }
-inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); }
-inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); }
-inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); }
-inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); }
-inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); }
-inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); }
-
-inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); }
-inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); }
+// Platform store functor for the store-then-fence case: forwards to
+// release_store(p, v) and then calls fence().
+// NOTE(review): the template parameter/argument lists look like they were
+// stripped from this text, so the tag that distinguishes this definition
+// from the other PlatformOrderedStore is not visible -- restore it from the
+// upstream source before applying this patch.
+template
+struct OrderAccess::PlatformOrderedStore
+ VALUE_OBJ_CLASS_SPEC
+{
+ template
+ void operator()(T v, volatile T* p) const { release_store(p, v); fence(); }
+};
#endif // OS_CPU_LINUX_AARCH64_VM_ORDERACCESS_LINUX_AARCH64_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp b/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
index fcd7e1f9ba2..d5c6ecd9f8d 100644
--- a/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
+++ b/src/hotspot/os_cpu/linux_arm/atomic_linux_arm.hpp
@@ -44,39 +44,24 @@
* kernel source or kernel_user_helpers.txt in Linux Doc.
*/
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
+#ifndef AARCH64
+// 64-bit atomic load for non-AARCH64 ARM builds (this definition sits inside
+// the enclosing #ifndef AARCH64). The read is delegated to the
+// os::atomic_load_long_func helper, which works on jlong; the source pointer
+// is reinterpreted accordingly and the result is converted back to T.
+// The STATIC_ASSERT restricts T to 8-byte types.
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  return PrimitiveConversions::cast<T>(
+    (*os::atomic_load_long_func)(reinterpret_cast<const volatile jlong*>(src)));
+}
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
-inline jlong Atomic::load (const volatile jlong* src) {
- assert(((intx)src & (sizeof(jlong)-1)) == 0, "Atomic load jlong mis-aligned");
-#ifdef AARCH64
- return *src;
-#else
- return (*os::atomic_load_long_func)(src);
+// 64-bit atomic store for non-AARCH64 ARM builds (closed by the #endif that
+// follows). The write is delegated to the os::atomic_store_long_func helper,
+// which works on jlong; the value is converted to jlong and the destination
+// pointer is reinterpreted accordingly.
+// The STATIC_ASSERT restricts T to 8-byte types.
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  (*os::atomic_store_long_func)(
+    PrimitiveConversions::cast<jlong>(store_value), reinterpret_cast<volatile jlong*>(dest));
+}
#endif
-}
-
-inline void Atomic::store (jlong value, volatile jlong* dest) {
- assert(((intx)dest & (sizeof(jlong)-1)) == 0, "Atomic store jlong mis-aligned");
-#ifdef AARCH64
- *dest = value;
-#else
- (*os::atomic_store_long_func)(value, dest);
-#endif
-}
-
-inline void Atomic::store (jlong value, jlong* dest) {
- store(value, (volatile jlong*)dest);
-}
// As per atomic.hpp all read-modify-write operations have to provide two-way
// barriers semantics. For AARCH64 we are using load-acquire-with-reservation and
@@ -122,14 +107,6 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) co
#endif
}
-inline void Atomic::inc(volatile jint* dest) {
- Atomic::add(1, (volatile jint *)dest);
-}
-
-inline void Atomic::dec(volatile jint* dest) {
- Atomic::add(-1, (volatile jint *)dest);
-}
-
#ifdef AARCH64
template<>
template
@@ -149,28 +126,15 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
: "memory");
return val;
}
-#endif // AARCH64
+#endif
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- Atomic::add_ptr(1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- Atomic::add_ptr(-1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
- inc_ptr((volatile intptr_t*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- dec_ptr((volatile intptr_t*)dest);
-}
-
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
+template<>
+template
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
#ifdef AARCH64
- jint old_val;
+ T old_val;
int tmp;
__asm__ volatile(
"1:\n\t"
@@ -182,13 +146,17 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
: "memory");
return old_val;
#else
- return (*os::atomic_xchg_func)(exchange_value, dest);
+ return xchg_using_helper(os::atomic_xchg_func, exchange_value, dest);
#endif
}
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
#ifdef AARCH64
- intptr_t old_val;
+template<>
+template
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ T old_val;
int tmp;
__asm__ volatile(
"1:\n\t"
@@ -199,14 +167,8 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
: [new_val] "r" (exchange_value), [dest] "r" (dest)
: "memory");
return old_val;
-#else
- return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
-#endif
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
+#endif // AARCH64
// The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering
diff --git a/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.inline.hpp b/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.inline.hpp
index a115dd82311..4a737c12a90 100644
--- a/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.inline.hpp
+++ b/src/hotspot/os_cpu/linux_arm/orderAccess_linux_arm.inline.hpp
@@ -33,7 +33,6 @@
// - we define the high level barriers below and use the general
// implementation in orderAccess.inline.hpp, with customizations
// on AARCH64 via the specialized_* template functions
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
// Memory Ordering on ARM is weak.
//
@@ -131,91 +130,126 @@ inline void OrderAccess::fence() { dmb_sy(); }
#ifdef AARCH64
-template<> inline jbyte OrderAccess::specialized_load_acquire(const volatile jbyte* p) {
- volatile jbyte result;
- __asm__ volatile(
- "ldarb %w[res], [%[ptr]]"
- : [res] "=&r" (result)
- : [ptr] "r" (p)
- : "memory");
- return result;
-}
+// AArch64 load-acquire for 1-byte values: reads *p with a single ldarb
+// instruction and returns the loaded value. The "memory" clobber keeps the
+// compiler from moving other memory accesses across the asm statement.
+template<>
+struct OrderAccess::PlatformOrderedLoad<1, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    volatile T result;
+    __asm__ volatile(
+      "ldarb %w[res], [%[ptr]]"
+      : [res] "=&r" (result)
+      : [ptr] "r" (p)
+      : "memory");
+    return result;
+  }
+};
-template<> inline jshort OrderAccess::specialized_load_acquire(const volatile jshort* p) {
- volatile jshort result;
- __asm__ volatile(
- "ldarh %w[res], [%[ptr]]"
- : [res] "=&r" (result)
- : [ptr] "r" (p)
- : "memory");
- return result;
-}
+// AArch64 load-acquire for 2-byte values: reads *p with a single ldarh
+// instruction and returns the loaded value. The "memory" clobber keeps the
+// compiler from moving other memory accesses across the asm statement.
+template<>
+struct OrderAccess::PlatformOrderedLoad<2, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    volatile T result;
+    __asm__ volatile(
+      "ldarh %w[res], [%[ptr]]"
+      : [res] "=&r" (result)
+      : [ptr] "r" (p)
+      : "memory");
+    return result;
+  }
+};
-template<> inline jint OrderAccess::specialized_load_acquire(const volatile jint* p) {
- volatile jint result;
- __asm__ volatile(
- "ldar %w[res], [%[ptr]]"
- : [res] "=&r" (result)
- : [ptr] "r" (p)
- : "memory");
- return result;
-}
+// AArch64 load-acquire for 4-byte values: reads *p with a single ldar
+// instruction into a 32-bit (w) register and returns the loaded value. The
+// "memory" clobber keeps the compiler from moving other memory accesses
+// across the asm statement.
+template<>
+struct OrderAccess::PlatformOrderedLoad<4, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    volatile T result;
+    __asm__ volatile(
+      "ldar %w[res], [%[ptr]]"
+      : [res] "=&r" (result)
+      : [ptr] "r" (p)
+      : "memory");
+    return result;
+  }
+};
-template<> inline jfloat OrderAccess::specialized_load_acquire(const volatile jfloat* p) {
- return jfloat_cast(specialized_load_acquire((const volatile jint*)p));
-}
+// AArch64 load-acquire for 8-byte values: reads *p with a single ldar
+// instruction into a full-width register and returns the loaded value. The
+// "memory" clobber keeps the compiler from moving other memory accesses
+// across the asm statement.
+template<>
+struct OrderAccess::PlatformOrderedLoad<8, X_ACQUIRE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  T operator()(const volatile T* p) const {
+    volatile T result;
+    __asm__ volatile(
+      "ldar %[res], [%[ptr]]"
+      : [res] "=&r" (result)
+      : [ptr] "r" (p)
+      : "memory");
+    return result;
+  }
+};
-// This is implicit as jlong and intptr_t are both "long int"
-//template<> inline jlong OrderAccess::specialized_load_acquire(const volatile jlong* p) {
-// return (volatile jlong)specialized_load_acquire((const volatile intptr_t*)p);
-//}
+// AArch64 store for 1-byte values under the RELEASE_X_FENCE tag: writes v to
+// *p with a store-release (stlrb) and then issues a full fence.
+template<>
+struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile(
+      "stlrb %w[val], [%[ptr]]"
+      :
+      : [ptr] "r" (p), [val] "r" (v)
+      : "memory");
+    // stlrb alone is only a release store; it does not order this store
+    // against later loads, so the trailing fence has to be explicit.
+    OrderAccess::fence();
+  }
+};
-template<> inline intptr_t OrderAccess::specialized_load_acquire(const volatile intptr_t* p) {
- volatile intptr_t result;
- __asm__ volatile(
- "ldar %[res], [%[ptr]]"
- : [res] "=&r" (result)
- : [ptr] "r" (p)
- : "memory");
- return result;
-}
+// AArch64 store for 2-byte values under the RELEASE_X_FENCE tag: writes v to
+// *p with a store-release (stlrh) and then issues a full fence.
+template<>
+struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile(
+      "stlrh %w[val], [%[ptr]]"
+      :
+      : [ptr] "r" (p), [val] "r" (v)
+      : "memory");
+    // stlrh alone is only a release store; it does not order this store
+    // against later loads, so the trailing fence has to be explicit.
+    OrderAccess::fence();
+  }
+};
-template<> inline jdouble OrderAccess::specialized_load_acquire(const volatile jdouble* p) {
- return jdouble_cast(specialized_load_acquire((const volatile intptr_t*)p));
-}
+// AArch64 store for 4-byte values under the RELEASE_X_FENCE tag: writes v to
+// *p with a store-release (stlr from a 32-bit w register) and then issues a
+// full fence.
+template<>
+struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile(
+      "stlr %w[val], [%[ptr]]"
+      :
+      : [ptr] "r" (p), [val] "r" (v)
+      : "memory");
+    // stlr alone is only a release store; it does not order this store
+    // against later loads, so the trailing fence has to be explicit.
+    OrderAccess::fence();
+  }
+};
+// AArch64 store for 8-byte values under the RELEASE_X_FENCE tag: writes v to
+// *p with a store-release (stlr from a full-width register) and then issues
+// a full fence.
+template<>
+struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
+  VALUE_OBJ_CLASS_SPEC
+{
+  template <typename T>
+  void operator()(T v, volatile T* p) const {
+    __asm__ volatile(
+      "stlr %[val], [%[ptr]]"
+      :
+      : [ptr] "r" (p), [val] "r" (v)
+      : "memory");
+    // stlr alone is only a release store; it does not order this store
+    // against later loads, so the trailing fence has to be explicit.
+    OrderAccess::fence();
+  }
+};
-template<> inline void OrderAccess::specialized_release_store(volatile jbyte* p, jbyte v) {
- __asm__ volatile(
- "stlrb %w[val], [%[ptr]]"
- :
- : [ptr] "r" (p), [val] "r" (v)
- : "memory");
-}
-
-template<> inline void OrderAccess::specialized_release_store(volatile jshort* p, jshort v) {
- __asm__ volatile(
- "stlrh %w[val], [%[ptr]]"
- :
- : [ptr] "r" (p), [val] "r" (v)
- : "memory");
-}
-
-template<> inline void OrderAccess::specialized_release_store(volatile jint* p, jint v) {
- __asm__ volatile(
- "stlr %w[val], [%[ptr]]"
- :
- : [ptr] "r" (p), [val] "r" (v)
- : "memory");
-}
-
-template<> inline void OrderAccess::specialized_release_store(volatile jlong* p, jlong v) {
- __asm__ volatile(
- "stlr %[val], [%[ptr]]"
- :
- : [ptr] "r" (p), [val] "r" (v)
- : "memory");
-}
#endif // AARCH64
#endif // OS_CPU_LINUX_ARM_VM_ORDERACCESS_LINUX_ARM_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp b/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp
index a5a0f7d3124..764243960cc 100644
--- a/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp
+++ b/src/hotspot/os_cpu/linux_ppc/atomic_linux_ppc.hpp
@@ -32,22 +32,6 @@
// Implementation of class atomic
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
//
// machine barrier instructions:
//
@@ -146,90 +130,14 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return result;
}
-
-inline void Atomic::inc (volatile jint* dest) {
-
- unsigned int temp;
-
- __asm__ __volatile__ (
- strasm_nobarrier
- "1: lwarx %0, 0, %2 \n"
- " addic %0, %0, 1 \n"
- " stwcx. %0, 0, %2 \n"
- " bne- 1b \n"
- strasm_nobarrier
- : /*%0*/"=&r" (temp), "=m" (*dest)
- : /*%2*/"r" (dest), "m" (*dest)
- : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
-
- long temp;
-
- __asm__ __volatile__ (
- strasm_nobarrier
- "1: ldarx %0, 0, %2 \n"
- " addic %0, %0, 1 \n"
- " stdcx. %0, 0, %2 \n"
- " bne- 1b \n"
- strasm_nobarrier
- : /*%0*/"=&r" (temp), "=m" (*dest)
- : /*%2*/"r" (dest), "m" (*dest)
- : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
- inc_ptr((volatile intptr_t*)dest);
-}
-
-
-inline void Atomic::dec (volatile jint* dest) {
-
- unsigned int temp;
-
- __asm__ __volatile__ (
- strasm_nobarrier
- "1: lwarx %0, 0, %2 \n"
- " addic %0, %0, -1 \n"
- " stwcx. %0, 0, %2 \n"
- " bne- 1b \n"
- strasm_nobarrier
- : /*%0*/"=&r" (temp), "=m" (*dest)
- : /*%2*/"r" (dest), "m" (*dest)
- : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
-
- long temp;
-
- __asm__ __volatile__ (
- strasm_nobarrier
- "1: ldarx %0, 0, %2 \n"
- " addic %0, %0, -1 \n"
- " stdcx. %0, 0, %2 \n"
- " bne- 1b \n"
- strasm_nobarrier
- : /*%0*/"=&r" (temp), "=m" (*dest)
- : /*%2*/"r" (dest), "m" (*dest)
- : "cc" strasm_nobarrier_clobber_memory);
-
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- dec_ptr((volatile intptr_t*)dest);
-}
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
-
- // Note that xchg_ptr doesn't necessarily do an acquire
+template<>
+template
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ // Note that xchg doesn't necessarily do an acquire
// (see synchronizer.cpp).
- unsigned int old_value;
+ T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@@ -257,15 +165,18 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
"memory"
);
- return (jint) old_value;
+ return old_value;
}
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-
- // Note that xchg_ptr doesn't necessarily do an acquire
+template<>
+template
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ // Note that xchg doesn't necessarily do an acquire
// (see synchronizer.cpp).
- long old_value;
+ T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@@ -293,11 +204,7 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
"memory"
);
- return (intptr_t) old_value;
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
+ return old_value;
}
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
diff --git a/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.inline.hpp b/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.inline.hpp
index d41b788ab68..2f600407727 100644
--- a/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.inline.hpp
+++ b/src/hotspot/os_cpu/linux_ppc/orderAccess_linux_ppc.inline.hpp
@@ -80,10 +80,14 @@ inline void OrderAccess::acquire() { inlasm_lwsync(); }
inline void OrderAccess::release() { inlasm_lwsync(); }
inline void OrderAccess::fence() { inlasm_sync(); }
-template<> inline jbyte OrderAccess::specialized_load_acquire (const volatile jbyte* p) { register jbyte t = load(p); inlasm_acquire_reg(t); return t; }
-template<> inline jshort OrderAccess::specialized_load_acquire(const volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; }
-template<> inline jint OrderAccess::specialized_load_acquire (const volatile jint* p) { register jint t = load(p); inlasm_acquire_reg(t); return t; }
-template<> inline jlong OrderAccess::specialized_load_acquire (const volatile jlong* p) { register jlong t = load(p); inlasm_acquire_reg(t); return t; }
+
+// Platform load functor: reads *p with Atomic::load, passes the loaded
+// value through the inlasm_acquire_reg macro, and returns it.
+// inlasm_acquire_reg is #undef'd further down in this file, so this
+// definition has to stay above that point.
+// NOTE(review): the template parameter/argument lists look like they were
+// stripped from this text -- restore them from the upstream source before
+// applying this patch.
+template
+struct OrderAccess::PlatformOrderedLoad
+ VALUE_OBJ_CLASS_SPEC
+{
+ template
+ T operator()(const volatile T* p) const { register T t = Atomic::load(p); inlasm_acquire_reg(t); return t; }
+};
#undef inlasm_sync
#undef inlasm_lwsync
@@ -91,6 +95,4 @@ template<> inline jlong OrderAccess::specialized_load_acquire (const vol
#undef inlasm_isync
#undef inlasm_acquire_reg
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_LINUX_PPC_VM_ORDERACCESS_LINUX_PPC_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp b/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp
index e7c436bdd6e..5821bb018c3 100644
--- a/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp
+++ b/src/hotspot/os_cpu/linux_s390/atomic_linux_s390.hpp
@@ -53,20 +53,6 @@
// is an integer multiple of the data length. Furthermore, all stores are ordered:
// a store which occurs conceptually before another store becomes visible to other CPUs
// before the other store becomes visible.
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
//------------
// Atomic::add
@@ -192,219 +178,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest) const {
}
-//------------
-// Atomic::inc
-//------------
-// These methods force the value in memory to be incremented (augmented by 1).
-// Both, memory value and increment, are treated as 32bit signed binary integers.
-// No overflow exceptions are recognized, and the condition code does not hold
-// information about the value in memory.
-//
-// The value in memory is updated by using a compare-and-swap instruction. The
-// instruction is retried as often as required.
-
-inline void Atomic::inc(volatile jint* dest) {
- unsigned int old, upd;
-
- if (VM_Version::has_LoadAndALUAtomicV1()) {
-// tty->print_cr("Atomic::inc called... dest @%p", dest);
- __asm__ __volatile__ (
- " LGHI 2,1 \n\t" // load increment
- " LA 3,%[mem] \n\t" // force data address into ARG2
-// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
-// " LAA 2,2,0(3) \n\t" // actually coded instruction
- " .byte 0xeb \n\t" // LAA main opcode
- " .byte 0x22 \n\t" // R1,R3
- " .byte 0x30 \n\t" // R2,disp1
- " .byte 0x00 \n\t" // disp2,disp3
- " .byte 0x00 \n\t" // disp4,disp5
- " .byte 0xf8 \n\t" // LAA minor opcode
- " AGHI 2,1 \n\t" // calc new value in register
- " LR %[upd],2 \n\t" // move to result register
- //---< outputs >---
- : [upd] "=&d" (upd) // write-only, updated counter value
- , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
- //---< inputs >---
- :
-// : [inc] "a" (inc) // read-only.
- //---< clobbered >---
- : "cc", "r2", "r3", "memory"
- );
- } else {
- __asm__ __volatile__ (
- " LLGF %[old],%[mem] \n\t" // get old value
- "0: LA %[upd],1(,%[old]) \n\t" // calc result
- " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
- " JNE 0b \n\t" // no success? -> retry
- //---< outputs >---
- : [old] "=&a" (old) // write-only, old counter value
- , [upd] "=&d" (upd) // write-only, updated counter value
- , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
- //---< inputs >---
- :
- //---< clobbered >---
- : "cc", "memory"
- );
- }
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- unsigned long old, upd;
-
- if (VM_Version::has_LoadAndALUAtomicV1()) {
- __asm__ __volatile__ (
- " LGHI 2,1 \n\t" // load increment
- " LA 3,%[mem] \n\t" // force data address into ARG2
-// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
-// " LAAG 2,2,0(3) \n\t" // actually coded instruction
- " .byte 0xeb \n\t" // LAA main opcode
- " .byte 0x22 \n\t" // R1,R3
- " .byte 0x30 \n\t" // R2,disp1
- " .byte 0x00 \n\t" // disp2,disp3
- " .byte 0x00 \n\t" // disp4,disp5
- " .byte 0xe8 \n\t" // LAA minor opcode
- " AGHI 2,1 \n\t" // calc new value in register
- " LR %[upd],2 \n\t" // move to result register
- //---< outputs >---
- : [upd] "=&d" (upd) // write-only, updated counter value
- , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
- //---< inputs >---
- :
-// : [inc] "a" (inc) // read-only.
- //---< clobbered >---
- : "cc", "r2", "r3", "memory"
- );
- } else {
- __asm__ __volatile__ (
- " LG %[old],%[mem] \n\t" // get old value
- "0: LA %[upd],1(,%[old]) \n\t" // calc result
- " CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
- " JNE 0b \n\t" // no success? -> retry
- //---< outputs >---
- : [old] "=&a" (old) // write-only, old counter value
- , [upd] "=&d" (upd) // write-only, updated counter value
- , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
- //---< inputs >---
- :
- //---< clobbered >---
- : "cc", "memory"
- );
- }
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
- inc_ptr((volatile intptr_t*)dest);
-}
-
-//------------
-// Atomic::dec
-//------------
-// These methods force the value in memory to be decremented (augmented by -1).
-// Both, memory value and decrement, are treated as 32bit signed binary integers.
-// No overflow exceptions are recognized, and the condition code does not hold
-// information about the value in memory.
-//
-// The value in memory is updated by using a compare-and-swap instruction. The
-// instruction is retried as often as required.
-
-inline void Atomic::dec(volatile jint* dest) {
- unsigned int old, upd;
-
- if (VM_Version::has_LoadAndALUAtomicV1()) {
- __asm__ __volatile__ (
- " LGHI 2,-1 \n\t" // load increment
- " LA 3,%[mem] \n\t" // force data address into ARG2
-// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
-// " LAA 2,2,0(3) \n\t" // actually coded instruction
- " .byte 0xeb \n\t" // LAA main opcode
- " .byte 0x22 \n\t" // R1,R3
- " .byte 0x30 \n\t" // R2,disp1
- " .byte 0x00 \n\t" // disp2,disp3
- " .byte 0x00 \n\t" // disp4,disp5
- " .byte 0xf8 \n\t" // LAA minor opcode
- " AGHI 2,-1 \n\t" // calc new value in register
- " LR %[upd],2 \n\t" // move to result register
- //---< outputs >---
- : [upd] "=&d" (upd) // write-only, updated counter value
- , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
- //---< inputs >---
- :
-// : [inc] "a" (inc) // read-only.
- //---< clobbered >---
- : "cc", "r2", "r3", "memory"
- );
- } else {
- __asm__ __volatile__ (
- " LLGF %[old],%[mem] \n\t" // get old value
- // LAY not supported by inline assembler
- // "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
- "0: LR %[upd],%[old] \n\t" // calc result
- " AHI %[upd],-1 \n\t"
- " CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
- " JNE 0b \n\t" // no success? -> retry
- //---< outputs >---
- : [old] "=&a" (old) // write-only, old counter value
- , [upd] "=&d" (upd) // write-only, updated counter value
- , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
- //---< inputs >---
- :
- //---< clobbered >---
- : "cc", "memory"
- );
- }
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- unsigned long old, upd;
-
- if (VM_Version::has_LoadAndALUAtomicV1()) {
- __asm__ __volatile__ (
- " LGHI 2,-1 \n\t" // load increment
- " LA 3,%[mem] \n\t" // force data address into ARG2
-// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
-// " LAAG 2,2,0(3) \n\t" // actually coded instruction
- " .byte 0xeb \n\t" // LAA main opcode
- " .byte 0x22 \n\t" // R1,R3
- " .byte 0x30 \n\t" // R2,disp1
- " .byte 0x00 \n\t" // disp2,disp3
- " .byte 0x00 \n\t" // disp4,disp5
- " .byte 0xe8 \n\t" // LAA minor opcode
- " AGHI 2,-1 \n\t" // calc new value in register
- " LR %[upd],2 \n\t" // move to result register
- //---< outputs >---
- : [upd] "=&d" (upd) // write-only, updated counter value
- , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
- //---< inputs >---
- :
-// : [inc] "a" (inc) // read-only.
- //---< clobbered >---
- : "cc", "r2", "r3", "memory"
- );
- } else {
- __asm__ __volatile__ (
- " LG %[old],%[mem] \n\t" // get old value
-// LAY not supported by inline assembler
-// "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
- "0: LGR %[upd],%[old] \n\t" // calc result
- " AGHI %[upd],-1 \n\t"
- " CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
- " JNE 0b \n\t" // no success? -> retry
- //---< outputs >---
- : [old] "=&a" (old) // write-only, old counter value
- , [upd] "=&d" (upd) // write-only, updated counter value
- , [mem] "+Q" (*dest) // read/write, memory to be updated atomically
- //---< inputs >---
- :
- //---< clobbered >---
- : "cc", "memory"
- );
- }
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- dec_ptr((volatile intptr_t*)dest);
-}
-
//-------------
// Atomic::xchg
//-------------
@@ -421,8 +194,12 @@ inline void Atomic::dec_ptr(volatile void* dest) {
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.
-inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
- unsigned int old;
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
+ T old;
__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
@@ -432,16 +209,20 @@ inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
: [old] "=&d" (old) // write-only, prev value irrelevant
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
- : [upd] "d" (xchg_val) // read-only, value to be written to memory
+ : [upd] "d" (exchange_value) // read-only, value to be written to memory
//---< clobbered >---
: "cc", "memory"
);
- return (jint)old;
+ return old;
}
-inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
- unsigned long old;
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ T old;
__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
@@ -451,16 +232,12 @@ inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
: [old] "=&d" (old) // write-only, init from memory
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
- : [upd] "d" (xchg_val) // read-only, value to be written to memory
+ : [upd] "d" (exchange_value) // read-only, value to be written to memory
//---< clobbered >---
: "cc", "memory"
);
- return (intptr_t)old;
-}
-
-inline void *Atomic::xchg_ptr(void *exchange_value, volatile void *dest) {
- return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
+ return old;
}
//----------------
@@ -544,6 +321,4 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val,
return old;
}
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
#endif // OS_CPU_LINUX_S390_VM_ATOMIC_LINUX_S390_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.inline.hpp b/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.inline.hpp
index 2de7fb6a81f..89401f03553 100644
--- a/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.inline.hpp
+++ b/src/hotspot/os_cpu/linux_s390/orderAccess_linux_s390.inline.hpp
@@ -74,10 +74,13 @@ inline void OrderAccess::acquire() { inlasm_zarch_acquire(); }
inline void OrderAccess::release() { inlasm_zarch_release(); }
inline void OrderAccess::fence() { inlasm_zarch_sync(); }
-template<> inline jbyte OrderAccess::specialized_load_acquire (const volatile jbyte* p) { register jbyte t = *p; inlasm_zarch_acquire(); return t; }
-template<> inline jshort OrderAccess::specialized_load_acquire(const volatile jshort* p) { register jshort t = *p; inlasm_zarch_acquire(); return t; }
-template<> inline jint OrderAccess::specialized_load_acquire (const volatile jint* p) { register jint t = *p; inlasm_zarch_acquire(); return t; }
-template<> inline jlong OrderAccess::specialized_load_acquire (const volatile jlong* p) { register jlong t = *p; inlasm_zarch_acquire(); return t; }
+template<size_t byte_size>
+struct OrderAccess::PlatformOrderedLoad<byte_size, X_ACQUIRE>
+ VALUE_OBJ_CLASS_SPEC
+{
+ template <typename T>
+ T operator()(const volatile T* p) const { register T t = *p; inlasm_zarch_acquire(); return t; }
+};
#undef inlasm_compiler_barrier
#undef inlasm_zarch_sync
@@ -85,8 +88,4 @@ template<> inline jlong OrderAccess::specialized_load_acquire (const vol
#undef inlasm_zarch_acquire
#undef inlasm_zarch_fence
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_LINUX_S390_VM_ORDERACCESS_LINUX_S390_INLINE_HPP
-
-
diff --git a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp
index fa53de045d0..272c3e0f46b 100644
--- a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp
+++ b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2016 SAP SE. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -448,11 +448,17 @@ JVM_handle_linux_signal(int sig,
}
else { // thread->thread_state() != _thread_in_Java
- if (sig == SIGILL && VM_Version::is_determine_features_test_running()) {
- // SIGILL must be caused by VM_Version::determine_features().
+ if ((sig == SIGILL) && VM_Version::is_determine_features_test_running()) {
+ // SIGILL must be caused by VM_Version::determine_features()
+ // when attempting to execute a non-existing instruction.
//*(int *) (pc-6)=0; // Patch instruction to 0 to indicate that it causes a SIGILL.
// Flushing of icache is not necessary.
stub = pc; // Continue with next instruction.
+ } else if ((sig == SIGFPE) && VM_Version::is_determine_features_test_running()) {
+ // SIGFPE is known to be caused by trying to execute a vector instruction
+ // when the vector facility is installed, but operating system support is missing.
+ VM_Version::reset_has_VectorFacility();
+ stub = pc; // Continue with next instruction.
} else if (thread->thread_state() == _thread_in_vm &&
sig == SIGBUS && thread->doing_unsafe_access()) {
// We don't really need a stub here! Just set the pending exeption and
@@ -471,7 +477,7 @@ JVM_handle_linux_signal(int sig,
// Info->si_addr need not be the exact address, it is only
// guaranteed to be on the same page as the address that caused
// the SIGSEGV.
- if ((sig == SIGSEGV) &&
+ if ((sig == SIGSEGV) && !UseMembar &&
(os::get_memory_serialize_page() ==
(address)((uintptr_t)info->si_addr & ~(os::vm_page_size()-1)))) {
return true;
@@ -510,7 +516,7 @@ JVM_handle_linux_signal(int sig,
// Note: this should be combined with the trap_pc handling above,
// because it handles the same issue.
if (sig == SIGILL || sig == SIGFPE) {
- pc = (address) info->si_addr;
+ pc = (address)info->si_addr;
}
VMError::report_and_die(t, sig, pc, info, ucVoid);
diff --git a/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp b/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp
index 3ea20f8789d..46a1268347a 100644
--- a/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp
+++ b/src/hotspot/os_cpu/linux_sparc/atomic_linux_sparc.hpp
@@ -27,30 +27,6 @@
// Implementation of class atomic
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
-inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
-inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }
-
-inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
-inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
-inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@@ -103,9 +79,12 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return rv;
}
-
-inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
- intptr_t rv = exchange_value;
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
+ T rv = exchange_value;
__asm__ volatile(
" swap [%2],%1\n\t"
: "=r" (rv)
@@ -114,8 +93,12 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return rv;
}
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
- intptr_t rv = exchange_value;
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ T rv = exchange_value;
__asm__ volatile(
"1:\n\t"
" mov %1, %%o3\n\t"
@@ -131,10 +114,6 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
return rv;
}
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
-}
-
// No direct support for cmpxchg of bytes; emulate using int.
template<>
struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
diff --git a/src/hotspot/os_cpu/linux_sparc/orderAccess_linux_sparc.inline.hpp b/src/hotspot/os_cpu/linux_sparc/orderAccess_linux_sparc.inline.hpp
index fd6078fcd31..c9fde925f7e 100644
--- a/src/hotspot/os_cpu/linux_sparc/orderAccess_linux_sparc.inline.hpp
+++ b/src/hotspot/os_cpu/linux_sparc/orderAccess_linux_sparc.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -48,6 +48,4 @@ inline void OrderAccess::fence() {
__asm__ volatile ("membar #StoreLoad" : : : "memory");
}
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_LINUX_SPARC_VM_ORDERACCESS_LINUX_SPARC_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp
index f19bfa767a9..be5649dc401 100644
--- a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp
+++ b/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp
@@ -27,19 +27,6 @@
// Implementation of class atomic
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
-
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
@@ -61,25 +48,11 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
-inline void Atomic::inc (volatile jint* dest) {
- __asm__ volatile ( "lock addl $1,(%0)" :
- : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
- inc_ptr((volatile intptr_t*)dest);
-}
-
-inline void Atomic::dec (volatile jint* dest) {
- __asm__ volatile ( "lock subl $1,(%0)" :
- : "r" (dest) : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- dec_ptr((volatile intptr_t*)dest);
-}
-
-inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@@ -87,10 +60,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return exchange_value;
}
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
-}
-
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@@ -120,8 +89,6 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
}
#ifdef AMD64
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
template<>
template<typename I, typename D>
@@ -136,21 +103,11 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- __asm__ __volatile__ ("lock addq $1,(%0)"
- :
- : "r" (dest)
- : "cc", "memory");
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- __asm__ __volatile__ ("lock subq $1,(%0)"
- :
- : "r" (dest)
- : "cc", "memory");
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
__asm__ __volatile__ ("xchgq (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@@ -172,22 +129,8 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return exchange_value;
}
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
#else // !AMD64
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- inc((volatile jint*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- dec((volatile jint*)dest);
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
- return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
extern "C" {
// defined in linux_x86.s
jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong);
@@ -204,18 +147,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return cmpxchg_using_helper(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
}
-inline jlong Atomic::load(const volatile jlong* src) {
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+ STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
- _Atomic_move_long(src, &dest);
- return dest;
+ _Atomic_move_long(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
+ return PrimitiveConversions::cast<T>(dest);
}
-inline void Atomic::store(jlong store_value, jlong* dest) {
- _Atomic_move_long((volatile jlong*)&store_value, (volatile jlong*)dest);
-}
-
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
- _Atomic_move_long((volatile jlong*)&store_value, dest);
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ _Atomic_move_long(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}
#endif // AMD64
diff --git a/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.inline.hpp b/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.inline.hpp
index 0f564216e13..0d5585b6e70 100644
--- a/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.inline.hpp
+++ b/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -60,46 +60,57 @@ inline void OrderAccess::fence() {
}
template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jbyte* p, jbyte v) {
- __asm__ volatile ( "xchgb (%2),%0"
- : "=q" (v)
- : "0" (v), "r" (p)
- : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
+ VALUE_OBJ_CLASS_SPEC
+{
+ template <typename T>
+ void operator()(T v, volatile T* p) const {
+ __asm__ volatile ( "xchgb (%2),%0"
+ : "=q" (v)
+ : "0" (v), "r" (p)
+ : "memory");
+ }
+};
+
template<>
-inline void OrderAccess::specialized_release_store_fence(volatile jshort* p, jshort v) {
- __asm__ volatile ( "xchgw (%2),%0"
- : "=r" (v)
- : "0" (v), "r" (p)
- : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
+ VALUE_OBJ_CLASS_SPEC
+{
+ template <typename T>
+ void operator()(T v, volatile T* p) const {
+ __asm__ volatile ( "xchgw (%2),%0"
+ : "=r" (v)
+ : "0" (v), "r" (p)
+ : "memory");
+ }
+};
+
template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jint* p, jint v) {
- __asm__ volatile ( "xchgl (%2),%0"
- : "=r" (v)
- : "0" (v), "r" (p)
- : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
+ VALUE_OBJ_CLASS_SPEC
+{
+ template <typename T>
+ void operator()(T v, volatile T* p) const {
+ __asm__ volatile ( "xchgl (%2),%0"
+ : "=r" (v)
+ : "0" (v), "r" (p)
+ : "memory");
+ }
+};
#ifdef AMD64
template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jlong* p, jlong v) {
- __asm__ volatile ( "xchgq (%2), %0"
- : "=r" (v)
- : "0" (v), "r" (p)
- : "memory");
-}
+struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE>
+ VALUE_OBJ_CLASS_SPEC
+{
+ template <typename T>
+ void operator()(T v, volatile T* p) const {
+ __asm__ volatile ( "xchgq (%2), %0"
+ : "=r" (v)
+ : "0" (v), "r" (p)
+ : "memory");
+ }
+};
#endif // AMD64
-template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jfloat* p, jfloat v) {
- release_store_fence((volatile jint*)p, jint_cast(v));
-}
-template<>
-inline void OrderAccess::specialized_release_store_fence(volatile jdouble* p, jdouble v) {
- release_store_fence((volatile jlong*)p, jlong_cast(v));
-}
-
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_LINUX_X86_VM_ORDERACCESS_LINUX_X86_INLINE_HPP
diff --git a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp b/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
index 22af8a7fbb8..0713b6de460 100644
--- a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
+++ b/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
@@ -87,7 +87,7 @@ static inline int m68k_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
-static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
+static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until success.
int prev = *ptr;
@@ -148,7 +148,7 @@ static inline int arm_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically write VALUE into `*PTR' and returns the previous
contents of `*PTR'. */
-static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
+static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until a __kernel_cmpxchg succeeds.
int prev = *ptr;
@@ -159,14 +159,6 @@ static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
}
#endif // ARM
-inline void Atomic::store(jint store_value, volatile jint* dest) {
- *dest = store_value;
-}
-
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) {
- *dest = store_value;
-}
-
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
@@ -201,42 +193,22 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return __sync_add_and_fetch(dest, add_value);
}
-inline void Atomic::inc(volatile jint* dest) {
- add(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- add_ptr(1, dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
- add_ptr(1, dest);
-}
-
-inline void Atomic::dec(volatile jint* dest) {
- add(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- add_ptr(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- add_ptr(-1, dest);
-}
-
-inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
#ifdef ARM
- return arm_lock_test_and_set(dest, exchange_value);
+ return xchg_using_helper(arm_lock_test_and_set, exchange_value, dest);
#else
#ifdef M68K
- return m68k_lock_test_and_set(dest, exchange_value);
+ return xchg_using_helper(m68k_lock_test_and_set, exchange_value, dest);
#else
// __sync_lock_test_and_set is a bizarrely named atomic exchange
// operation. Note that some platforms only support this with the
// limitation that the only valid value to store is the immediate
// constant 1. There is a test for this in JNI_CreateJavaVM().
- jint result = __sync_lock_test_and_set (dest, exchange_value);
+ T result = __sync_lock_test_and_set (dest, exchange_value);
// All atomic operations are expected to be full memory barriers
// (see atomic.hpp). However, __sync_lock_test_and_set is not
// a full memory barrier, but an acquire barrier. Hence, this added
@@ -247,24 +219,14 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
#endif // ARM
}
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
- volatile intptr_t* dest) {
-#ifdef ARM
- return arm_lock_test_and_set(dest, exchange_value);
-#else
-#ifdef M68K
- return m68k_lock_test_and_set(dest, exchange_value);
-#else
- intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ T result = __sync_lock_test_and_set (dest, exchange_value);
__sync_synchronize();
return result;
-#endif // M68K
-#endif // ARM
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void *) xchg_ptr((intptr_t) exchange_value,
- (volatile intptr_t*) dest);
}
// No direct support for cmpxchg of bytes; emulate using int.
@@ -299,18 +261,21 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
}
-inline jlong Atomic::load(const volatile jlong* src) {
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+ STATIC_ASSERT(8 == sizeof(T));
volatile jlong dest;
- os::atomic_copy64(src, &dest);
- return dest;
+ os::atomic_copy64(reinterpret_cast<const volatile jlong*>(src), reinterpret_cast<volatile jlong*>(&dest));
+ return PrimitiveConversions::cast<T>(dest);
}
-inline void Atomic::store(jlong store_value, jlong* dest) {
- os::atomic_copy64((volatile jlong*)&store_value, (volatile jlong*)dest);
-}
-
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
- os::atomic_copy64((volatile jlong*)&store_value, dest);
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ os::atomic_copy64(reinterpret_cast<const volatile jlong*>(&store_value), reinterpret_cast<volatile jlong*>(dest));
}
#endif // OS_CPU_LINUX_ZERO_VM_ATOMIC_LINUX_ZERO_HPP
diff --git a/src/hotspot/os_cpu/linux_zero/orderAccess_linux_zero.inline.hpp b/src/hotspot/os_cpu/linux_zero/orderAccess_linux_zero.inline.hpp
index 20d851c9234..8c4cd1c7a1c 100644
--- a/src/hotspot/os_cpu/linux_zero/orderAccess_linux_zero.inline.hpp
+++ b/src/hotspot/os_cpu/linux_zero/orderAccess_linux_zero.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2009 Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -56,8 +56,16 @@ typedef void (__kernel_dmb_t) (void);
#else // PPC
+#ifdef ALPHA
+
+#define LIGHT_MEM_BARRIER __sync_synchronize()
+
+#else // ALPHA
+
#define LIGHT_MEM_BARRIER __asm __volatile ("":::"memory")
+#endif // ALPHA
+
#endif // PPC
#endif // ARM
@@ -75,6 +83,4 @@ inline void OrderAccess::release() { LIGHT_MEM_BARRIER; }
inline void OrderAccess::fence() { FULL_MEM_BARRIER; }
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_LINUX_ZERO_VM_ORDERACCESS_LINUX_ZERO_INLINE_HPP
diff --git a/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp b/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp
index 5314e931cf9..a8e00217da5 100644
--- a/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp
+++ b/src/hotspot/os_cpu/solaris_sparc/atomic_solaris_sparc.hpp
@@ -27,41 +27,6 @@
// Implementation of class atomic
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
-inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
-inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }
-
-inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
-inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
-inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }
-
-
-inline void Atomic::store(jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store(jlong store_value, volatile jlong* dest) { *dest = store_value; }
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
-
-// This is the interface to the atomic instructions in solaris_sparc.il.
-// It's very messy because we need to support v8 and these instructions
-// are illegal there. When sparc v8 is dropped, we can drop out lots of
-// this code. Also compiler2 does not support v8 so the conditional code
-// omits the instruction set check.
-
-extern "C" jint _Atomic_swap32(jint exchange_value, volatile jint* dest);
-extern "C" intptr_t _Atomic_swap64(intptr_t exchange_value, volatile intptr_t* dest);
-
// Implement ADD using a CAS loop.
template<size_t byte_size>
struct Atomic::PlatformAdd VALUE_OBJ_CLASS_SPEC {
@@ -78,16 +43,30 @@ struct Atomic::PlatformAdd VALUE_OBJ_CLASS_SPEC {
}
};
-inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
- return _Atomic_swap32(exchange_value, dest);
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
+ __asm__ volatile ( "swap [%2],%0"
+ : "=r" (exchange_value)
+ : "0" (exchange_value), "r" (dest)
+ : "memory");
+ return exchange_value;
}
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
- return _Atomic_swap64(exchange_value, dest);
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
+template<>
+template<typename T>
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ T old_value = *dest;
+ while (true) {
+ T result = cmpxchg(exchange_value, dest, old_value);
+ if (result == old_value) break;
+ old_value = result;
+ }
+ return old_value;
}
// No direct support for cmpxchg of bytes; emulate using int.
diff --git a/src/hotspot/os_cpu/solaris_sparc/orderAccess_solaris_sparc.inline.hpp b/src/hotspot/os_cpu/solaris_sparc/orderAccess_solaris_sparc.inline.hpp
index 7a74147d6a0..b60cd092c50 100644
--- a/src/hotspot/os_cpu/solaris_sparc/orderAccess_solaris_sparc.inline.hpp
+++ b/src/hotspot/os_cpu/solaris_sparc/orderAccess_solaris_sparc.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -52,6 +52,4 @@ inline void OrderAccess::fence() {
__asm__ volatile ("membar #StoreLoad" : : : "memory");
}
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_SOLARIS_SPARC_VM_ORDERACCESS_SOLARIS_SPARC_INLINE_HPP
diff --git a/src/hotspot/os_cpu/solaris_sparc/solaris_sparc.il b/src/hotspot/os_cpu/solaris_sparc/solaris_sparc.il
index 39ac68a7ec8..1f25542e5d5 100644
--- a/src/hotspot/os_cpu/solaris_sparc/solaris_sparc.il
+++ b/src/hotspot/os_cpu/solaris_sparc/solaris_sparc.il
@@ -32,47 +32,6 @@
.end
- // Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
- //
- // Arguments:
- // exchange_value: O0
- // dest: O1
- //
- // Results:
- // O0: the value previously stored in dest
-
- .inline _Atomic_swap32, 2
- .volatile
- swap [%o1],%o0
- .nonvolatile
- .end
-
-
- // Support for intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t * dest).
- //
- // 64-bit
- //
- // Arguments:
- // exchange_value: O0
- // dest: O1
- //
- // Results:
- // O0: the value previously stored in dest
-
- .inline _Atomic_swap64, 2
- .volatile
- 1:
- mov %o0, %o3
- ldx [%o1], %o2
- casx [%o1], %o2, %o3
- cmp %o2, %o3
- bne %xcc, 1b
- nop
- mov %o2, %o0
- .nonvolatile
- .end
-
-
// Support for jlong Atomic::load and Atomic::store on v9.
//
// void _Atomic_move_long_v9(volatile jlong* src, volatile jlong* dst)
diff --git a/src/hotspot/os_cpu/solaris_sparc/vm_version_solaris_sparc.cpp b/src/hotspot/os_cpu/solaris_sparc/vm_version_solaris_sparc.cpp
index da767cbf642..7cbf60ee92a 100644
--- a/src/hotspot/os_cpu/solaris_sparc/vm_version_solaris_sparc.cpp
+++ b/src/hotspot/os_cpu/solaris_sparc/vm_version_solaris_sparc.cpp
@@ -380,7 +380,7 @@ void VM_Version::platform_features() {
if (av & AV_SPARC_CRC32C) features |= ISA_crc32c_msk;
#ifndef AV2_SPARC_FJATHPLUS
-#define AV2_SPARC_FJATHPLUS 0x00000001 // Fujitsu Athena+
+#define AV2_SPARC_FJATHPLUS 0x00000001 // Fujitsu Athena+ insns
#endif
#ifndef AV2_SPARC_VIS3B
#define AV2_SPARC_VIS3B 0x00000002 // VIS3 present on multiple chips
@@ -405,6 +405,34 @@ void VM_Version::platform_features() {
#endif
#ifndef AV2_SPARC_VAMASK
#define AV2_SPARC_VAMASK 0x00000100 // Virtual Address masking
+#endif
+
+#ifndef AV2_SPARC_SPARC6
+#define AV2_SPARC_SPARC6 0x00000200 // REVB*, FPSLL*, RDENTROPY, LDM* and STM*
+#endif
+#ifndef AV2_SPARC_DICTUNP
+#define AV2_SPARC_DICTUNP 0x00002000 // Dictionary unpack instruction
+#endif
+#ifndef AV2_SPARC_FPCMPSHL
+#define AV2_SPARC_FPCMPSHL 0x00004000 // Partition compare with shifted result
+#endif
+#ifndef AV2_SPARC_RLE
+#define AV2_SPARC_RLE 0x00008000 // Run-length encoded burst and length
+#endif
+#ifndef AV2_SPARC_SHA3
+#define AV2_SPARC_SHA3 0x00010000 // SHA3 instructions
+#endif
+#ifndef AV2_SPARC_FJATHPLUS2
+#define AV2_SPARC_FJATHPLUS2 0x00020000 // Fujitsu Athena++ insns
+#endif
+#ifndef AV2_SPARC_VIS3C
+#define AV2_SPARC_VIS3C 0x00040000 // Subset of VIS3 insns provided by Athena++
+#endif
+#ifndef AV2_SPARC_SPARC5B
+#define AV2_SPARC_SPARC5B 0x00080000 // subset of SPARC5 insns (fpadd8, fpsub8)
+#endif
+#ifndef AV2_SPARC_MME
+#define AV2_SPARC_MME 0x00100000 // Misaligned Mitigation Enable
#endif
if (avn > 1) {
@@ -419,19 +447,30 @@ void VM_Version::platform_features() {
if (av2 & AV2_SPARC_XMONT) features |= ISA_xmont_msk;
if (av2 & AV2_SPARC_PAUSE_NSEC) features |= ISA_pause_nsec_msk;
if (av2 & AV2_SPARC_VAMASK) features |= ISA_vamask_msk;
+
+ if (av2 & AV2_SPARC_SPARC6) features |= ISA_sparc6_msk;
+ if (av2 & AV2_SPARC_DICTUNP) features |= ISA_dictunp_msk;
+ if (av2 & AV2_SPARC_FPCMPSHL) features |= ISA_fpcmpshl_msk;
+ if (av2 & AV2_SPARC_RLE) features |= ISA_rle_msk;
+ if (av2 & AV2_SPARC_SHA3) features |= ISA_sha3_msk;
+ if (av2 & AV2_SPARC_FJATHPLUS2) features |= ISA_fjathplus2_msk;
+ if (av2 & AV2_SPARC_VIS3C) features |= ISA_vis3c_msk;
+ if (av2 & AV2_SPARC_SPARC5B) features |= ISA_sparc5b_msk;
+ if (av2 & AV2_SPARC_MME) features |= ISA_mme_msk;
}
_features = features; // ISA feature set completed, update state.
Sysinfo machine(SI_MACHINE);
- bool is_sun4v = machine.match("sun4v"); // All Oracle SPARC + Fujitsu Athena+
+ bool is_sun4v = machine.match("sun4v"); // All Oracle SPARC + Fujitsu Athena+/++
bool is_sun4u = machine.match("sun4u"); // All other Fujitsu
- // Handle Athena+ conservatively (simply because we are lacking info.).
+ // Handle Athena+/++ conservatively (simply because we are lacking info.).
- bool do_sun4v = is_sun4v && !has_athena_plus();
- bool do_sun4u = is_sun4u || has_athena_plus();
+ bool an_athena = has_athena_plus() || has_athena_plus2();
+ bool do_sun4v = is_sun4v && !an_athena;
+ bool do_sun4u = is_sun4u || an_athena;
uint64_t synthetic = 0;
@@ -441,16 +480,16 @@ void VM_Version::platform_features() {
// Fast IDIV, BIS and LD available on Niagara Plus.
if (has_vis2()) {
synthetic |= (CPU_fast_idiv_msk | CPU_fast_ld_msk);
- // ...on Core S4 however, we prefer not to use BIS.
+ // ...on Core C4 however, we prefer not to use BIS.
if (!has_sparc5()) {
synthetic |= CPU_fast_bis_msk;
}
}
- // Niagara Core S3 supports fast RDPC and block zeroing.
+ // SPARC Core C3 supports fast RDPC and block zeroing.
if (has_ima()) {
synthetic |= (CPU_fast_rdpc_msk | CPU_blk_zeroing_msk);
}
- // Niagara Core S3 and S4 have slow CMOVE.
+ // SPARC Core C3 and C4 have slow CMOVE.
if (!has_ima()) {
synthetic |= CPU_fast_cmove_msk;
}
diff --git a/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp b/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp
index c6919fbf38b..4acd7df025d 100644
--- a/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp
+++ b/src/hotspot/os_cpu/solaris_x86/atomic_solaris_x86.hpp
@@ -25,28 +25,6 @@
#ifndef OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP
#define OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-
-
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
-inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
-inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }
-
-inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
-inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
-inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }
-
// For Sun Studio - implementation is in solaris_x86_64.il.
extern "C" {
@@ -92,8 +70,26 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
reinterpret_cast(dest)));
}
-inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
- return _Atomic_xchg(exchange_value, dest);
+template<>
+template
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
+ return PrimitiveConversions::cast(
+ _Atomic_xchg(PrimitiveConversions::cast(exchange_value),
+ reinterpret_cast(dest)));
+}
+
+extern "C" jlong _Atomic_xchg_long(jlong exchange_value, volatile jlong* dest);
+
+template<>
+template
+inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ return PrimitiveConversions::cast(
+ _Atomic_xchg_long(PrimitiveConversions::cast(exchange_value),
+ reinterpret_cast(dest)));
}
// Not using cmpxchg_using_helper here, because some configurations of
@@ -141,18 +137,4 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
PrimitiveConversions::cast(compare_value)));
}
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
-extern "C" jlong _Atomic_xchg_long(jlong exchange_value, volatile jlong* dest);
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
- return (intptr_t)_Atomic_xchg_long((jlong)exchange_value, (volatile jlong*)dest);
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void*)_Atomic_xchg_long((jlong)exchange_value, (volatile jlong*)dest);
-}
-
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
#endif // OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP
diff --git a/src/hotspot/os_cpu/solaris_x86/orderAccess_solaris_x86.inline.hpp b/src/hotspot/os_cpu/solaris_x86/orderAccess_solaris_x86.inline.hpp
index b88e715e4d4..bd676dbe62f 100644
--- a/src/hotspot/os_cpu/solaris_x86/orderAccess_solaris_x86.inline.hpp
+++ b/src/hotspot/os_cpu/solaris_x86/orderAccess_solaris_x86.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -58,6 +58,4 @@ inline void OrderAccess::fence() {
compiler_barrier();
}
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_SOLARIS_X86_VM_ORDERACCESS_SOLARIS_X86_INLINE_HPP
diff --git a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
index abf266917a1..5b72577fc6f 100644
--- a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
+++ b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
@@ -42,21 +42,6 @@
#pragma warning(disable: 4035) // Disables warnings reporting missing return statement
-inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
-
-inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; }
-
-inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; }
-inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; }
-inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; }
-
-
-inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
-inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
-
template
struct Atomic::PlatformAdd
: Atomic::AddAndFetch >
@@ -66,9 +51,6 @@ struct Atomic::PlatformAdd
};
#ifdef AMD64
-inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
-inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
-
template<>
template
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
@@ -81,41 +63,19 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return add_using_helper(os::atomic_add_ptr_func, add_value, dest);
}
-inline void Atomic::inc (volatile jint* dest) {
- (void)add (1, dest);
-}
+#define DEFINE_STUB_XCHG(ByteSize, StubType, StubName) \
+ template<> \
+ template \
+ inline T Atomic::PlatformXchg::operator()(T exchange_value, \
+ T volatile* dest) const { \
+ STATIC_ASSERT(ByteSize == sizeof(T)); \
+ return xchg_using_helper(StubName, exchange_value, dest); \
+ }
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- (void)add_ptr(1, dest);
-}
+DEFINE_STUB_XCHG(4, jint, os::atomic_xchg_func)
+DEFINE_STUB_XCHG(8, jlong, os::atomic_xchg_long_func)
-inline void Atomic::inc_ptr(volatile void* dest) {
- (void)add_ptr(1, dest);
-}
-
-inline void Atomic::dec (volatile jint* dest) {
- (void)add (-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- (void)add_ptr(-1, dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- (void)add_ptr(-1, dest);
-}
-
-inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
- return (jint)(*os::atomic_xchg_func)(exchange_value, dest);
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
- return (intptr_t)(os::atomic_xchg_ptr_func)(exchange_value, dest);
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void *)(os::atomic_xchg_ptr_func)((intptr_t)exchange_value, (volatile intptr_t*)dest);
-}
+#undef DEFINE_STUB_XCHG
#define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName) \
template<> \
@@ -134,8 +94,6 @@ DEFINE_STUB_CMPXCHG(8, jlong, os::atomic_cmpxchg_long_func)
#undef DEFINE_STUB_CMPXCHG
-inline jlong Atomic::load(const volatile jlong* src) { return *src; }
-
#else // !AMD64
template<>
@@ -152,39 +110,11 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) co
}
}
-inline void Atomic::inc (volatile jint* dest) {
- // alternative for InterlockedIncrement
- __asm {
- mov edx, dest;
- lock add dword ptr [edx], 1;
- }
-}
-
-inline void Atomic::inc_ptr(volatile intptr_t* dest) {
- inc((volatile jint*)dest);
-}
-
-inline void Atomic::inc_ptr(volatile void* dest) {
- inc((volatile jint*)dest);
-}
-
-inline void Atomic::dec (volatile jint* dest) {
- // alternative for InterlockedDecrement
- __asm {
- mov edx, dest;
- lock sub dword ptr [edx], 1;
- }
-}
-
-inline void Atomic::dec_ptr(volatile intptr_t* dest) {
- dec((volatile jint*)dest);
-}
-
-inline void Atomic::dec_ptr(volatile void* dest) {
- dec((volatile jint*)dest);
-}
-
-inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
+template<>
+template
+inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(4 == sizeof(T));
// alternative for InterlockedExchange
__asm {
mov eax, exchange_value;
@@ -193,14 +123,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
}
}
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
- return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
-inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
- return (void*)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
template<>
template
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@@ -258,9 +180,12 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
}
}
-inline jlong Atomic::load(const volatile jlong* src) {
- volatile jlong dest;
- volatile jlong* pdest = &dest;
+template<>
+template
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ volatile T dest;
+ volatile T* pdest = &dest;
__asm {
mov eax, src
fild qword ptr [eax]
@@ -270,8 +195,12 @@ inline jlong Atomic::load(const volatile jlong* src) {
return dest;
}
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
- volatile jlong* src = &store_value;
+template<>
+template
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+ T volatile* dest) const {
+ STATIC_ASSERT(8 == sizeof(T));
+ volatile T* src = &store_value;
__asm {
mov eax, src
fild qword ptr [eax]
@@ -280,10 +209,6 @@ inline void Atomic::store(jlong store_value, volatile jlong* dest) {
}
}
-inline void Atomic::store(jlong store_value, jlong* dest) {
- Atomic::store(store_value, (volatile jlong*)dest);
-}
-
#endif // AMD64
#pragma warning(default: 4035) // Enables warnings reporting missing return statement
diff --git a/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.inline.hpp b/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.inline.hpp
index 36c1c4a42c0..57488f4acf7 100644
--- a/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.inline.hpp
+++ b/src/hotspot/os_cpu/windows_x86/orderAccess_windows_x86.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -74,42 +74,46 @@ inline void OrderAccess::fence() {
#ifndef AMD64
template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jbyte* p, jbyte v) {
- __asm {
- mov edx, p;
- mov al, v;
- xchg al, byte ptr [edx];
+struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE>
+ VALUE_OBJ_CLASS_SPEC
+{
+ template
+ void operator()(T v, volatile T* p) const {
+ __asm {
+ mov edx, p;
+ mov al, v;
+ xchg al, byte ptr [edx];
+ }
}
-}
+};
template<>
-inline void OrderAccess::specialized_release_store_fence(volatile jshort* p, jshort v) {
- __asm {
- mov edx, p;
- mov ax, v;
- xchg ax, word ptr [edx];
+struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE>
+ VALUE_OBJ_CLASS_SPEC
+{
+ template
+ void operator()(T v, volatile T* p) const {
+ __asm {
+ mov edx, p;
+ mov ax, v;
+ xchg ax, word ptr [edx];
+ }
}
-}
+};
template<>
-inline void OrderAccess::specialized_release_store_fence (volatile jint* p, jint v) {
- __asm {
- mov edx, p;
- mov eax, v;
- xchg eax, dword ptr [edx];
+struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE>
+ VALUE_OBJ_CLASS_SPEC
+{
+ template
+ void operator()(T v, volatile T* p) const {
+ __asm {
+ mov edx, p;
+ mov eax, v;
+ xchg eax, dword ptr [edx];
+ }
}
-}
+};
#endif // AMD64
-template<>
-inline void OrderAccess::specialized_release_store_fence(volatile jfloat* p, jfloat v) {
- release_store_fence((volatile jint*)p, jint_cast(v));
-}
-template<>
-inline void OrderAccess::specialized_release_store_fence(volatile jdouble* p, jdouble v) {
- release_store_fence((volatile jlong*)p, jlong_cast(v));
-}
-
-#define VM_HAS_GENERALIZED_ORDER_ACCESS 1
-
#endif // OS_CPU_WINDOWS_X86_VM_ORDERACCESS_WINDOWS_X86_INLINE_HPP
diff --git a/src/hotspot/os_cpu/windows_x86/os_windows_x86.cpp b/src/hotspot/os_cpu/windows_x86/os_windows_x86.cpp
index d37dcff95b2..3dc70699bfc 100644
--- a/src/hotspot/os_cpu/windows_x86/os_windows_x86.cpp
+++ b/src/hotspot/os_cpu/windows_x86/os_windows_x86.cpp
@@ -50,6 +50,7 @@
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/timer.hpp"
+#include "symbolengine.hpp"
#include "unwind_windows_x86.hpp"
#include "utilities/events.hpp"
#include "utilities/vmError.hpp"
@@ -219,7 +220,7 @@ void os::initialize_thread(Thread* thr) {
// Atomics and Stub Functions
typedef jint xchg_func_t (jint, volatile jint*);
-typedef intptr_t xchg_ptr_func_t (intptr_t, volatile intptr_t*);
+typedef intptr_t xchg_long_func_t (jlong, volatile jlong*);
typedef jint cmpxchg_func_t (jint, volatile jint*, jint);
typedef jbyte cmpxchg_byte_func_t (jbyte, volatile jbyte*, jbyte);
typedef jlong cmpxchg_long_func_t (jlong, volatile jlong*, jlong);
@@ -243,12 +244,12 @@ jint os::atomic_xchg_bootstrap(jint exchange_value, volatile jint* dest) {
return old_value;
}
-intptr_t os::atomic_xchg_ptr_bootstrap(intptr_t exchange_value, volatile intptr_t* dest) {
+intptr_t os::atomic_xchg_long_bootstrap(jlong exchange_value, volatile jlong* dest) {
// try to use the stub:
- xchg_ptr_func_t* func = CAST_TO_FN_PTR(xchg_ptr_func_t*, StubRoutines::atomic_xchg_ptr_entry());
+ xchg_long_func_t* func = CAST_TO_FN_PTR(xchg_long_func_t*, StubRoutines::atomic_xchg_long_entry());
if (func != NULL) {
- os::atomic_xchg_ptr_func = func;
+ os::atomic_xchg_long_func = func;
return (*func)(exchange_value, dest);
}
assert(Threads::number_of_threads() == 0, "for bootstrap only");
@@ -338,7 +339,7 @@ intptr_t os::atomic_add_ptr_bootstrap(intptr_t add_value, volatile intptr_t* des
}
xchg_func_t* os::atomic_xchg_func = os::atomic_xchg_bootstrap;
-xchg_ptr_func_t* os::atomic_xchg_ptr_func = os::atomic_xchg_ptr_bootstrap;
+xchg_long_func_t* os::atomic_xchg_long_func = os::atomic_xchg_long_bootstrap;
cmpxchg_func_t* os::atomic_cmpxchg_func = os::atomic_cmpxchg_bootstrap;
cmpxchg_byte_func_t* os::atomic_cmpxchg_byte_func = os::atomic_cmpxchg_byte_bootstrap;
add_func_t* os::atomic_add_func = os::atomic_add_bootstrap;
@@ -397,6 +398,12 @@ bool os::platform_print_native_stack(outputStream* st, const void* context,
// may not contain what Java expects, and may cause the frame() constructor
// to crash. Let's just print out the symbolic address.
frame::print_C_frame(st, buf, buf_size, pc);
+ // print source file and line, if available
+ char buf[128];
+ int line_no;
+ if (SymbolEngine::get_source_info(pc, buf, sizeof(buf), &line_no)) {
+ st->print(" (%s:%d)", buf, line_no);
+ }
st->cr();
}
lastpc = pc;
diff --git a/src/hotspot/os_cpu/windows_x86/os_windows_x86.hpp b/src/hotspot/os_cpu/windows_x86/os_windows_x86.hpp
index 306f983d6bd..495ad4ff071 100644
--- a/src/hotspot/os_cpu/windows_x86/os_windows_x86.hpp
+++ b/src/hotspot/os_cpu/windows_x86/os_windows_x86.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,7 +30,7 @@
//
#ifdef AMD64
static jint (*atomic_xchg_func) (jint, volatile jint*);
- static intptr_t (*atomic_xchg_ptr_func) (intptr_t, volatile intptr_t*);
+ static intptr_t (*atomic_xchg_long_func) (jlong, volatile jlong*);
static jint (*atomic_cmpxchg_func) (jint, volatile jint*, jint);
static jbyte (*atomic_cmpxchg_byte_func) (jbyte, volatile jbyte*, jbyte);
@@ -40,7 +40,7 @@
static intptr_t (*atomic_add_ptr_func) (intptr_t, volatile intptr_t*);
static jint atomic_xchg_bootstrap (jint, volatile jint*);
- static intptr_t atomic_xchg_ptr_bootstrap (intptr_t, volatile intptr_t*);
+ static intptr_t atomic_xchg_long_bootstrap (jlong, volatile jlong*);
static jint atomic_cmpxchg_bootstrap (jint, volatile jint*, jint);
static jbyte atomic_cmpxchg_byte_bootstrap(jbyte, volatile jbyte*, jbyte);
diff --git a/src/hotspot/share/adlc/output_c.cpp b/src/hotspot/share/adlc/output_c.cpp
index 3e78d62e5d7..302e67bf5fb 100644
--- a/src/hotspot/share/adlc/output_c.cpp
+++ b/src/hotspot/share/adlc/output_c.cpp
@@ -2276,6 +2276,10 @@ private:
if (strcmp(rep_var,"$XMMRegister") == 0) return "as_XMMRegister";
#endif
if (strcmp(rep_var,"$CondRegister") == 0) return "as_ConditionRegister";
+#if defined(PPC64)
+ if (strcmp(rep_var,"$VectorRegister") == 0) return "as_VectorRegister";
+ if (strcmp(rep_var,"$VectorSRegister") == 0) return "as_VectorSRegister";
+#endif
return NULL;
}
diff --git a/src/hotspot/share/aot/aotCodeHeap.cpp b/src/hotspot/share/aot/aotCodeHeap.cpp
index 15030057fdc..15c1e696ca4 100644
--- a/src/hotspot/share/aot/aotCodeHeap.cpp
+++ b/src/hotspot/share/aot/aotCodeHeap.cpp
@@ -60,7 +60,14 @@ Klass* AOTCodeHeap::get_klass_from_got(const char* klass_name, int klass_len, co
fatal("Shared file %s error: klass %s should be resolved already", _lib->name(), klass_name);
vm_exit(1);
}
+ // Patch now to avoid extra runtime lookup
_klasses_got[klass_data->_got_index] = k;
+ if (k->is_instance_klass()) {
+ InstanceKlass* ik = InstanceKlass::cast(k);
+ if (ik->is_initialized()) {
+ _klasses_got[klass_data->_got_index - 1] = ik;
+ }
+ }
}
return k;
}
@@ -433,6 +440,7 @@ void AOTCodeHeap::link_shared_runtime_symbols() {
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_exception_handler_for_return_address", address, SharedRuntime::exception_handler_for_return_address);
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_register_finalizer", address, SharedRuntime::register_finalizer);
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_OSR_migration_end", address, SharedRuntime::OSR_migration_end);
+ SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_resolve_dynamic_invoke", address, CompilerRuntime::resolve_dynamic_invoke);
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_resolve_string_by_symbol", address, CompilerRuntime::resolve_string_by_symbol);
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_resolve_klass_by_symbol", address, CompilerRuntime::resolve_klass_by_symbol);
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_resolve_method_by_symbol_and_load_counters", address, CompilerRuntime::resolve_method_by_symbol_and_load_counters);
@@ -609,9 +617,13 @@ Method* AOTCodeHeap::find_method(Klass* klass, Thread* thread, const char* metho
return m;
}
+AOTKlassData* AOTCodeHeap::find_klass(const char *name) {
+ return (AOTKlassData*) os::dll_lookup(_lib->dl_handle(), name);
+}
+
AOTKlassData* AOTCodeHeap::find_klass(InstanceKlass* ik) {
ResourceMark rm;
- AOTKlassData* klass_data = (AOTKlassData*) os::dll_lookup(_lib->dl_handle(), ik->signature_name());
+ AOTKlassData* klass_data = find_klass(ik->signature_name());
return klass_data;
}
@@ -640,35 +652,52 @@ bool AOTCodeHeap::is_dependent_method(Klass* dependee, AOTCompiledMethod* aot) {
return false;
}
+void AOTCodeHeap::sweep_dependent_methods(int* indexes, int methods_cnt) {
+ int marked = 0;
+ for (int i = 0; i < methods_cnt; ++i) {
+ int code_id = indexes[i];
+ // Invalidate aot code.
+ if (Atomic::cmpxchg(invalid, &_code_to_aot[code_id]._state, not_set) != not_set) {
+ if (_code_to_aot[code_id]._state == in_use) {
+ AOTCompiledMethod* aot = _code_to_aot[code_id]._aot;
+ assert(aot != NULL, "aot should be set");
+ if (!aot->is_runtime_stub()) { // Something is wrong - should not invalidate stubs.
+ aot->mark_for_deoptimization(false);
+ marked++;
+ }
+ }
+ }
+ }
+ if (marked > 0) {
+ VM_Deoptimize op;
+ VMThread::execute(&op);
+ }
+}
+
void AOTCodeHeap::sweep_dependent_methods(AOTKlassData* klass_data) {
// Make dependent methods non_entrant forever.
int methods_offset = klass_data->_dependent_methods_offset;
if (methods_offset >= 0) {
- int marked = 0;
address methods_cnt_adr = _dependencies + methods_offset;
int methods_cnt = *(int*)methods_cnt_adr;
int* indexes = (int*)(methods_cnt_adr + 4);
- for (int i = 0; i < methods_cnt; ++i) {
- int code_id = indexes[i];
- // Invalidate aot code.
- if (Atomic::cmpxchg(invalid, &_code_to_aot[code_id]._state, not_set) != not_set) {
- if (_code_to_aot[code_id]._state == in_use) {
- AOTCompiledMethod* aot = _code_to_aot[code_id]._aot;
- assert(aot != NULL, "aot should be set");
- if (!aot->is_runtime_stub()) { // Something is wrong - should not invalidate stubs.
- aot->mark_for_deoptimization(false);
- marked++;
- }
- }
- }
- }
- if (marked > 0) {
- VM_Deoptimize op;
- VMThread::execute(&op);
- }
+ sweep_dependent_methods(indexes, methods_cnt);
}
}
+void AOTCodeHeap::sweep_dependent_methods(InstanceKlass* ik) {
+ AOTKlassData* klass_data = find_klass(ik);
+ vmassert(klass_data != NULL, "dependency data missing");
+ sweep_dependent_methods(klass_data);
+}
+
+void AOTCodeHeap::sweep_method(AOTCompiledMethod *aot) {
+ int indexes[] = {aot->method_index()};
+ sweep_dependent_methods(indexes, 1);
+ vmassert(aot->method()->code() != aot && aot->method()->aot_code() == NULL, "method still active");
+}
+
+
bool AOTCodeHeap::load_klass_data(InstanceKlass* ik, Thread* thread) {
ResourceMark rm;
@@ -718,6 +747,9 @@ bool AOTCodeHeap::load_klass_data(InstanceKlass* ik, Thread* thread) {
aot_class->_classloader = ik->class_loader_data();
// Set klass's Resolve (second) got cell.
_klasses_got[klass_data->_got_index] = ik;
+ if (ik->is_initialized()) {
+ _klasses_got[klass_data->_got_index - 1] = ik;
+ }
// Initialize global symbols of the DSO to the corresponding VM symbol values.
link_global_lib_symbols();
@@ -837,7 +869,7 @@ void AOTCodeHeap::got_metadata_do(void f(Metadata*)) {
f(md);
} else {
intptr_t meta = (intptr_t)md;
- fatal("Invalid value in _metaspace_got[%d] = " INTPTR_FORMAT, i, meta);
+ fatal("Invalid value in _klasses_got[%d] = " INTPTR_FORMAT, i, meta);
}
}
}
@@ -886,6 +918,127 @@ void AOTCodeHeap::metadata_do(void f(Metadata*)) {
aot->metadata_do(f);
}
}
- // Scan metaspace_got cells.
+ // Scan klasses_got cells.
got_metadata_do(f);
}
+
+bool AOTCodeHeap::reconcile_dynamic_klass(AOTCompiledMethod *caller, InstanceKlass* holder, int index, Klass *dyno_klass, const char *descriptor1, const char *descriptor2) {
+ const char * const descriptors[2] = {descriptor1, descriptor2};
+ JavaThread *thread = JavaThread::current();
+ ResourceMark rm(thread);
+
+ AOTKlassData* holder_data = find_klass(holder);
+ vmassert(holder_data != NULL, "klass %s not found", holder->signature_name());
+ vmassert(is_dependent_method(holder, caller), "sanity");
+
+ AOTKlassData* dyno_data = NULL;
+ bool adapter_failed = false;
+ char buf[64];
+ int descriptor_index = 0;
+ // descriptors[0] specific name (e.g. "adapter:<method idnum>") for matching
+ // descriptors[1] fall-back name ("adapter") for dependencies
+ while (descriptor_index < 2) {
+ const char *descriptor = descriptors[descriptor_index];
+ if (descriptor == NULL) {
+ break;
+ }
+ jio_snprintf(buf, sizeof buf, "%s<%d:%d>", descriptor, holder_data->_class_id, index);
+ dyno_data = find_klass(buf);
+ if (dyno_data != NULL) {
+ break;
+ }
+ // If match failed then try fall-back for dependencies
+ ++descriptor_index;
+ adapter_failed = true;
+ }
+
+ if (dyno_data == NULL && dyno_klass == NULL) {
+ // all is well, no (appendix) at compile-time, and still none
+ return true;
+ }
+
+ if (dyno_data == NULL) {
+ // no (appendix) at build-time, but now there is
+ sweep_dependent_methods(holder_data);
+ return false;
+ }
+
+ if (adapter_failed) {
+ // adapter method mismatch
+ sweep_dependent_methods(holder_data);
+ sweep_dependent_methods(dyno_data);
+ return false;
+ }
+
+ if (dyno_klass == NULL) {
+ // (appendix) at build-time, none now
+ sweep_dependent_methods(holder_data);
+ sweep_dependent_methods(dyno_data);
+ return false;
+ }
+
+ // TODO: support array appendix object
+ if (!dyno_klass->is_instance_klass()) {
+ sweep_dependent_methods(holder_data);
+ sweep_dependent_methods(dyno_data);
+ return false;
+ }
+
+ InstanceKlass* dyno = InstanceKlass::cast(dyno_klass);
+
+ if (!dyno->is_anonymous()) {
+ if (_klasses_got[dyno_data->_got_index] != dyno) {
+ // compile-time class different from runtime class, fail and deoptimize
+ sweep_dependent_methods(holder_data);
+ sweep_dependent_methods(dyno_data);
+ return false;
+ }
+
+ if (dyno->is_initialized()) {
+ _klasses_got[dyno_data->_got_index - 1] = dyno;
+ }
+ return true;
+ }
+
+ // TODO: support anonymous supers
+ if (!dyno->supers_have_passed_fingerprint_checks() || dyno->get_stored_fingerprint() != dyno_data->_fingerprint) {
+ NOT_PRODUCT( aot_klasses_fp_miss++; )
+ log_trace(aot, class, fingerprint)("class %s%s has bad fingerprint in %s tid=" INTPTR_FORMAT,
+ dyno->internal_name(), dyno->is_shared() ? " (shared)" : "",
+ _lib->name(), p2i(thread));
+ sweep_dependent_methods(holder_data);
+ sweep_dependent_methods(dyno_data);
+ return false;
+ }
+
+ _klasses_got[dyno_data->_got_index] = dyno;
+ if (dyno->is_initialized()) {
+ _klasses_got[dyno_data->_got_index - 1] = dyno;
+ }
+
+ // TODO: hook up any AOT code
+ // load_klass_data(dyno_data, thread);
+ return true;
+}
+
+bool AOTCodeHeap::reconcile_dynamic_method(AOTCompiledMethod *caller, InstanceKlass* holder, int index, Method *adapter_method) {
+ InstanceKlass *adapter_klass = adapter_method->method_holder();
+ char buf[64];
+ jio_snprintf(buf, sizeof buf, "adapter:%d", adapter_method->method_idnum());
+ if (!reconcile_dynamic_klass(caller, holder, index, adapter_klass, buf, "adapter")) {
+ return false;
+ }
+ return true;
+}
+
+bool AOTCodeHeap::reconcile_dynamic_invoke(AOTCompiledMethod* caller, InstanceKlass* holder, int index, Method* adapter_method, Klass *appendix_klass) {
+ if (!reconcile_dynamic_klass(caller, holder, index, appendix_klass, "appendix")) {
+ return false;
+ }
+
+ if (!reconcile_dynamic_method(caller, holder, index, adapter_method)) {
+ return false;
+ }
+
+ return true;
+}
diff --git a/src/hotspot/share/aot/aotCodeHeap.hpp b/src/hotspot/share/aot/aotCodeHeap.hpp
index 684f955c7b5..7bfd5c67531 100644
--- a/src/hotspot/share/aot/aotCodeHeap.hpp
+++ b/src/hotspot/share/aot/aotCodeHeap.hpp
@@ -241,13 +241,14 @@ public:
AOTKlassData* find_klass(InstanceKlass* ik);
bool load_klass_data(InstanceKlass* ik, Thread* thread);
Klass* get_klass_from_got(const char* klass_name, int klass_len, const Method* method);
- void sweep_dependent_methods(AOTKlassData* klass_data);
+
bool is_dependent_method(Klass* dependee, AOTCompiledMethod* aot);
const char* get_name_at(int offset) {
return _metaspace_names + offset;
}
+
void oops_do(OopClosure* f);
void metadata_do(void f(Metadata*));
void got_metadata_do(void f(Metadata*));
@@ -294,6 +295,21 @@ public:
static void print_statistics();
#endif
+
+ bool reconcile_dynamic_invoke(AOTCompiledMethod* caller, InstanceKlass* holder, int index, Method* adapter_method, Klass *appendix_klass);
+
+private:
+ AOTKlassData* find_klass(const char* name);
+
+ void sweep_dependent_methods(int* indexes, int methods_cnt);
+ void sweep_dependent_methods(AOTKlassData* klass_data);
+ void sweep_dependent_methods(InstanceKlass* ik);
+ void sweep_method(AOTCompiledMethod* aot);
+
+ bool reconcile_dynamic_klass(AOTCompiledMethod *caller, InstanceKlass* holder, int index, Klass *dyno, const char *descriptor1, const char *descriptor2 = NULL);
+
+ bool reconcile_dynamic_method(AOTCompiledMethod *caller, InstanceKlass* holder, int index, Method *adapter_method);
+
};
#endif // SHARE_VM_AOT_AOTCODEHEAP_HPP
diff --git a/src/hotspot/share/aot/aotLoader.cpp b/src/hotspot/share/aot/aotLoader.cpp
index 5e1597d9b39..531a67f6e9e 100644
--- a/src/hotspot/share/aot/aotLoader.cpp
+++ b/src/hotspot/share/aot/aotLoader.cpp
@@ -40,6 +40,10 @@ GrowableArray* AOTLoader::_libraries = new(ResourceObj::C_HEAP, mtCode)
#define FOR_ALL_AOT_LIBRARIES(lib) for (GrowableArrayIterator lib = libraries()->begin(); lib != libraries()->end(); ++lib)
void AOTLoader::load_for_klass(InstanceKlass* ik, Thread* thread) {
+ if (ik->is_anonymous()) {
+ // don't even bother
+ return;
+ }
if (UseAOT) {
FOR_ALL_AOT_HEAPS(heap) {
(*heap)->load_klass_data(ik, thread);
@@ -48,6 +52,10 @@ void AOTLoader::load_for_klass(InstanceKlass* ik, Thread* thread) {
}
uint64_t AOTLoader::get_saved_fingerprint(InstanceKlass* ik) {
+ if (ik->is_anonymous()) {
+ // don't even bother
+ return 0;
+ }
FOR_ALL_AOT_HEAPS(heap) {
AOTKlassData* klass_data = (*heap)->find_klass(ik);
if (klass_data != NULL) {
@@ -259,3 +267,34 @@ void AOTLoader::print_statistics() {
}
}
#endif
+
+
+bool AOTLoader::reconcile_dynamic_invoke(InstanceKlass* holder, int index, Method* adapter_method, Klass* appendix_klass) {
+ if (!UseAOT) {
+ return true;
+ }
+ JavaThread* thread = JavaThread::current();
+ ResourceMark rm(thread);
+ RegisterMap map(thread, false);
+ frame caller_frame = thread->last_frame().sender(&map); // Skip stub
+ CodeBlob* caller_cb = caller_frame.cb();
+ guarantee(caller_cb != NULL && caller_cb->is_compiled(), "must be called from compiled method");
+ CompiledMethod* cm = caller_cb->as_compiled_method();
+
+ if (!cm->is_aot()) {
+ return true;
+ }
+ AOTCompiledMethod* aot = (AOTCompiledMethod*)cm;
+
+ AOTCodeHeap* caller_heap = NULL;
+ FOR_ALL_AOT_HEAPS(heap) {
+ if ((*heap)->contains_blob(aot)) {
+ caller_heap = *heap;
+ break;
+ }
+ }
+ guarantee(caller_heap != NULL, "CodeHeap not found");
+ bool success = caller_heap->reconcile_dynamic_invoke(aot, holder, index, adapter_method, appendix_klass);
+ vmassert(success || thread->last_frame().sender(&map).is_deoptimized_frame(), "caller not deoptimized on failure");
+ return success;
+}
diff --git a/src/hotspot/share/aot/aotLoader.hpp b/src/hotspot/share/aot/aotLoader.hpp
index 01ff4102b62..87f745d46f0 100644
--- a/src/hotspot/share/aot/aotLoader.hpp
+++ b/src/hotspot/share/aot/aotLoader.hpp
@@ -28,6 +28,7 @@
#include "runtime/handles.hpp"
class AOTCodeHeap;
+class AOTCompiledMethod;
class AOTLib;
class CodeBlob;
template class GrowableArray;
@@ -71,6 +72,7 @@ public:
static void flush_evol_dependents_on(InstanceKlass* dependee) NOT_AOT_RETURN;
#endif // HOTSWAP
+ static bool reconcile_dynamic_invoke(InstanceKlass* holder, int index, Method* adapter_method, Klass *appendix_klass) NOT_AOT({ return true; });
};
#endif // SHARE_VM_AOT_AOTLOADER_HPP
diff --git a/src/hotspot/share/asm/assembler.cpp b/src/hotspot/share/asm/assembler.cpp
index 84fb55bb3ff..b685f750f69 100644
--- a/src/hotspot/share/asm/assembler.cpp
+++ b/src/hotspot/share/asm/assembler.cpp
@@ -236,11 +236,9 @@ DelayedConstant* DelayedConstant::add(BasicType type,
if (dcon->match(type, cfn))
return dcon;
if (dcon->value_fn == NULL) {
- // (cmpxchg not because this is multi-threaded but because I'm paranoid)
- if (Atomic::cmpxchg_ptr(CAST_FROM_FN_PTR(void*, cfn), &dcon->value_fn, NULL) == NULL) {
+ dcon->value_fn = cfn;
dcon->type = type;
return dcon;
- }
}
}
// If this assert is hit (in pre-integration testing!) then re-evaluate
diff --git a/src/hotspot/share/c1/c1_Compilation.cpp b/src/hotspot/share/c1/c1_Compilation.cpp
index 156e73014db..fd1902b13d8 100644
--- a/src/hotspot/share/c1/c1_Compilation.cpp
+++ b/src/hotspot/share/c1/c1_Compilation.cpp
@@ -500,18 +500,22 @@ void Compilation::generate_exception_handler_table() {
scope_depths->trunc_to(0);
pcos->trunc_to(0);
+ int prev_scope = 0;
for (int i = 0; i < handlers->length(); i++) {
XHandler* handler = handlers->handler_at(i);
assert(handler->entry_pco() != -1, "must have been generated");
+ assert(handler->scope_count() >= prev_scope, "handlers should be sorted by scope");
- int e = bcis->find(handler->handler_bci());
- if (e >= 0 && scope_depths->at(e) == handler->scope_count()) {
- // two different handlers are declared to dispatch to the same
- // catch bci. During parsing we created edges for each
- // handler but we really only need one. The exception handler
- // table will also get unhappy if we try to declare both since
- // it's nonsensical. Just skip this handler.
- continue;
+ if (handler->scope_count() == prev_scope) {
+ int e = bcis->find_from_end(handler->handler_bci());
+ if (e >= 0 && scope_depths->at(e) == handler->scope_count()) {
+ // two different handlers are declared to dispatch to the same
+ // catch bci. During parsing we created edges for each
+ // handler but we really only need one. The exception handler
+ // table will also get unhappy if we try to declare both since
+ // it's nonsensical. Just skip this handler.
+ continue;
+ }
}
bcis->append(handler->handler_bci());
@@ -521,13 +525,14 @@ void Compilation::generate_exception_handler_table() {
scope_depths->append(0);
} else {
scope_depths->append(handler->scope_count());
- }
+ }
pcos->append(handler->entry_pco());
// stop processing once we hit a catch any
if (handler->is_catch_all()) {
assert(i == handlers->length() - 1, "catch all must be last handler");
- }
+ }
+ prev_scope = handler->scope_count();
}
exception_handler_table()->add_subtable(info->pco(), bcis, scope_depths, pcos);
}
diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp
index a9c073d32b0..757c5e79274 100644
--- a/src/hotspot/share/c1/c1_LIR.hpp
+++ b/src/hotspot/share/c1/c1_LIR.hpp
@@ -1913,6 +1913,12 @@ class LIR_OpProfileCall : public LIR_Op {
virtual void emit_code(LIR_Assembler* masm);
virtual LIR_OpProfileCall* as_OpProfileCall() { return this; }
virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
+ bool should_profile_receiver_type() const {
+ bool callee_is_static = _profiled_callee->is_loaded() && _profiled_callee->is_static();
+ Bytecodes::Code bc = _profiled_method->java_code_at_bci(_profiled_bci);
+ bool call_is_virtual = (bc == Bytecodes::_invokevirtual && !_profiled_callee->can_be_statically_bound()) || bc == Bytecodes::_invokeinterface;
+ return C1ProfileVirtualCalls && call_is_virtual && !callee_is_static;
+ }
};
// LIR_OpProfileType
diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp
index 37bd62a6929..ed64f686a92 100644
--- a/src/hotspot/share/c1/c1_LIRGenerator.cpp
+++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp
@@ -1304,7 +1304,9 @@ void LIRGenerator::do_getClass(Intrinsic* x) {
// FIXME T_ADDRESS should actually be T_METADATA but it can't because the
// meaning of these two is mixed up (see JDK-8026837).
__ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_ADDRESS), temp, info);
- __ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
+ __ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_ADDRESS), result);
+ // mirror = ((OopHandle)mirror)->resolve();
+ __ move_wide(new LIR_Address(result, T_OBJECT), result);
}
// java.lang.Class::isPrimitive()
diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp
index c66a69dd689..e1d502bd5b9 100644
--- a/src/hotspot/share/c1/c1_Runtime1.cpp
+++ b/src/hotspot/share/c1/c1_Runtime1.cpp
@@ -1221,11 +1221,6 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_i
MutexLockerEx ml_code (CodeCache_lock, Mutex::_no_safepoint_check_flag);
nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
guarantee(nm != NULL, "only nmethods can contain non-perm oops");
- if (!nm->on_scavenge_root_list() &&
- ((mirror.not_null() && mirror()->is_scavengable()) ||
- (appendix.not_null() && appendix->is_scavengable()))) {
- CodeCache::add_scavenge_root_nmethod(nm);
- }
// Since we've patched some oops in the nmethod,
// (re)register it with the heap.
@@ -1377,8 +1372,6 @@ template int obj_arraycopy_work(oopDesc* src, T* src_addr,
// barrier. The assert will fail if this is not the case.
// Note that we use the non-virtual inlineable variant of write_ref_array.
BarrierSet* bs = Universe::heap()->barrier_set();
- assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt");
- assert(bs->has_write_ref_array_pre_opt(), "For pre-barrier as well.");
if (src == dst) {
// same object, no check
bs->write_ref_array_pre(dst_addr, length);
diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp
index 0c0e9e4e27b..bfde1da61b8 100644
--- a/src/hotspot/share/ci/ciEnv.cpp
+++ b/src/hotspot/share/ci/ciEnv.cpp
@@ -1218,12 +1218,12 @@ void ciEnv::dump_compile_data(outputStream* out) {
method->signature()->as_quoted_ascii(),
entry_bci, comp_level);
if (compiler_data() != NULL) {
- if (is_c2_compile(comp_level)) { // C2 or Shark
+ if (is_c2_compile(comp_level)) {
#ifdef COMPILER2
// Dump C2 inlining data.
((Compile*)compiler_data())->dump_inline_data(out);
#endif
- } else if (is_c1_compile(comp_level)) { // C1
+ } else if (is_c1_compile(comp_level)) {
#ifdef COMPILER1
// Dump C1 inlining data.
((Compilation*)compiler_data())->dump_inline_data(out);
diff --git a/src/hotspot/share/ci/ciInstanceKlass.cpp b/src/hotspot/share/ci/ciInstanceKlass.cpp
index e0b910cc2d3..00f4a3f14dd 100644
--- a/src/hotspot/share/ci/ciInstanceKlass.cpp
+++ b/src/hotspot/share/ci/ciInstanceKlass.cpp
@@ -665,9 +665,8 @@ class StaticFinalFieldPrinter : public FieldClosure {
_out->print_cr("null");
} else if (value->is_instance()) {
if (value->is_a(SystemDictionary::String_klass())) {
- _out->print("\"");
- _out->print_raw(java_lang_String::as_quoted_ascii(value));
- _out->print_cr("\"");
+ const char* ascii_value = java_lang_String::as_quoted_ascii(value);
+ _out->print("\"%s\"", (ascii_value != NULL) ? ascii_value : "");
} else {
const char* klass_name = value->klass()->name()->as_quoted_ascii();
_out->print_cr("%s", klass_name);
diff --git a/src/hotspot/share/ci/ciMethod.cpp b/src/hotspot/share/ci/ciMethod.cpp
index a960f17db52..d878fa93bab 100644
--- a/src/hotspot/share/ci/ciMethod.cpp
+++ b/src/hotspot/share/ci/ciMethod.cpp
@@ -53,10 +53,6 @@
#include "ci/ciTypeFlow.hpp"
#include "oops/method.hpp"
#endif
-#ifdef SHARK
-#include "ci/ciTypeFlow.hpp"
-#include "oops/method.hpp"
-#endif
// ciMethod
//
@@ -97,10 +93,10 @@ ciMethod::ciMethod(const methodHandle& h_m, ciInstanceKlass* holder) :
_exception_handlers = NULL;
_liveness = NULL;
_method_blocks = NULL;
-#if defined(COMPILER2) || defined(SHARK)
+#if defined(COMPILER2)
_flow = NULL;
_bcea = NULL;
-#endif // COMPILER2 || SHARK
+#endif // COMPILER2
ciEnv *env = CURRENT_ENV;
if (env->jvmti_can_hotswap_or_post_breakpoint() && can_be_compiled()) {
@@ -173,12 +169,12 @@ ciMethod::ciMethod(ciInstanceKlass* holder,
_can_be_statically_bound(false),
_method_blocks( NULL),
_method_data( NULL)
-#if defined(COMPILER2) || defined(SHARK)
+#if defined(COMPILER2)
,
_flow( NULL),
_bcea( NULL),
_instructions_size(-1)
-#endif // COMPILER2 || SHARK
+#endif // COMPILER2
{
// Usually holder and accessor are the same type but in some cases
// the holder has the wrong class loader (e.g. invokedynamic call
@@ -287,23 +283,6 @@ int ciMethod::vtable_index() {
}
-#ifdef SHARK
-// ------------------------------------------------------------------
-// ciMethod::itable_index
-//
-// Get the position of this method's entry in the itable, if any.
-int ciMethod::itable_index() {
- check_is_loaded();
- assert(holder()->is_linked(), "must be linked");
- VM_ENTRY_MARK;
- Method* m = get_Method();
- if (!m->has_itable_index())
- return Method::nonvirtual_vtable_index;
- return m->itable_index();
-}
-#endif // SHARK
-
-
// ------------------------------------------------------------------
// ciMethod::native_entry
//
@@ -369,34 +348,34 @@ bool ciMethod::has_balanced_monitors() {
// ------------------------------------------------------------------
// ciMethod::get_flow_analysis
ciTypeFlow* ciMethod::get_flow_analysis() {
-#if defined(COMPILER2) || defined(SHARK)
+#if defined(COMPILER2)
if (_flow == NULL) {
ciEnv* env = CURRENT_ENV;
_flow = new (env->arena()) ciTypeFlow(env, this);
_flow->do_flow();
}
return _flow;
-#else // COMPILER2 || SHARK
+#else // COMPILER2
ShouldNotReachHere();
return NULL;
-#endif // COMPILER2 || SHARK
+#endif // COMPILER2
}
// ------------------------------------------------------------------
// ciMethod::get_osr_flow_analysis
ciTypeFlow* ciMethod::get_osr_flow_analysis(int osr_bci) {
-#if defined(COMPILER2) || defined(SHARK)
+#if defined(COMPILER2)
// OSR entry points are always place after a call bytecode of some sort
assert(osr_bci >= 0, "must supply valid OSR entry point");
ciEnv* env = CURRENT_ENV;
ciTypeFlow* flow = new (env->arena()) ciTypeFlow(env, this, osr_bci);
flow->do_flow();
return flow;
-#else // COMPILER2 || SHARK
+#else // COMPILER2
ShouldNotReachHere();
return NULL;
-#endif // COMPILER2 || SHARK
+#endif // COMPILER2
}
// ------------------------------------------------------------------
diff --git a/src/hotspot/share/ci/ciMethod.hpp b/src/hotspot/share/ci/ciMethod.hpp
index 2d55771f131..bc77829a163 100644
--- a/src/hotspot/share/ci/ciMethod.hpp
+++ b/src/hotspot/share/ci/ciMethod.hpp
@@ -96,7 +96,7 @@ class ciMethod : public ciMetadata {
// Optional liveness analyzer.
MethodLiveness* _liveness;
-#if defined(COMPILER2) || defined(SHARK)
+#if defined(COMPILER2)
ciTypeFlow* _flow;
BCEscapeAnalyzer* _bcea;
#endif
@@ -216,9 +216,6 @@ class ciMethod : public ciMetadata {
// Runtime information.
int vtable_index();
-#ifdef SHARK
- int itable_index();
-#endif // SHARK
address native_entry();
address interpreter_entry();
diff --git a/src/hotspot/share/ci/ciTypeFlow.hpp b/src/hotspot/share/ci/ciTypeFlow.hpp
index 381fff86a14..f9ffaafb93e 100644
--- a/src/hotspot/share/ci/ciTypeFlow.hpp
+++ b/src/hotspot/share/ci/ciTypeFlow.hpp
@@ -30,12 +30,6 @@
#include "ci/ciKlass.hpp"
#include "ci/ciMethodBlocks.hpp"
#endif
-#ifdef SHARK
-#include "ci/ciEnv.hpp"
-#include "ci/ciKlass.hpp"
-#include "ci/ciMethodBlocks.hpp"
-#include "shark/shark_globals.hpp"
-#endif
class ciTypeFlow : public ResourceObj {
diff --git a/src/hotspot/share/classfile/altHashing.hpp b/src/hotspot/share/classfile/altHashing.hpp
index 43af02f39ce..15c6a34b25a 100644
--- a/src/hotspot/share/classfile/altHashing.hpp
+++ b/src/hotspot/share/classfile/altHashing.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
#ifndef SHARE_VM_CLASSFILE_ALTHASHING_HPP
#define SHARE_VM_CLASSFILE_ALTHASHING_HPP
-#include "prims/jni.h"
+#include "jni.h"
#include "classfile/symbolTable.hpp"
/**
diff --git a/src/hotspot/share/classfile/classFileParser.cpp b/src/hotspot/share/classfile/classFileParser.cpp
index 369dd26f04c..73ab562145e 100644
--- a/src/hotspot/share/classfile/classFileParser.cpp
+++ b/src/hotspot/share/classfile/classFileParser.cpp
@@ -5924,20 +5924,31 @@ void ClassFileParser::parse_stream(const ClassFileStream* const stream,
#if INCLUDE_CDS
if (DumpLoadedClassList != NULL && stream->source() != NULL && classlist_file->is_open()) {
- // Only dump the classes that can be stored into CDS archive.
- // Anonymous classes such as generated LambdaForm classes are also not included.
- if (SystemDictionaryShared::is_sharing_possible(_loader_data) &&
+ if (!ClassLoader::has_jrt_entry()) {
+ warning("DumpLoadedClassList and CDS are not supported in exploded build");
+ DumpLoadedClassList = NULL;
+ } else if (SystemDictionaryShared::is_sharing_possible(_loader_data) &&
_host_klass == NULL) {
+ // Only dump the classes that can be stored into CDS archive.
+ // Anonymous classes such as generated LambdaForm classes are also not included.
oop class_loader = _loader_data->class_loader();
ResourceMark rm(THREAD);
- // For the boot and platform class loaders, check if the class is not found in the
- // java runtime image. Additional check for the boot class loader is if the class
- // is not found in the boot loader's appended entries. This indicates that the class
- // is not useable during run time, such as the ones found in the --patch-module entries,
- // so it should not be included in the classlist file.
- if (((class_loader == NULL && !ClassLoader::contains_append_entry(stream->source())) ||
- SystemDictionary::is_platform_class_loader(class_loader)) &&
- !ClassLoader::is_jrt(stream->source())) {
+ bool skip = false;
+ if (class_loader == NULL || SystemDictionary::is_platform_class_loader(class_loader)) {
+ // For the boot and platform class loaders, skip classes that are not found in the
+ // java runtime image, such as those found in the --patch-module entries.
+ // These classes can't be loaded from the archive during runtime.
+ if (!ClassLoader::is_modules_image(stream->source()) && strncmp(stream->source(), "jrt:", 4) != 0) {
+ skip = true;
+ }
+
+ if (class_loader == NULL && ClassLoader::contains_append_entry(stream->source())) {
+ // .. but don't skip the boot classes that are loaded from -Xbootclasspath/a
+ // as they can be loaded from the archive during runtime.
+ skip = false;
+ }
+ }
+ if (skip) {
tty->print_cr("skip writing class %s from source %s to classlist file",
_class_name->as_C_string(), stream->source());
} else {
diff --git a/src/hotspot/share/classfile/classLoader.cpp b/src/hotspot/share/classfile/classLoader.cpp
index 8592616311f..56bfed2b268 100644
--- a/src/hotspot/share/classfile/classLoader.cpp
+++ b/src/hotspot/share/classfile/classLoader.cpp
@@ -578,8 +578,8 @@ void ClassPathImageEntry::compile_the_world(Handle loader, TRAPS) {
}
#endif
-bool ClassPathImageEntry::is_jrt() {
- return ClassLoader::is_jrt(name());
+bool ClassPathImageEntry::is_modules_image() const {
+ return ClassLoader::is_modules_image(name());
}
#if INCLUDE_CDS
@@ -795,14 +795,13 @@ void ClassLoader::setup_search_path(const char *class_path, bool bootstrap_searc
// Check for a jimage
if (Arguments::has_jimage()) {
assert(_jrt_entry == NULL, "should not setup bootstrap class search path twice");
- assert(new_entry != NULL && new_entry->is_jrt(), "No java runtime image present");
+ assert(new_entry != NULL && new_entry->is_modules_image(), "No java runtime image present");
_jrt_entry = new_entry;
++_num_entries;
#if INCLUDE_CDS
if (DumpSharedSpaces) {
JImageFile *jimage = _jrt_entry->jimage();
assert(jimage != NULL, "No java runtime image file present");
- ClassLoader::initialize_module_loader_map(jimage);
}
#endif
}
@@ -1144,61 +1143,6 @@ int ClassLoader::crc32(int crc, const char* buf, int len) {
return (*Crc32)(crc, (const jbyte*)buf, len);
}
-#if INCLUDE_CDS
-void ClassLoader::initialize_module_loader_map(JImageFile* jimage) {
- if (!DumpSharedSpaces) {
- return; // only needed for CDS dump time
- }
-
- ResourceMark rm;
- jlong size;
- JImageLocationRef location = (*JImageFindResource)(jimage, JAVA_BASE_NAME, get_jimage_version_string(), MODULE_LOADER_MAP, &size);
- if (location == 0) {
- vm_exit_during_initialization(
- "Cannot find ModuleLoaderMap location from modules jimage.", NULL);
- }
- char* buffer = NEW_RESOURCE_ARRAY(char, size + 1);
- buffer[size] = '\0';
- jlong read = (*JImageGetResource)(jimage, location, buffer, size);
- if (read != size) {
- vm_exit_during_initialization(
- "Cannot find ModuleLoaderMap resource from modules jimage.", NULL);
- }
- char* char_buf = (char*)buffer;
- int buflen = (int)strlen(char_buf);
- char* begin_ptr = char_buf;
- char* end_ptr = strchr(begin_ptr, '\n');
- bool process_boot_modules = false;
- _boot_modules_array = new (ResourceObj::C_HEAP, mtModule)
- GrowableArray(INITIAL_BOOT_MODULES_ARRAY_SIZE, true);
- _platform_modules_array = new (ResourceObj::C_HEAP, mtModule)
- GrowableArray(INITIAL_PLATFORM_MODULES_ARRAY_SIZE, true);
- while (end_ptr != NULL && (end_ptr - char_buf) < buflen) {
- // Allocate a buffer from the C heap to be appended to the _boot_modules_array
- // or the _platform_modules_array.
- char* temp_name = NEW_C_HEAP_ARRAY(char, (size_t)(end_ptr - begin_ptr + 1), mtInternal);
- strncpy(temp_name, begin_ptr, end_ptr - begin_ptr);
- temp_name[end_ptr - begin_ptr] = '\0';
- if (strncmp(temp_name, "BOOT", 4) == 0) {
- process_boot_modules = true;
- FREE_C_HEAP_ARRAY(char, temp_name);
- } else if (strncmp(temp_name, "PLATFORM", 8) == 0) {
- process_boot_modules = false;
- FREE_C_HEAP_ARRAY(char, temp_name);
- } else {
- // module name
- if (process_boot_modules) {
- _boot_modules_array->append(temp_name);
- } else {
- _platform_modules_array->append(temp_name);
- }
- }
- begin_ptr = ++end_ptr;
- end_ptr = strchr(begin_ptr, '\n');
- }
-}
-#endif
-
// Function add_package extracts the package from the fully qualified class name
// and checks if the package is in the boot loader's package entry table. If so,
// then it sets the classpath_index in the package entry record.
@@ -1290,58 +1234,6 @@ objArrayOop ClassLoader::get_system_packages(TRAPS) {
return result();
}
-#if INCLUDE_CDS
-s2 ClassLoader::module_to_classloader(const char* module_name) {
-
- assert(DumpSharedSpaces, "dump time only");
- assert(_boot_modules_array != NULL, "_boot_modules_array is NULL");
- assert(_platform_modules_array != NULL, "_platform_modules_array is NULL");
-
- int array_size = _boot_modules_array->length();
- for (int i = 0; i < array_size; i++) {
- if (strcmp(module_name, _boot_modules_array->at(i)) == 0) {
- return BOOT_LOADER;
- }
- }
-
- array_size = _platform_modules_array->length();
- for (int i = 0; i < array_size; i++) {
- if (strcmp(module_name, _platform_modules_array->at(i)) == 0) {
- return PLATFORM_LOADER;
- }
- }
-
- return APP_LOADER;
-}
-
-s2 ClassLoader::classloader_type(Symbol* class_name, ClassPathEntry* e, int classpath_index, TRAPS) {
- assert(DumpSharedSpaces, "Only used for CDS dump time");
-
- // obtain the classloader type based on the class name.
- // First obtain the package name based on the class name. Then obtain
- // the classloader type based on the package name from the jimage using
- // a jimage API. If the classloader type cannot be found from the
- // jimage, it is determined by the class path entry.
- jshort loader_type = ClassLoader::APP_LOADER;
- if (e->is_jrt()) {
- ResourceMark rm;
- TempNewSymbol pkg_name = InstanceKlass::package_from_name(class_name, CHECK_0);
- if (pkg_name != NULL) {
- const char* pkg_name_C_string = (const char*)(pkg_name->as_C_string());
- ClassPathImageEntry* cpie = (ClassPathImageEntry*)e;
- JImageFile* jimage = cpie->jimage();
- char* module_name = (char*)(*JImagePackageToModule)(jimage, pkg_name_C_string);
- if (module_name != NULL) {
- loader_type = ClassLoader::module_to_classloader(module_name);
- }
- }
- } else if (ClassLoaderExt::is_boot_classpath(classpath_index)) {
- loader_type = ClassLoader::BOOT_LOADER;
- }
- return loader_type;
-}
-#endif
-
// caller needs ResourceMark
const char* ClassLoader::file_name_for_class_name(const char* class_name,
int class_name_len) {
@@ -1954,7 +1846,7 @@ void ClassLoader::compile_the_world() {
// Iterate over all bootstrap class path appended entries
ClassPathEntry* e = _first_append_entry;
while (e != NULL) {
- assert(!e->is_jrt(), "A modular java runtime image is present on the list of appended entries");
+ assert(!e->is_modules_image(), "A modular java runtime image is present on the list of appended entries");
e->compile_the_world(system_class_loader, CATCH);
e = e->next();
}
diff --git a/src/hotspot/share/classfile/classLoader.hpp b/src/hotspot/share/classfile/classLoader.hpp
index b5633962008..deb3d0ecc23 100644
--- a/src/hotspot/share/classfile/classLoader.hpp
+++ b/src/hotspot/share/classfile/classLoader.hpp
@@ -37,13 +37,6 @@
// Name of boot "modules" image
#define MODULES_IMAGE_NAME "modules"
-// Name of the resource containing mapping from module names to defining class loader type
-#define MODULE_LOADER_MAP "jdk/internal/vm/cds/resources/ModuleLoaderMap.dat"
-
-// Initial sizes of the following arrays are based on the generated ModuleLoaderMap.dat
-#define INITIAL_BOOT_MODULES_ARRAY_SIZE 30
-#define INITIAL_PLATFORM_MODULES_ARRAY_SIZE 15
-
// Class path entry (directory or zip file)
class JImageFile;
@@ -55,15 +48,13 @@ private:
ClassPathEntry* volatile _next;
public:
// Next entry in class path
- ClassPathEntry* next() const {
- return (ClassPathEntry*) OrderAccess::load_ptr_acquire(&_next);
- }
+ ClassPathEntry* next() const { return OrderAccess::load_acquire(&_next); }
virtual ~ClassPathEntry() {}
void set_next(ClassPathEntry* next) {
// may have unlocked readers, so ensure visibility.
- OrderAccess::release_store_ptr(&_next, next);
+ OrderAccess::release_store(&_next, next);
}
- virtual bool is_jrt() = 0;
+ virtual bool is_modules_image() const = 0;
virtual bool is_jar_file() const = 0;
virtual const char* name() const = 0;
virtual JImageFile* jimage() const = 0;
@@ -80,7 +71,7 @@ class ClassPathDirEntry: public ClassPathEntry {
private:
const char* _dir; // Name of directory
public:
- bool is_jrt() { return false; }
+ bool is_modules_image() const { return false; }
bool is_jar_file() const { return false; }
const char* name() const { return _dir; }
JImageFile* jimage() const { return NULL; }
@@ -118,7 +109,7 @@ class ClassPathZipEntry: public ClassPathEntry {
u1 _multi_versioned; // indicates if the jar file has multi-versioned entries.
// It can have value of "_unknown", "_yes", or "_no"
public:
- bool is_jrt() { return false; }
+ bool is_modules_image() const { return false; }
bool is_jar_file() const { return true; }
const char* name() const { return _zip_name; }
JImageFile* jimage() const { return NULL; }
@@ -140,7 +131,7 @@ private:
JImageFile* _jimage;
const char* _name;
public:
- bool is_jrt();
+ bool is_modules_image() const;
bool is_jar_file() const { return false; }
bool is_open() const { return _jimage != NULL; }
const char* name() const { return _name == NULL ? "" : _name; }
@@ -403,7 +394,8 @@ class ClassLoader: AllStatic {
static int compute_Object_vtable();
static ClassPathEntry* classpath_entry(int n) {
- assert(n >= 0 && n < _num_entries, "sanity");
+ assert(n >= 0, "sanity");
+ assert(!has_jrt_entry() || n < _num_entries, "sanity");
if (n == 0) {
assert(has_jrt_entry(), "No class path entry at 0 for exploded module builds");
return ClassLoader::_jrt_entry;
@@ -438,10 +430,6 @@ class ClassLoader: AllStatic {
static bool check_shared_paths_misc_info(void* info, int size);
static void exit_with_path_failure(const char* error, const char* message);
- static s2 module_to_classloader(const char* module_name);
- static void initialize_module_loader_map(JImageFile* jimage);
- static s2 classloader_type(Symbol* class_name, ClassPathEntry* e,
- int classpath_index, TRAPS);
static void record_shared_class_loader_type(InstanceKlass* ik, const ClassFileStream* stream);
#endif
static JImageLocationRef jimage_find_resource(JImageFile* jf, const char* module_name,
@@ -479,7 +467,7 @@ class ClassLoader: AllStatic {
// distinguish from a class_name with no package name, as both cases have a NULL return value
static const char* package_from_name(const char* const class_name, bool* bad_class_name = NULL);
- static bool is_jrt(const char* name) { return string_ends_with(name, MODULES_IMAGE_NAME); }
+ static bool is_modules_image(const char* name) { return string_ends_with(name, MODULES_IMAGE_NAME); }
// Debugging
static void verify() PRODUCT_RETURN;
diff --git a/src/hotspot/share/classfile/classLoaderData.cpp b/src/hotspot/share/classfile/classLoaderData.cpp
index 8aaa0ee5044..761782bb35b 100644
--- a/src/hotspot/share/classfile/classLoaderData.cpp
+++ b/src/hotspot/share/classfile/classLoaderData.cpp
@@ -82,11 +82,6 @@
#include "trace/tracing.hpp"
#endif
-// helper function to avoid in-line casts
-template static T* load_ptr_acquire(T* volatile *p) {
- return static_cast(OrderAccess::load_ptr_acquire(p));
-}
-
ClassLoaderData * ClassLoaderData::_the_null_class_loader_data = NULL;
ClassLoaderData::ClassLoaderData(Handle h_class_loader, bool is_anonymous, Dependencies dependencies) :
@@ -98,7 +93,8 @@ ClassLoaderData::ClassLoaderData(Handle h_class_loader, bool is_anonymous, Depen
_keep_alive((is_anonymous || h_class_loader.is_null()) ? 1 : 0),
_metaspace(NULL), _unloading(false), _klasses(NULL),
_modules(NULL), _packages(NULL),
- _claimed(0), _jmethod_ids(NULL), _handles(), _deallocate_list(NULL),
+ _claimed(0), _modified_oops(true), _accumulated_modified_oops(false),
+ _jmethod_ids(NULL), _handles(), _deallocate_list(NULL),
_next(NULL), _dependencies(dependencies),
_metaspace_lock(new Mutex(Monitor::leaf+1, "Metaspace allocation lock", true,
Monitor::_safepoint_check_never)) {
@@ -151,7 +147,7 @@ ClassLoaderData::ChunkedHandleList::~ChunkedHandleList() {
oop* ClassLoaderData::ChunkedHandleList::add(oop o) {
if (_head == NULL || _head->_size == Chunk::CAPACITY) {
Chunk* next = new Chunk(_head);
- OrderAccess::release_store_ptr(&_head, next);
+ OrderAccess::release_store(&_head, next);
}
oop* handle = &_head->_data[_head->_size];
*handle = o;
@@ -168,7 +164,7 @@ inline void ClassLoaderData::ChunkedHandleList::oops_do_chunk(OopClosure* f, Chu
}
void ClassLoaderData::ChunkedHandleList::oops_do(OopClosure* f) {
- Chunk* head = (Chunk*) OrderAccess::load_ptr_acquire(&_head);
+ Chunk* head = OrderAccess::load_acquire(&_head);
if (head != NULL) {
// Must be careful when reading size of head
oops_do_chunk(f, head, OrderAccess::load_acquire(&head->_size));
@@ -207,7 +203,7 @@ bool ClassLoaderData::ChunkedHandleList::contains(oop* p) {
oops_do(&cl);
return cl.found();
}
-#endif
+#endif // ASSERT
bool ClassLoaderData::claim() {
if (_claimed == 1) {
@@ -236,19 +232,19 @@ void ClassLoaderData::dec_keep_alive() {
}
}
-void ClassLoaderData::oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
+void ClassLoaderData::oops_do(OopClosure* f, bool must_claim, bool clear_mod_oops) {
if (must_claim && !claim()) {
return;
}
+ // Only clear modified_oops after the ClassLoaderData is claimed.
+ if (clear_mod_oops) {
+ clear_modified_oops();
+ }
+
f->do_oop(&_class_loader);
_dependencies.oops_do(f);
-
_handles.oops_do(f);
-
- if (klass_closure != NULL) {
- classes_do(klass_closure);
- }
}
void ClassLoaderData::Dependencies::oops_do(OopClosure* f) {
@@ -256,24 +252,24 @@ void ClassLoaderData::Dependencies::oops_do(OopClosure* f) {
}
void ClassLoaderData::classes_do(KlassClosure* klass_closure) {
- // Lock-free access requires load_ptr_acquire
- for (Klass* k = load_ptr_acquire(&_klasses); k != NULL; k = k->next_link()) {
+ // Lock-free access requires load_acquire
+ for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
klass_closure->do_klass(k);
assert(k != k->next_link(), "no loops!");
}
}
void ClassLoaderData::classes_do(void f(Klass * const)) {
- // Lock-free access requires load_ptr_acquire
- for (Klass* k = load_ptr_acquire(&_klasses); k != NULL; k = k->next_link()) {
+ // Lock-free access requires load_acquire
+ for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
f(k);
assert(k != k->next_link(), "no loops!");
}
}
void ClassLoaderData::methods_do(void f(Method*)) {
- // Lock-free access requires load_ptr_acquire
- for (Klass* k = load_ptr_acquire(&_klasses); k != NULL; k = k->next_link()) {
+ // Lock-free access requires load_acquire
+ for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
if (k->is_instance_klass() && InstanceKlass::cast(k)->is_loaded()) {
InstanceKlass::cast(k)->methods_do(f);
}
@@ -281,8 +277,8 @@ void ClassLoaderData::methods_do(void f(Method*)) {
}
void ClassLoaderData::loaded_classes_do(KlassClosure* klass_closure) {
- // Lock-free access requires load_ptr_acquire
- for (Klass* k = load_ptr_acquire(&_klasses); k != NULL; k = k->next_link()) {
+ // Lock-free access requires load_acquire
+ for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
// Do not filter ArrayKlass oops here...
if (k->is_array_klass() || (k->is_instance_klass() && InstanceKlass::cast(k)->is_loaded())) {
klass_closure->do_klass(k);
@@ -291,8 +287,8 @@ void ClassLoaderData::loaded_classes_do(KlassClosure* klass_closure) {
}
void ClassLoaderData::classes_do(void f(InstanceKlass*)) {
- // Lock-free access requires load_ptr_acquire
- for (Klass* k = load_ptr_acquire(&_klasses); k != NULL; k = k->next_link()) {
+ // Lock-free access requires load_acquire
+ for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
if (k->is_instance_klass()) {
f(InstanceKlass::cast(k));
}
@@ -368,6 +364,9 @@ void ClassLoaderData::record_dependency(const Klass* k, TRAPS) {
// Must handle over GC point.
Handle dependency(THREAD, to);
from_cld->_dependencies.add(dependency, CHECK);
+
+ // Added a potentially young gen oop to the ClassLoaderData
+ record_modified_oops();
}
@@ -445,7 +444,7 @@ void ClassLoaderData::add_class(Klass* k, bool publicize /* true */) {
k->set_next_link(old_value);
// Link the new item into the list, making sure the linked class is stable
// since the list can be walked without a lock
- OrderAccess::release_store_ptr(&_klasses, k);
+ OrderAccess::release_store(&_klasses, k);
}
if (publicize && k->class_loader_data() != NULL) {
@@ -585,8 +584,8 @@ void ClassLoaderData::unload() {
ModuleEntryTable* ClassLoaderData::modules() {
// Lazily create the module entry table at first request.
- // Lock-free access requires load_ptr_acquire.
- ModuleEntryTable* modules = load_ptr_acquire(&_modules);
+ // Lock-free access requires load_acquire.
+ ModuleEntryTable* modules = OrderAccess::load_acquire(&_modules);
if (modules == NULL) {
MutexLocker m1(Module_lock);
// Check if _modules got allocated while we were waiting for this lock.
@@ -596,7 +595,7 @@ ModuleEntryTable* ClassLoaderData::modules() {
{
MutexLockerEx m1(metaspace_lock(), Mutex::_no_safepoint_check_flag);
// Ensure _modules is stable, since it is examined without a lock
- OrderAccess::release_store_ptr(&_modules, modules);
+ OrderAccess::release_store(&_modules, modules);
}
}
}
@@ -733,8 +732,8 @@ Metaspace* ClassLoaderData::metaspace_non_null() {
// to create smaller arena for Reflection class loaders also.
// The reason for the delayed allocation is because some class loaders are
// simply for delegating with no metadata of their own.
- // Lock-free access requires load_ptr_acquire.
- Metaspace* metaspace = load_ptr_acquire(&_metaspace);
+ // Lock-free access requires load_acquire.
+ Metaspace* metaspace = OrderAccess::load_acquire(&_metaspace);
if (metaspace == NULL) {
MutexLockerEx ml(_metaspace_lock, Mutex::_no_safepoint_check_flag);
// Check if _metaspace got allocated while we were waiting for this lock.
@@ -756,7 +755,7 @@ Metaspace* ClassLoaderData::metaspace_non_null() {
metaspace = new Metaspace(_metaspace_lock, Metaspace::StandardMetaspaceType);
}
// Ensure _metaspace is stable, since it is examined without a lock
- OrderAccess::release_store_ptr(&_metaspace, metaspace);
+ OrderAccess::release_store(&_metaspace, metaspace);
}
}
return metaspace;
@@ -764,6 +763,7 @@ Metaspace* ClassLoaderData::metaspace_non_null() {
OopHandle ClassLoaderData::add_handle(Handle h) {
MutexLockerEx ml(metaspace_lock(), Mutex::_no_safepoint_check_flag);
+ record_modified_oops();
return OopHandle(_handles.add(h()));
}
@@ -875,8 +875,7 @@ void ClassLoaderData::dump(outputStream * const out) {
if (Verbose) {
Klass* k = _klasses;
while (k != NULL) {
- out->print_cr("klass " PTR_FORMAT ", %s, CT: %d, MUT: %d", k, k->name()->as_C_string(),
- k->has_modified_oops(), k->has_accumulated_modified_oops());
+ out->print_cr("klass " PTR_FORMAT ", %s", p2i(k), k->name()->as_C_string());
assert(k != k->next_link(), "no loops!");
k = k->next_link();
}
@@ -910,8 +909,8 @@ void ClassLoaderData::verify() {
}
bool ClassLoaderData::contains_klass(Klass* klass) {
- // Lock-free access requires load_ptr_acquire
- for (Klass* k = load_ptr_acquire(&_klasses); k != NULL; k = k->next_link()) {
+ // Lock-free access requires load_acquire
+ for (Klass* k = OrderAccess::load_acquire(&_klasses); k != NULL; k = k->next_link()) {
if (k == klass) return true;
}
return false;
@@ -944,7 +943,7 @@ ClassLoaderData* ClassLoaderDataGraph::add(Handle loader, bool is_anonymous, TRA
if (!is_anonymous) {
ClassLoaderData** cld_addr = java_lang_ClassLoader::loader_data_addr(loader());
// First, Atomically set it
- ClassLoaderData* old = (ClassLoaderData*) Atomic::cmpxchg_ptr(cld, cld_addr, NULL);
+ ClassLoaderData* old = Atomic::cmpxchg(cld, cld_addr, (ClassLoaderData*)NULL);
if (old != NULL) {
delete cld;
// Returns the data.
@@ -959,7 +958,7 @@ ClassLoaderData* ClassLoaderDataGraph::add(Handle loader, bool is_anonymous, TRA
do {
cld->set_next(next);
- ClassLoaderData* exchanged = (ClassLoaderData*)Atomic::cmpxchg_ptr(cld, list_head, next);
+ ClassLoaderData* exchanged = Atomic::cmpxchg(cld, list_head, next);
if (exchanged == next) {
LogTarget(Debug, class, loader, data) lt;
if (lt.is_enabled()) {
@@ -1003,25 +1002,25 @@ void ClassLoaderDataGraph::print_creation(outputStream* out, Handle loader, Clas
}
-void ClassLoaderDataGraph::oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
+void ClassLoaderDataGraph::oops_do(OopClosure* f, bool must_claim) {
for (ClassLoaderData* cld = _head; cld != NULL; cld = cld->next()) {
- cld->oops_do(f, klass_closure, must_claim);
+ cld->oops_do(f, must_claim);
}
}
-void ClassLoaderDataGraph::keep_alive_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
+void ClassLoaderDataGraph::keep_alive_oops_do(OopClosure* f, bool must_claim) {
for (ClassLoaderData* cld = _head; cld != NULL; cld = cld->next()) {
if (cld->keep_alive()) {
- cld->oops_do(f, klass_closure, must_claim);
+ cld->oops_do(f, must_claim);
}
}
}
-void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
+void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, bool must_claim) {
if (ClassUnloading) {
- keep_alive_oops_do(f, klass_closure, must_claim);
+ keep_alive_oops_do(f, must_claim);
} else {
- oops_do(f, klass_closure, must_claim);
+ oops_do(f, must_claim);
}
}
@@ -1383,7 +1382,7 @@ Klass* ClassLoaderDataGraphKlassIteratorAtomic::next_klass() {
while (head != NULL) {
Klass* next = next_klass_in_cldg(head);
- Klass* old_head = (Klass*)Atomic::cmpxchg_ptr(next, &_next_klass, head);
+ Klass* old_head = Atomic::cmpxchg(next, &_next_klass, head);
if (old_head == head) {
return head; // Won the CAS.
diff --git a/src/hotspot/share/classfile/classLoaderData.hpp b/src/hotspot/share/classfile/classLoaderData.hpp
index 524c985dea2..7c912ba57bb 100644
--- a/src/hotspot/share/classfile/classLoaderData.hpp
+++ b/src/hotspot/share/classfile/classLoaderData.hpp
@@ -87,9 +87,9 @@ class ClassLoaderDataGraph : public AllStatic {
static void purge();
static void clear_claimed_marks();
// oops do
- static void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
- static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
- static void always_strong_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
+ static void oops_do(OopClosure* f, bool must_claim);
+ static void keep_alive_oops_do(OopClosure* blk, bool must_claim);
+ static void always_strong_oops_do(OopClosure* blk, bool must_claim);
// cld do
static void cld_do(CLDClosure* cl);
static void cld_unloading_do(CLDClosure* cl);
@@ -194,7 +194,7 @@ class ClassLoaderData : public CHeapObj {
Chunk(Chunk* c) : _next(c), _size(0) { }
};
- Chunk* _head;
+ Chunk* volatile _head;
void oops_do_chunk(OopClosure* f, Chunk* c, const juint size);
@@ -230,10 +230,16 @@ class ClassLoaderData : public CHeapObj {
Mutex* _metaspace_lock; // Locks the metaspace for allocations and setup.
bool _unloading; // true if this class loader goes away
bool _is_anonymous; // if this CLD is for an anonymous class
+
+ // Remembered sets support for the oops in the class loader data.
+ bool _modified_oops; // Card Table Equivalent (YC/CMS support)
+ bool _accumulated_modified_oops; // Mod Union Equivalent (CMS support)
+
s2 _keep_alive; // if this CLD is kept alive without a keep_alive_object().
// Used for anonymous classes and the boot class
// loader. _keep_alive does not need to be volatile or
// atomic since there is one unique CLD per anonymous class.
+
volatile int _claimed; // true if claimed, for example during GC traces.
// To avoid applying oop closure more than once.
// Has to be an int because we cas it.
@@ -276,6 +282,19 @@ class ClassLoaderData : public CHeapObj {
bool claimed() const { return _claimed == 1; }
bool claim();
+ // The CLD are not placed in the Heap, so the Card Table or
+ // the Mod Union Table can't be used to mark when CLD have modified oops.
+ // The CT and MUT bits save this information for the whole class loader data.
+ void clear_modified_oops() { _modified_oops = false; }
+ public:
+ void record_modified_oops() { _modified_oops = true; }
+ bool has_modified_oops() { return _modified_oops; }
+
+ void accumulate_modified_oops() { if (has_modified_oops()) _accumulated_modified_oops = true; }
+ void clear_accumulated_modified_oops() { _accumulated_modified_oops = false; }
+ bool has_accumulated_modified_oops() { return _accumulated_modified_oops; }
+ private:
+
void unload();
bool keep_alive() const { return _keep_alive > 0; }
void classes_do(void f(Klass*));
@@ -346,8 +365,7 @@ class ClassLoaderData : public CHeapObj {
inline unsigned int identity_hash() const { return (unsigned int)(((intptr_t)this) >> 3); }
- // Used when tracing from klasses.
- void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
+ void oops_do(OopClosure* f, bool must_claim, bool clear_modified_oops = false);
void classes_do(KlassClosure* klass_closure);
Klass* klasses() { return _klasses; }
diff --git a/src/hotspot/share/classfile/defaultMethods.cpp b/src/hotspot/share/classfile/defaultMethods.cpp
index 596b567db49..b09d552bcf4 100644
--- a/src/hotspot/share/classfile/defaultMethods.cpp
+++ b/src/hotspot/share/classfile/defaultMethods.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -767,15 +767,14 @@ static void create_default_methods( InstanceKlass* klass,
// This is the guts of the default methods implementation. This is called just
// after the classfile has been parsed if some ancestor has default methods.
//
-// First if finds any name/signature slots that need any implementation (either
+// First it finds any name/signature slots that need any implementation (either
// because they are miranda or a superclass's implementation is an overpass
// itself). For each slot, iterate over the hierarchy, to see if they contain a
// signature that matches the slot we are looking at.
//
-// For each slot filled, we generate an overpass method that either calls the
-// unique default method candidate using invokespecial, or throws an exception
-// (in the case of no default method candidates, or more than one valid
-// candidate). These methods are then added to the class's method list.
+// For each slot filled, we either record the default method candidate in the
+// klass default_methods list or, only to handle exception cases, we create an
+// overpass method that throws an exception and add it to the klass methods list.
// The JVM does not create bridges nor handle generic signatures here.
void DefaultMethods::generate_default_methods(
InstanceKlass* klass, const GrowableArray* mirandas, TRAPS) {
@@ -901,6 +900,11 @@ static void switchover_constant_pool(BytecodeConstantPool* bpool,
// This allows virtual methods to override the overpass, but ensures
// that a local method search will find the exception rather than an abstract
// or default method that is not a valid candidate.
+//
+// Note that if overpass methods are ever created that are not exception
+// throwing methods then the loader constraint checking logic for vtable and
+// itable creation needs to be changed to check loader constraints for the
+// overpass methods that do not throw exceptions.
static void create_defaults_and_exceptions(
GrowableArray* slots,
InstanceKlass* klass, TRAPS) {
diff --git a/src/hotspot/share/classfile/dictionary.hpp b/src/hotspot/share/classfile/dictionary.hpp
index 8b3099ab074..b2662a1b4c5 100644
--- a/src/hotspot/share/classfile/dictionary.hpp
+++ b/src/hotspot/share/classfile/dictionary.hpp
@@ -161,10 +161,10 @@ class DictionaryEntry : public HashtableEntry {
void set_pd_set(ProtectionDomainEntry* new_head) { _pd_set = new_head; }
ProtectionDomainEntry* pd_set_acquire() const {
- return (ProtectionDomainEntry*)OrderAccess::load_ptr_acquire(&_pd_set);
+ return OrderAccess::load_acquire(&_pd_set);
}
void release_set_pd_set(ProtectionDomainEntry* new_head) {
- OrderAccess::release_store_ptr(&_pd_set, new_head);
+ OrderAccess::release_store(&_pd_set, new_head);
}
// Tells whether the initiating class' protection domain can access the klass in this entry
diff --git a/src/hotspot/share/classfile/javaClasses.cpp b/src/hotspot/share/classfile/javaClasses.cpp
index 26720f2b258..3499d3e9d01 100644
--- a/src/hotspot/share/classfile/javaClasses.cpp
+++ b/src/hotspot/share/classfile/javaClasses.cpp
@@ -889,7 +889,7 @@ void java_lang_Class::create_mirror(Klass* k, Handle class_loader,
// Setup indirection from klass->mirror
// after any exceptions can happen during allocations.
- k->set_java_mirror(mirror());
+ k->set_java_mirror(mirror);
// Set the module field in the java_lang_Class instance. This must be done
// after the mirror is set.
diff --git a/src/hotspot/share/classfile/jimage.hpp b/src/hotspot/share/classfile/jimage.hpp
index e2268e91510..298bc85d472 100644
--- a/src/hotspot/share/classfile/jimage.hpp
+++ b/src/hotspot/share/classfile/jimage.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -22,7 +22,7 @@
*
*/
-#include "prims/jni.h"
+#include "jni.h"
// Opaque reference to a JImage file.
class JImageFile;
diff --git a/src/hotspot/share/classfile/klassFactory.cpp b/src/hotspot/share/classfile/klassFactory.cpp
index e16c6bd5490..3c687a45b1d 100644
--- a/src/hotspot/share/classfile/klassFactory.cpp
+++ b/src/hotspot/share/classfile/klassFactory.cpp
@@ -223,8 +223,8 @@ InstanceKlass* KlassFactory::create_from_stream(ClassFileStream* stream,
result->set_cached_class_file(cached_class_file);
}
- if (InstanceKlass::should_store_fingerprint()) {
- result->store_fingerprint(!result->is_anonymous() ? stream->compute_fingerprint() : 0);
+ if (result->should_store_fingerprint()) {
+ result->store_fingerprint(stream->compute_fingerprint());
}
TRACE_KLASS_CREATION(result, parser, THREAD);
diff --git a/src/hotspot/share/classfile/moduleEntry.cpp b/src/hotspot/share/classfile/moduleEntry.cpp
index 543800a2864..f12ff916e06 100644
--- a/src/hotspot/share/classfile/moduleEntry.cpp
+++ b/src/hotspot/share/classfile/moduleEntry.cpp
@@ -23,13 +23,13 @@
*/
#include "precompiled.hpp"
+#include "jni.h"
#include "classfile/classLoaderData.hpp"
#include "classfile/javaClasses.hpp"
#include "classfile/moduleEntry.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "oops/symbol.hpp"
-#include "prims/jni.h"
#include "runtime/handles.inline.hpp"
#include "runtime/safepoint.hpp"
#include "trace/traceMacros.hpp"
diff --git a/src/hotspot/share/classfile/moduleEntry.hpp b/src/hotspot/share/classfile/moduleEntry.hpp
index 253f209b22c..b23dadd6f01 100644
--- a/src/hotspot/share/classfile/moduleEntry.hpp
+++ b/src/hotspot/share/classfile/moduleEntry.hpp
@@ -25,11 +25,11 @@
#ifndef SHARE_VM_CLASSFILE_MODULEENTRY_HPP
#define SHARE_VM_CLASSFILE_MODULEENTRY_HPP
+#include "jni.h"
#include "classfile/classLoaderData.hpp"
#include "classfile/vmSymbols.hpp"
#include "oops/oopHandle.hpp"
#include "oops/symbol.hpp"
-#include "prims/jni.h"
#include "runtime/jniHandles.hpp"
#include "runtime/mutexLocker.hpp"
#include "trace/traceMacros.hpp"
diff --git a/src/hotspot/share/classfile/resolutionErrors.hpp b/src/hotspot/share/classfile/resolutionErrors.hpp
index 866a4b122a3..bba235453a3 100644
--- a/src/hotspot/share/classfile/resolutionErrors.hpp
+++ b/src/hotspot/share/classfile/resolutionErrors.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,12 @@ class ResolutionErrorEntry;
// ResolutionError objects are used to record errors encountered during
// constant pool resolution (JVMS 5.4.3).
+// This value is added to the cpCache index of an invokedynamic instruction when
+// storing the resolution error resulting from that invokedynamic instruction.
+// This prevents issues where the cpCache index is the same as the constant pool
+// index of another entry in the table.
+const int CPCACHE_INDEX_MANGLE_VALUE = 1000000;
+
class ResolutionErrorTable : public Hashtable {
public:
@@ -73,6 +79,14 @@ public:
// RedefineClasses support - remove obsolete constant pool entry
void delete_entry(ConstantPool* c);
+
+ // This function is used to encode an index to differentiate it from a
+ // constant pool index. It assumes it is being called with a cpCache index
+ // (that is less than 0).
+ static int encode_cpcache_index(int index) {
+ assert(index < 0, "Unexpected non-negative cpCache index");
+ return index + CPCACHE_INDEX_MANGLE_VALUE;
+ }
};
diff --git a/src/hotspot/share/classfile/systemDictionary.cpp b/src/hotspot/share/classfile/systemDictionary.cpp
index ededdc3f2d0..22bf5b037e7 100644
--- a/src/hotspot/share/classfile/systemDictionary.cpp
+++ b/src/hotspot/share/classfile/systemDictionary.cpp
@@ -104,6 +104,7 @@ InstanceKlass* SystemDictionary::_well_known_klasses[SystemDictionary::WKID
InstanceKlass* SystemDictionary::_box_klasses[T_VOID+1] = { NULL /*, NULL...*/ };
oop SystemDictionary::_java_system_loader = NULL;
+oop SystemDictionary::_java_platform_loader = NULL;
bool SystemDictionary::_has_loadClassInternal = false;
bool SystemDictionary::_has_checkPackageAccess = false;
@@ -117,27 +118,38 @@ const int defaultProtectionDomainCacheSize = 1009;
// ----------------------------------------------------------------------------
-// Java-level SystemLoader
+// Java-level SystemLoader and PlatformLoader
oop SystemDictionary::java_system_loader() {
return _java_system_loader;
}
-void SystemDictionary::compute_java_system_loader(TRAPS) {
- Klass* system_klass = WK_KLASS(ClassLoader_klass);
+oop SystemDictionary::java_platform_loader() {
+ return _java_platform_loader;
+}
+
+void SystemDictionary::compute_java_loaders(TRAPS) {
JavaValue result(T_OBJECT);
+ InstanceKlass* class_loader_klass = SystemDictionary::ClassLoader_klass();
JavaCalls::call_static(&result,
- WK_KLASS(ClassLoader_klass),
+ class_loader_klass,
vmSymbols::getSystemClassLoader_name(),
vmSymbols::void_classloader_signature(),
CHECK);
_java_system_loader = (oop)result.get_jobject();
+ JavaCalls::call_static(&result,
+ class_loader_klass,
+ vmSymbols::getPlatformClassLoader_name(),
+ vmSymbols::void_classloader_signature(),
+ CHECK);
+
+ _java_platform_loader = (oop)result.get_jobject();
+
CDS_ONLY(SystemDictionaryShared::initialize(CHECK);)
}
-
ClassLoaderData* SystemDictionary::register_loader(Handle class_loader, TRAPS) {
if (class_loader() == NULL) return ClassLoaderData::the_null_class_loader_data();
return ClassLoaderDataGraph::find_or_create(class_loader, THREAD);
@@ -169,7 +181,7 @@ bool SystemDictionary::is_system_class_loader(oop class_loader) {
return false;
}
return (class_loader->klass() == SystemDictionary::jdk_internal_loader_ClassLoaders_AppClassLoader_klass() ||
- class_loader == _java_system_loader);
+ class_loader == _java_system_loader);
}
// Returns true if the passed class loader is the platform class loader.
@@ -1238,7 +1250,7 @@ bool SystemDictionary::is_shared_class_visible(Symbol* class_name,
SharedClassPathEntry* ent =
(SharedClassPathEntry*)FileMapInfo::shared_classpath(path_index);
if (!Universe::is_module_initialized()) {
- assert(ent != NULL && ent->is_jrt(),
+ assert(ent != NULL && ent->is_modules_image(),
"Loading non-bootstrap classes before the module system is initialized");
assert(class_loader.is_null(), "sanity");
return true;
@@ -1274,7 +1286,7 @@ bool SystemDictionary::is_shared_class_visible(Symbol* class_name,
if (mod_entry != NULL) {
// PackageEntry/ModuleEntry is found in the classloader. Check if the
// ModuleEntry's location agrees with the archived class' origination.
- if (ent->is_jrt() && mod_entry->location()->starts_with("jrt:")) {
+ if (ent->is_modules_image() && mod_entry->location()->starts_with("jrt:")) {
return true; // Module class from the "module" jimage
}
}
@@ -1285,7 +1297,7 @@ bool SystemDictionary::is_shared_class_visible(Symbol* class_name,
// 1. the class is from the unamed package
// 2. or, the class is not from a module defined in the NULL classloader
// 3. or, the class is from an unamed module
- if (!ent->is_jrt() && ik->is_shared_boot_class()) {
+ if (!ent->is_modules_image() && ik->is_shared_boot_class()) {
// the class is from the -Xbootclasspath/a
if (pkg_string == NULL ||
pkg_entry == NULL ||
@@ -1940,6 +1952,7 @@ bool SystemDictionary::do_unloading(BoolObjectClosure* is_alive,
void SystemDictionary::roots_oops_do(OopClosure* strong, OopClosure* weak) {
strong->do_oop(&_java_system_loader);
+ strong->do_oop(&_java_platform_loader);
strong->do_oop(&_system_loader_lock_obj);
CDS_ONLY(SystemDictionaryShared::roots_oops_do(strong);)
@@ -1964,6 +1977,7 @@ void SystemDictionary::roots_oops_do(OopClosure* strong, OopClosure* weak) {
void SystemDictionary::oops_do(OopClosure* f) {
f->do_oop(&_java_system_loader);
+ f->do_oop(&_java_platform_loader);
f->do_oop(&_system_loader_lock_obj);
CDS_ONLY(SystemDictionaryShared::oops_do(f);)
diff --git a/src/hotspot/share/classfile/systemDictionary.hpp b/src/hotspot/share/classfile/systemDictionary.hpp
index 4bd30563527..17da32a2859 100644
--- a/src/hotspot/share/classfile/systemDictionary.hpp
+++ b/src/hotspot/share/classfile/systemDictionary.hpp
@@ -484,11 +484,14 @@ public:
static bool Object_klass_loaded() { return WK_KLASS(Object_klass) != NULL; }
static bool ClassLoader_klass_loaded() { return WK_KLASS(ClassLoader_klass) != NULL; }
- // Returns default system loader
+ // Returns java system loader
static oop java_system_loader();
- // Compute the default system loader
- static void compute_java_system_loader(TRAPS);
+ // Returns java platform loader
+ static oop java_platform_loader();
+
+ // Compute the java system and platform loaders
+ static void compute_java_loaders(TRAPS);
// Register a new class loader
static ClassLoaderData* register_loader(Handle class_loader, TRAPS);
@@ -700,6 +703,7 @@ protected:
static InstanceKlass* _box_klasses[T_VOID+1];
static oop _java_system_loader;
+ static oop _java_platform_loader;
static bool _has_loadClassInternal;
static bool _has_checkPackageAccess;
diff --git a/src/hotspot/share/classfile/verifier.cpp b/src/hotspot/share/classfile/verifier.cpp
index 37daeb5547f..ef4a749ff58 100644
--- a/src/hotspot/share/classfile/verifier.cpp
+++ b/src/hotspot/share/classfile/verifier.cpp
@@ -69,14 +69,14 @@ static void* volatile _verify_byte_codes_fn = NULL;
static volatile jint _is_new_verify_byte_codes_fn = (jint) true;
static void* verify_byte_codes_fn() {
- if (OrderAccess::load_ptr_acquire(&_verify_byte_codes_fn) == NULL) {
+ if (OrderAccess::load_acquire(&_verify_byte_codes_fn) == NULL) {
void *lib_handle = os::native_java_library();
void *func = os::dll_lookup(lib_handle, "VerifyClassCodesForMajorVersion");
- OrderAccess::release_store_ptr(&_verify_byte_codes_fn, func);
+ OrderAccess::release_store(&_verify_byte_codes_fn, func);
if (func == NULL) {
_is_new_verify_byte_codes_fn = false;
func = os::dll_lookup(lib_handle, "VerifyClassCodes");
- OrderAccess::release_store_ptr(&_verify_byte_codes_fn, func);
+ OrderAccess::release_store(&_verify_byte_codes_fn, func);
}
}
return (void*)_verify_byte_codes_fn;
diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp
index 6bd7412cfb2..88f03e5a205 100644
--- a/src/hotspot/share/classfile/vmSymbols.hpp
+++ b/src/hotspot/share/classfile/vmSymbols.hpp
@@ -371,6 +371,7 @@
template(deadChild_name, "deadChild") \
template(getFromClass_name, "getFromClass") \
template(dispatch_name, "dispatch") \
+ template(getPlatformClassLoader_name, "getPlatformClassLoader") \
template(getSystemClassLoader_name, "getSystemClassLoader") \
template(fillInStackTrace_name, "fillInStackTrace") \
template(getCause_name, "getCause") \
@@ -461,6 +462,8 @@
template(getProtectionDomain_signature, "(Ljava/security/CodeSource;)Ljava/security/ProtectionDomain;") \
template(url_code_signer_array_void_signature, "(Ljava/net/URL;[Ljava/security/CodeSigner;)V") \
template(module_entry_name, "module_entry") \
+ template(resolved_references_name, "<resolved_references>") \
+ template(init_lock_name, "<init_lock>") \
\
/* name symbols needed by intrinsics */ \
VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, template, VM_SYMBOL_IGNORE, VM_ALIAS_IGNORE) \
@@ -779,6 +782,7 @@
do_name(decrementExact_name,"decrementExact") \
do_name(incrementExact_name,"incrementExact") \
do_name(multiplyExact_name,"multiplyExact") \
+ do_name(multiplyHigh_name,"multiplyHigh") \
do_name(negateExact_name,"negateExact") \
do_name(subtractExact_name,"subtractExact") \
do_name(fma_name, "fma") \
@@ -803,6 +807,7 @@
do_intrinsic(_incrementExactL, java_lang_Math, incrementExact_name, long_long_signature, F_S) \
do_intrinsic(_multiplyExactI, java_lang_Math, multiplyExact_name, int2_int_signature, F_S) \
do_intrinsic(_multiplyExactL, java_lang_Math, multiplyExact_name, long2_long_signature, F_S) \
+ do_intrinsic(_multiplyHigh, java_lang_Math, multiplyHigh_name, long2_long_signature, F_S) \
do_intrinsic(_negateExactI, java_lang_Math, negateExact_name, int_int_signature, F_S) \
do_intrinsic(_negateExactL, java_lang_Math, negateExact_name, long_long_signature, F_S) \
do_intrinsic(_subtractExactI, java_lang_Math, subtractExact_name, int2_int_signature, F_S) \
diff --git a/src/hotspot/share/code/codeBlob.hpp b/src/hotspot/share/code/codeBlob.hpp
index 773352b1101..35b356b7d37 100644
--- a/src/hotspot/share/code/codeBlob.hpp
+++ b/src/hotspot/share/code/codeBlob.hpp
@@ -125,7 +125,6 @@ public:
inline bool is_compiled_by_c1() const { return _type == compiler_c1; };
inline bool is_compiled_by_c2() const { return _type == compiler_c2; };
inline bool is_compiled_by_jvmci() const { return _type == compiler_jvmci; };
- inline bool is_compiled_by_shark() const { return _type == compiler_shark; };
const char* compiler_name() const;
// Casting
@@ -157,6 +156,13 @@ public:
int relocation_size() const { return (address) relocation_end() - (address) relocation_begin(); }
int content_size() const { return content_end() - content_begin(); }
int code_size() const { return code_end() - code_begin(); }
+ // Only used from CodeCache::free_unused_tail() after the Interpreter blob was trimmed
+ void adjust_size(size_t used) {
+ _size = (int)used;
+ _data_offset = (int)used;
+ _code_end = (address)this + used;
+ _data_end = (address)this + used;
+ }
// Containment
bool blob_contains(address addr) const { return header_begin() <= addr && addr < data_end(); }
diff --git a/src/hotspot/share/code/codeCache.cpp b/src/hotspot/share/code/codeCache.cpp
index 56c230e3bbf..b49892b1bf0 100644
--- a/src/hotspot/share/code/codeCache.cpp
+++ b/src/hotspot/share/code/codeCache.cpp
@@ -569,6 +569,21 @@ void CodeCache::free(CodeBlob* cb) {
assert(heap->blob_count() >= 0, "sanity check");
}
+void CodeCache::free_unused_tail(CodeBlob* cb, size_t used) {
+ assert_locked_or_safepoint(CodeCache_lock);
+ guarantee(cb->is_buffer_blob() && strncmp("Interpreter", cb->name(), 11) == 0, "Only possible for interpreter!");
+ print_trace("free_unused_tail", cb);
+
+ // We also have to account for the extra space (i.e. header) used by the CodeBlob
+ // which provides the memory (see BufferBlob::create() in codeBlob.cpp).
+ used += CodeBlob::align_code_offset(cb->header_size());
+
+ // Get heap for given CodeBlob and deallocate its unused tail
+ get_code_heap(cb)->deallocate_tail(cb, used);
+ // Adjust the sizes of the CodeBlob
+ cb->adjust_size(used);
+}
+
void CodeCache::commit(CodeBlob* cb) {
// this is called by nmethod::nmethod, which must already own CodeCache_lock
assert_locked_or_safepoint(CodeCache_lock);
@@ -683,22 +698,19 @@ void CodeCache::blobs_do(CodeBlobClosure* f) {
if (cb->is_alive()) {
f->do_code_blob(cb);
#ifdef ASSERT
- if (cb->is_nmethod())
- ((nmethod*)cb)->verify_scavenge_root_oops();
+ if (cb->is_nmethod()) {
+ Universe::heap()->verify_nmethod((nmethod*)cb);
+ }
#endif //ASSERT
}
}
}
}
-// Walk the list of methods which might contain non-perm oops.
+// Walk the list of methods which might contain oops to the java heap.
void CodeCache::scavenge_root_nmethods_do(CodeBlobToOopClosure* f) {
assert_locked_or_safepoint(CodeCache_lock);
- if (UseG1GC) {
- return;
- }
-
const bool fix_relocations = f->fix_relocations();
debug_only(mark_scavenge_root_nmethods());
@@ -735,13 +747,20 @@ void CodeCache::scavenge_root_nmethods_do(CodeBlobToOopClosure* f) {
debug_only(verify_perm_nmethods(NULL));
}
+void CodeCache::register_scavenge_root_nmethod(nmethod* nm) {
+ assert_locked_or_safepoint(CodeCache_lock);
+ if (!nm->on_scavenge_root_list() && nm->detect_scavenge_root_oops()) {
+ add_scavenge_root_nmethod(nm);
+ }
+}
+
+void CodeCache::verify_scavenge_root_nmethod(nmethod* nm) {
+ nm->verify_scavenge_root_oops();
+}
+
void CodeCache::add_scavenge_root_nmethod(nmethod* nm) {
assert_locked_or_safepoint(CodeCache_lock);
- if (UseG1GC) {
- return;
- }
-
nm->set_on_scavenge_root_list();
nm->set_scavenge_root_link(_scavenge_root_nmethods);
set_scavenge_root_nmethods(nm);
@@ -754,8 +773,6 @@ void CodeCache::unlink_scavenge_root_nmethod(nmethod* nm, nmethod* prev) {
assert((prev == NULL && scavenge_root_nmethods() == nm) ||
(prev != NULL && prev->scavenge_root_link() == nm), "precondition");
- assert(!UseG1GC, "G1 does not use the scavenge_root_nmethods list");
-
print_trace("unlink_scavenge_root", nm);
if (prev == NULL) {
set_scavenge_root_nmethods(nm->scavenge_root_link());
@@ -769,10 +786,6 @@ void CodeCache::unlink_scavenge_root_nmethod(nmethod* nm, nmethod* prev) {
void CodeCache::drop_scavenge_root_nmethod(nmethod* nm) {
assert_locked_or_safepoint(CodeCache_lock);
- if (UseG1GC) {
- return;
- }
-
print_trace("drop_scavenge_root", nm);
nmethod* prev = NULL;
for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
@@ -788,10 +801,6 @@ void CodeCache::drop_scavenge_root_nmethod(nmethod* nm) {
void CodeCache::prune_scavenge_root_nmethods() {
assert_locked_or_safepoint(CodeCache_lock);
- if (UseG1GC) {
- return;
- }
-
debug_only(mark_scavenge_root_nmethods());
nmethod* last = NULL;
@@ -820,10 +829,6 @@ void CodeCache::prune_scavenge_root_nmethods() {
#ifndef PRODUCT
void CodeCache::asserted_non_scavengable_nmethods_do(CodeBlobClosure* f) {
- if (UseG1GC) {
- return;
- }
-
// While we are here, verify the integrity of the list.
mark_scavenge_root_nmethods();
for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
@@ -833,7 +838,7 @@ void CodeCache::asserted_non_scavengable_nmethods_do(CodeBlobClosure* f) {
verify_perm_nmethods(f);
}
-// Temporarily mark nmethods that are claimed to be on the non-perm list.
+// Temporarily mark nmethods that are claimed to be on the scavenge list.
void CodeCache::mark_scavenge_root_nmethods() {
NMethodIterator iter;
while(iter.next_alive()) {
@@ -854,7 +859,7 @@ void CodeCache::verify_perm_nmethods(CodeBlobClosure* f_or_null) {
assert(nm->scavenge_root_not_marked(), "must be already processed");
if (nm->on_scavenge_root_list())
call_f = false; // don't show this one to the client
- nm->verify_scavenge_root_oops();
+ Universe::heap()->verify_nmethod(nm);
if (call_f) f_or_null->do_code_blob(nm);
}
}
@@ -1640,4 +1645,3 @@ void CodeCache::log_state(outputStream* st) {
blob_count(), nmethod_count(), adapter_count(),
unallocated_capacity());
}
-
diff --git a/src/hotspot/share/code/codeCache.hpp b/src/hotspot/share/code/codeCache.hpp
index 37e63e7089c..2749acd05b7 100644
--- a/src/hotspot/share/code/codeCache.hpp
+++ b/src/hotspot/share/code/codeCache.hpp
@@ -143,6 +143,7 @@ class CodeCache : AllStatic {
static int alignment_unit(); // guaranteed alignment of all CodeBlobs
static int alignment_offset(); // guaranteed offset of first CodeBlob byte within alignment unit (i.e., allocation header)
static void free(CodeBlob* cb); // frees a CodeBlob
+ static void free_unused_tail(CodeBlob* cb, size_t used); // frees the unused tail of a CodeBlob (only used by TemplateInterpreter::initialize())
static bool contains(void *p); // returns whether p is included
static bool contains(nmethod* nm); // returns whether nm is included
static void blobs_do(void f(CodeBlob* cb)); // iterates over all CodeBlobs
@@ -181,6 +182,10 @@ class CodeCache : AllStatic {
static void scavenge_root_nmethods_do(CodeBlobToOopClosure* f);
static nmethod* scavenge_root_nmethods() { return _scavenge_root_nmethods; }
+ // register_scavenge_root_nmethod() conditionally adds the nmethod to the list
+ // if it is not already on the list and has a scavengeable root
+ static void register_scavenge_root_nmethod(nmethod* nm);
+ static void verify_scavenge_root_nmethod(nmethod* nm);
static void add_scavenge_root_nmethod(nmethod* nm);
static void drop_scavenge_root_nmethod(nmethod* nm);
diff --git a/src/hotspot/share/code/compiledMethod.cpp b/src/hotspot/share/code/compiledMethod.cpp
index 130c32f2138..07f7ac8ab74 100644
--- a/src/hotspot/share/code/compiledMethod.cpp
+++ b/src/hotspot/share/code/compiledMethod.cpp
@@ -294,7 +294,6 @@ int CompiledMethod::verify_icholder_relocations() {
// Method that knows how to preserve outgoing arguments at call. This method must be
// called with a frame corresponding to a Java invoke
void CompiledMethod::preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map, OopClosure* f) {
-#ifndef SHARK
if (method() != NULL && !method()->is_native()) {
address pc = fr.pc();
SimpleScopeDesc ssd(this, pc);
@@ -314,7 +313,6 @@ void CompiledMethod::preserve_callee_argument_oops(frame fr, const RegisterMap *
fr.oops_compiled_arguments_do(signature, has_receiver, has_appendix, reg_map, f);
}
-#endif // !SHARK
}
Method* CompiledMethod::attached_method(address call_instr) {
diff --git a/src/hotspot/share/code/compiledMethod.hpp b/src/hotspot/share/code/compiledMethod.hpp
index 65f64469a0c..0c0bb37a42e 100644
--- a/src/hotspot/share/code/compiledMethod.hpp
+++ b/src/hotspot/share/code/compiledMethod.hpp
@@ -288,7 +288,7 @@ public:
// Note: _exception_cache may be read concurrently. We rely on memory_order_consume here.
ExceptionCache* exception_cache() const { return _exception_cache; }
void set_exception_cache(ExceptionCache *ec) { _exception_cache = ec; }
- void release_set_exception_cache(ExceptionCache *ec) { OrderAccess::release_store_ptr(&_exception_cache, ec); }
+ void release_set_exception_cache(ExceptionCache *ec) { OrderAccess::release_store(&_exception_cache, ec); }
address handler_for_exception_and_pc(Handle exception, address pc);
void add_handler_for_exception_and_pc(Handle exception, address pc, address handler);
void clean_exception_cache(BoolObjectClosure* is_alive);
diff --git a/src/hotspot/share/code/jvmticmlr.h b/src/hotspot/share/code/jvmticmlr.h
deleted file mode 100644
index c2106d3a7dc..00000000000
--- a/src/hotspot/share/code/jvmticmlr.h
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation. Oracle designates this
- * particular file as subject to the "Classpath" exception as provided
- * by Oracle in the LICENSE file that accompanied this code.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * This header file defines the data structures sent by the VM
- * through the JVMTI CompiledMethodLoad callback function via the
- * "void * compile_info" parameter. The memory pointed to by the
- * compile_info parameter may not be referenced after returning from
- * the CompiledMethodLoad callback. These are VM implementation
- * specific data structures that may evolve in future releases. A
- * JVMTI agent should interpret a non-NULL compile_info as a pointer
- * to a region of memory containing a list of records. In a typical
- * usage scenario, a JVMTI agent would cast each record to a
- * jvmtiCompiledMethodLoadRecordHeader, a struct that represents
- * arbitrary information. This struct contains a kind field to indicate
- * the kind of information being passed, and a pointer to the next
- * record. If the kind field indicates inlining information, then the
- * agent would cast the record to a jvmtiCompiledMethodLoadInlineRecord.
- * This record contains an array of PCStackInfo structs, which indicate
- * for every pc address what are the methods on the invocation stack.
- * The "methods" and "bcis" fields in each PCStackInfo struct specify a
- * 1-1 mapping between these inlined methods and their bytecode indices.
- * This can be used to derive the proper source lines of the inlined
- * methods.
- */
-
-#ifndef _JVMTI_CMLR_H_
-#define _JVMTI_CMLR_H_
-
-enum {
- JVMTI_CMLR_MAJOR_VERSION_1 = 0x00000001,
- JVMTI_CMLR_MINOR_VERSION_0 = 0x00000000,
-
- JVMTI_CMLR_MAJOR_VERSION = 0x00000001,
- JVMTI_CMLR_MINOR_VERSION = 0x00000000
-
- /*
- * This comment is for the "JDK import from HotSpot" sanity check:
- * version: 1.0.0
- */
-};
-
-typedef enum {
- JVMTI_CMLR_DUMMY = 1,
- JVMTI_CMLR_INLINE_INFO = 2
-} jvmtiCMLRKind;
-
-/*
- * Record that represents arbitrary information passed through JVMTI
- * CompiledMethodLoadEvent void pointer.
- */
-typedef struct _jvmtiCompiledMethodLoadRecordHeader {
- jvmtiCMLRKind kind; /* id for the kind of info passed in the record */
- jint majorinfoversion; /* major and minor info version values. Init'ed */
- jint minorinfoversion; /* to current version value in jvmtiExport.cpp. */
-
- struct _jvmtiCompiledMethodLoadRecordHeader* next;
-} jvmtiCompiledMethodLoadRecordHeader;
-
-/*
- * Record that gives information about the methods on the compile-time
- * stack at a specific pc address of a compiled method. Each element in
- * the methods array maps to same element in the bcis array.
- */
-typedef struct _PCStackInfo {
- void* pc; /* the pc address for this compiled method */
- jint numstackframes; /* number of methods on the stack */
- jmethodID* methods; /* array of numstackframes method ids */
- jint* bcis; /* array of numstackframes bytecode indices */
-} PCStackInfo;
-
-/*
- * Record that contains inlining information for each pc address of
- * an nmethod.
- */
-typedef struct _jvmtiCompiledMethodLoadInlineRecord {
- jvmtiCompiledMethodLoadRecordHeader header; /* common header for casting */
- jint numpcs; /* number of pc descriptors in this nmethod */
- PCStackInfo* pcinfo; /* array of numpcs pc descriptors */
-} jvmtiCompiledMethodLoadInlineRecord;
-
-/*
- * Dummy record used to test that we can pass records with different
- * information through the void pointer provided that they can be cast
- * to a jvmtiCompiledMethodLoadRecordHeader.
- */
-
-typedef struct _jvmtiCompiledMethodLoadDummyRecord {
- jvmtiCompiledMethodLoadRecordHeader header; /* common header for casting */
- char message[50];
-} jvmtiCompiledMethodLoadDummyRecord;
-
-#endif
diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp
index 1772feedde1..ba0c418aa2b 100644
--- a/src/hotspot/share/code/nmethod.cpp
+++ b/src/hotspot/share/code/nmethod.cpp
@@ -53,9 +53,6 @@
#include "utilities/events.hpp"
#include "utilities/resourceHash.hpp"
#include "utilities/xmlstream.hpp"
-#ifdef SHARK
-#include "shark/sharkCompiler.hpp"
-#endif
#if INCLUDE_JVMCI
#include "jvmci/jvmciJavaClasses.hpp"
#endif
@@ -200,9 +197,6 @@ static java_nmethod_stats_struct c2_java_nmethod_stats;
#if INCLUDE_JVMCI
static java_nmethod_stats_struct jvmci_java_nmethod_stats;
#endif
-#ifdef SHARK
-static java_nmethod_stats_struct shark_java_nmethod_stats;
-#endif
static java_nmethod_stats_struct unknown_java_nmethod_stats;
static native_nmethod_stats_struct native_nmethod_stats;
@@ -223,11 +217,6 @@ static void note_java_nmethod(nmethod* nm) {
if (nm->is_compiled_by_jvmci()) {
jvmci_java_nmethod_stats.note_nmethod(nm);
} else
-#endif
-#ifdef SHARK
- if (nm->is_compiled_by_shark()) {
- shark_java_nmethod_stats.note_nmethod(nm);
- } else
#endif
{
unknown_java_nmethod_stats.note_nmethod(nm);
@@ -411,11 +400,8 @@ void nmethod::init_defaults() {
_oops_do_mark_link = NULL;
_jmethod_id = NULL;
_osr_link = NULL;
- if (UseG1GC) {
- _unloading_next = NULL;
- } else {
- _scavenge_root_link = NULL;
- }
+ _unloading_next = NULL;
+ _scavenge_root_link = NULL;
_scavenge_root_state = 0;
#if INCLUDE_RTM_OPT
_rtm_state = NoRTM;
@@ -599,12 +585,9 @@ nmethod::nmethod(
code_buffer->copy_code_and_locs_to(this);
code_buffer->copy_values_to(this);
if (ScavengeRootsInCode) {
- if (detect_scavenge_root_oops()) {
- CodeCache::add_scavenge_root_nmethod(this);
- }
Universe::heap()->register_nmethod(this);
}
- debug_only(verify_scavenge_root_oops());
+ debug_only(Universe::heap()->verify_nmethod(this));
CodeCache::commit(this);
}
@@ -754,12 +737,9 @@ nmethod::nmethod(
debug_info->copy_to(this);
dependencies->copy_to(this);
if (ScavengeRootsInCode) {
- if (detect_scavenge_root_oops()) {
- CodeCache::add_scavenge_root_nmethod(this);
- }
Universe::heap()->register_nmethod(this);
}
- debug_only(verify_scavenge_root_oops());
+ debug_only(Universe::heap()->verify_nmethod(this));
CodeCache::commit(this);
@@ -1334,10 +1314,6 @@ void nmethod::flush() {
CodeCache::drop_scavenge_root_nmethod(this);
}
-#ifdef SHARK
- ((SharkCompiler *) compiler())->free_compiled_method(insts_begin());
-#endif // SHARK
-
CodeBlob::flush();
CodeCache::free(this);
}
@@ -1661,20 +1637,16 @@ nmethod* volatile nmethod::_oops_do_mark_nmethods;
// This code must be MP safe, because it is used from parallel GC passes.
bool nmethod::test_set_oops_do_mark() {
assert(nmethod::oops_do_marking_is_active(), "oops_do_marking_prologue must be called");
- nmethod* observed_mark_link = _oops_do_mark_link;
- if (observed_mark_link == NULL) {
+ if (_oops_do_mark_link == NULL) {
// Claim this nmethod for this thread to mark.
- observed_mark_link = (nmethod*)
- Atomic::cmpxchg_ptr(NMETHOD_SENTINEL, &_oops_do_mark_link, NULL);
- if (observed_mark_link == NULL) {
-
+ if (Atomic::cmpxchg(NMETHOD_SENTINEL, &_oops_do_mark_link, (nmethod*)NULL) == NULL) {
// Atomically append this nmethod (now claimed) to the head of the list:
nmethod* observed_mark_nmethods = _oops_do_mark_nmethods;
for (;;) {
nmethod* required_mark_nmethods = observed_mark_nmethods;
_oops_do_mark_link = required_mark_nmethods;
- observed_mark_nmethods = (nmethod*)
- Atomic::cmpxchg_ptr(this, &_oops_do_mark_nmethods, required_mark_nmethods);
+ observed_mark_nmethods =
+ Atomic::cmpxchg(this, &_oops_do_mark_nmethods, required_mark_nmethods);
if (observed_mark_nmethods == required_mark_nmethods)
break;
}
@@ -1690,9 +1662,9 @@ bool nmethod::test_set_oops_do_mark() {
void nmethod::oops_do_marking_prologue() {
if (TraceScavenge) { tty->print_cr("[oops_do_marking_prologue"); }
assert(_oops_do_mark_nmethods == NULL, "must not call oops_do_marking_prologue twice in a row");
- // We use cmpxchg_ptr instead of regular assignment here because the user
+ // We use cmpxchg instead of regular assignment here because the user
// may fork a bunch of threads, and we need them all to see the same state.
- void* observed = Atomic::cmpxchg_ptr(NMETHOD_SENTINEL, &_oops_do_mark_nmethods, NULL);
+ nmethod* observed = Atomic::cmpxchg(NMETHOD_SENTINEL, &_oops_do_mark_nmethods, (nmethod*)NULL);
guarantee(observed == NULL, "no races in this sequential code");
}
@@ -1707,8 +1679,8 @@ void nmethod::oops_do_marking_epilogue() {
NOT_PRODUCT(if (TraceScavenge) cur->print_on(tty, "oops_do, unmark"));
cur = next;
}
- void* required = _oops_do_mark_nmethods;
- void* observed = Atomic::cmpxchg_ptr(NULL, &_oops_do_mark_nmethods, required);
+ nmethod* required = _oops_do_mark_nmethods;
+ nmethod* observed = Atomic::cmpxchg((nmethod*)NULL, &_oops_do_mark_nmethods, required);
guarantee(observed == required, "no races in this sequential code");
if (TraceScavenge) { tty->print_cr("oops_do_marking_epilogue]"); }
}
@@ -2137,7 +2109,7 @@ void nmethod::verify() {
VerifyOopsClosure voc(this);
oops_do(&voc);
assert(voc.ok(), "embedded oops must be OK");
- verify_scavenge_root_oops();
+ Universe::heap()->verify_nmethod(this);
verify_scopes();
}
@@ -2230,10 +2202,6 @@ public:
};
void nmethod::verify_scavenge_root_oops() {
- if (UseG1GC) {
- return;
- }
-
if (!on_scavenge_root_list()) {
// Actually look inside, to verify the claim that it's clean.
DebugScavengeRoot debug_scavenge_root(this);
@@ -2258,8 +2226,6 @@ void nmethod::print() const {
tty->print("(c1) ");
} else if (is_compiled_by_c2()) {
tty->print("(c2) ");
- } else if (is_compiled_by_shark()) {
- tty->print("(shark) ");
} else if (is_compiled_by_jvmci()) {
tty->print("(JVMCI) ");
} else {
@@ -2880,9 +2846,6 @@ void nmethod::print_statistics() {
#endif
#if INCLUDE_JVMCI
jvmci_java_nmethod_stats.print_nmethod_stats("JVMCI");
-#endif
-#ifdef SHARK
- shark_java_nmethod_stats.print_nmethod_stats("Shark");
#endif
unknown_java_nmethod_stats.print_nmethod_stats("Unknown");
DebugInformationRecorder::print_statistics();
diff --git a/src/hotspot/share/code/stubs.cpp b/src/hotspot/share/code/stubs.cpp
index 326fcc12130..56883bc623d 100644
--- a/src/hotspot/share/code/stubs.cpp
+++ b/src/hotspot/share/code/stubs.cpp
@@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "code/codeBlob.hpp"
+#include "code/codeCache.hpp"
#include "code/stubs.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/oop.inline.hpp"
@@ -89,6 +90,13 @@ StubQueue::~StubQueue() {
Unimplemented();
}
+void StubQueue::deallocate_unused_tail() {
+ CodeBlob* blob = CodeCache::find_blob((void*)_stub_buffer);
+ CodeCache::free_unused_tail(blob, used_space());
+ // Update the limits to the new, trimmed CodeBlob size
+ _buffer_size = blob->content_size();
+ _buffer_limit = blob->content_size();
+}
Stub* StubQueue::stub_containing(address pc) const {
if (contains(pc)) {
diff --git a/src/hotspot/share/code/stubs.hpp b/src/hotspot/share/code/stubs.hpp
index b340b4b61f9..ab84ffaada9 100644
--- a/src/hotspot/share/code/stubs.hpp
+++ b/src/hotspot/share/code/stubs.hpp
@@ -201,12 +201,15 @@ class StubQueue: public CHeapObj {
void remove_first(int n); // remove the first n stubs in the queue
void remove_all(); // remove all stubs in the queue
+ void deallocate_unused_tail(); // deallocate the unused tail of the underlying CodeBlob
+ // only used from TemplateInterpreter::initialize()
// Iteration
static void queues_do(void f(StubQueue* s)); // call f with each StubQueue
void stubs_do(void f(Stub* s)); // call f with all stubs
Stub* first() const { return number_of_stubs() > 0 ? stub_at(_queue_begin) : NULL; }
Stub* next(Stub* s) const { int i = index_of(s) + stub_size(s);
- if (i == _buffer_limit) i = 0;
+ // Only wrap around in the non-contiguous case (see stubs.cpp)
+ if (i == _buffer_limit && _queue_end < _buffer_limit) i = 0;
return (i == _queue_end) ? NULL : stub_at(i);
}
diff --git a/src/hotspot/share/compiler/abstractCompiler.hpp b/src/hotspot/share/compiler/abstractCompiler.hpp
index 958102bb694..52a93ba3c4a 100644
--- a/src/hotspot/share/compiler/abstractCompiler.hpp
+++ b/src/hotspot/share/compiler/abstractCompiler.hpp
@@ -152,7 +152,6 @@ class AbstractCompiler : public CHeapObj {
const bool is_c1() { return _type == compiler_c1; }
const bool is_c2() { return _type == compiler_c2; }
const bool is_jvmci() { return _type == compiler_jvmci; }
- const bool is_shark() { return _type == compiler_shark; }
const CompilerType type() { return _type; }
// Extra tests to identify trivial methods for the tiered compilation policy.
diff --git a/src/hotspot/share/compiler/compileBroker.cpp b/src/hotspot/share/compiler/compileBroker.cpp
index 9dee3fa9fa2..1169842d3d2 100644
--- a/src/hotspot/share/compiler/compileBroker.cpp
+++ b/src/hotspot/share/compiler/compileBroker.cpp
@@ -70,9 +70,6 @@
#ifdef COMPILER2
#include "opto/c2compiler.hpp"
#endif
-#ifdef SHARK
-#include "shark/sharkCompiler.hpp"
-#endif
#ifdef DTRACE_ENABLED
@@ -531,7 +528,6 @@ void CompileBroker::compilation_init(TRAPS) {
if (!UseCompiler) {
return;
}
-#ifndef SHARK
// Set the interface to the current compiler(s).
int c1_count = CompilationPolicy::policy()->compiler_count(CompLevel_simple);
int c2_count = CompilationPolicy::policy()->compiler_count(CompLevel_full_optimization);
@@ -573,13 +569,6 @@ void CompileBroker::compilation_init(TRAPS) {
}
#endif // COMPILER2
-#else // SHARK
- int c1_count = 0;
- int c2_count = 1;
-
- _compilers[1] = new SharkCompiler();
-#endif // SHARK
-
// Start the compiler thread(s) and the sweeper thread
init_compiler_sweeper_threads(c1_count, c2_count);
// totalTime performance counter is always created as it is required
@@ -774,9 +763,9 @@ JavaThread* CompileBroker::make_thread(const char* name, CompileQueue* queue, Co
void CompileBroker::init_compiler_sweeper_threads(int c1_compiler_count, int c2_compiler_count) {
EXCEPTION_MARK;
-#if !defined(ZERO) && !defined(SHARK)
+#if !defined(ZERO)
assert(c2_compiler_count > 0 || c1_compiler_count > 0, "No compilers?");
-#endif // !ZERO && !SHARK
+#endif // !ZERO
// Initialize the compilation queue
if (c2_compiler_count > 0) {
const char* name = JVMCI_ONLY(UseJVMCICompiler ? "JVMCI compile queue" :) "C2 compile queue";
@@ -796,7 +785,6 @@ void CompileBroker::init_compiler_sweeper_threads(int c1_compiler_count, int c2_
// Create a name for our thread.
sprintf(name_buffer, "%s CompilerThread%d", _compilers[1]->name(), i);
CompilerCounters* counters = new CompilerCounters();
- // Shark and C2
make_thread(name_buffer, _c2_compile_queue, counters, _compilers[1], compiler_thread, CHECK);
}
@@ -1100,7 +1088,7 @@ nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
assert(!HAS_PENDING_EXCEPTION, "No exception should be present");
// some prerequisites that are compiler specific
- if (comp->is_c2() || comp->is_shark()) {
+ if (comp->is_c2()) {
method->constants()->resolve_string_constants(CHECK_AND_CLEAR_NULL);
// Resolve all classes seen in the signature of the method
// we are compiling.
@@ -1490,10 +1478,8 @@ bool CompileBroker::init_compiler_runtime() {
ThreadInVMfromNative tv(thread);
ResetNoHandleMark rnhm;
- if (!comp->is_shark()) {
- // Perform per-thread and global initializations
- comp->initialize();
- }
+ // Perform per-thread and global initializations
+ comp->initialize();
}
if (comp->is_failed()) {
diff --git a/src/hotspot/share/compiler/compileBroker.hpp b/src/hotspot/share/compiler/compileBroker.hpp
index 571ee7020e0..ced793a0c4f 100644
--- a/src/hotspot/share/compiler/compileBroker.hpp
+++ b/src/hotspot/share/compiler/compileBroker.hpp
@@ -332,7 +332,7 @@ public:
static void disable_compilation_forever() {
UseCompiler = false;
AlwaysCompileLoopMethods = false;
- Atomic::xchg(shutdown_compilation, &_should_compile_new_jobs);
+ Atomic::xchg(jint(shutdown_compilation), &_should_compile_new_jobs);
}
static bool is_compilation_disabled_forever() {
diff --git a/src/hotspot/share/compiler/compilerDefinitions.cpp b/src/hotspot/share/compiler/compilerDefinitions.cpp
index bcec3ffa20d..0139ceb25f3 100644
--- a/src/hotspot/share/compiler/compilerDefinitions.cpp
+++ b/src/hotspot/share/compiler/compilerDefinitions.cpp
@@ -31,11 +31,10 @@ const char* compilertype2name_tab[compiler_number_of_types] = {
"",
"c1",
"c2",
- "jvmci",
- "shark"
+ "jvmci"
};
-#if defined(COMPILER2) || defined(SHARK)
+#if defined(COMPILER2)
CompLevel CompLevel_highest_tier = CompLevel_full_optimization; // pure C2 and tiered or JVMCI and tiered
#elif defined(COMPILER1)
CompLevel CompLevel_highest_tier = CompLevel_simple; // pure C1 or JVMCI
@@ -47,7 +46,7 @@ CompLevel CompLevel_highest_tier = CompLevel_none;
CompLevel CompLevel_initial_compile = CompLevel_full_profile; // tiered
#elif defined(COMPILER1) || INCLUDE_JVMCI
CompLevel CompLevel_initial_compile = CompLevel_simple; // pure C1 or JVMCI
-#elif defined(COMPILER2) || defined(SHARK)
+#elif defined(COMPILER2)
CompLevel CompLevel_initial_compile = CompLevel_full_optimization; // pure C2
#else
CompLevel CompLevel_initial_compile = CompLevel_none;
diff --git a/src/hotspot/share/compiler/compilerDefinitions.hpp b/src/hotspot/share/compiler/compilerDefinitions.hpp
index a2378ea1e92..16ae7b0e3af 100644
--- a/src/hotspot/share/compiler/compilerDefinitions.hpp
+++ b/src/hotspot/share/compiler/compilerDefinitions.hpp
@@ -33,7 +33,6 @@ enum CompilerType {
compiler_c1,
compiler_c2,
compiler_jvmci,
- compiler_shark,
compiler_number_of_types
};
@@ -54,7 +53,7 @@ enum CompLevel {
CompLevel_simple = 1, // C1
CompLevel_limited_profile = 2, // C1, invocation & backedge counters
CompLevel_full_profile = 3, // C1, invocation & backedge counters + mdo
- CompLevel_full_optimization = 4 // C2, Shark or JVMCI
+ CompLevel_full_optimization = 4 // C2 or JVMCI
};
extern CompLevel CompLevel_highest_tier;
diff --git a/src/hotspot/share/compiler/compilerDirectives.cpp b/src/hotspot/share/compiler/compilerDirectives.cpp
index 467a89d6e14..e3181e00c12 100644
--- a/src/hotspot/share/compiler/compilerDirectives.cpp
+++ b/src/hotspot/share/compiler/compilerDirectives.cpp
@@ -171,7 +171,7 @@ DirectiveSet* CompilerDirectives::get_for(AbstractCompiler *comp) {
return _c2_store;
} else {
// use c1_store as default
- assert(comp->is_c1() || comp->is_jvmci() || comp->is_shark(), "");
+ assert(comp->is_c1() || comp->is_jvmci(), "");
return _c1_store;
}
}
diff --git a/src/hotspot/share/compiler/disassembler.cpp b/src/hotspot/share/compiler/disassembler.cpp
index d5e6f66f440..bb36edbc58d 100644
--- a/src/hotspot/share/compiler/disassembler.cpp
+++ b/src/hotspot/share/compiler/disassembler.cpp
@@ -35,9 +35,6 @@
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include CPU_HEADER(depChecker)
-#ifdef SHARK
-#include "shark/sharkEntry.hpp"
-#endif
void* Disassembler::_library = NULL;
bool Disassembler::_tried_to_load_library = false;
@@ -521,14 +518,8 @@ void Disassembler::decode(nmethod* nm, outputStream* st) {
decode_env env(nm, st);
env.output()->print_cr("----------------------------------------------------------------------");
-#ifdef SHARK
- SharkEntry* entry = (SharkEntry *) nm->code_begin();
- unsigned char* p = entry->code_start();
- unsigned char* end = entry->code_limit();
-#else
unsigned char* p = nm->code_begin();
unsigned char* end = nm->code_end();
-#endif // SHARK
nm->method()->method_holder()->name()->print_symbol_on(env.output());
env.output()->print(".");
diff --git a/src/hotspot/share/compiler/methodMatcher.cpp b/src/hotspot/share/compiler/methodMatcher.cpp
index 8113d859cff..cc84fb53e8b 100644
--- a/src/hotspot/share/compiler/methodMatcher.cpp
+++ b/src/hotspot/share/compiler/methodMatcher.cpp
@@ -96,7 +96,7 @@ bool MethodMatcher::canonicalize(char * line, const char *& error_msg) {
bool have_colon = (colon != NULL);
if (have_colon) {
// Don't allow multiple '::'
- if (colon + 2 != '\0') {
+ if (colon[2] != '\0') {
if (strstr(colon+2, "::")) {
error_msg = "Method pattern only allows one '::' allowed";
return false;
diff --git a/src/hotspot/share/compiler/oopMap.cpp b/src/hotspot/share/compiler/oopMap.cpp
index 7759615c4ce..e412e4bf2d0 100644
--- a/src/hotspot/share/compiler/oopMap.cpp
+++ b/src/hotspot/share/compiler/oopMap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,7 @@
#include "compiler/oopMap.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "memory/allocation.inline.hpp"
+#include "memory/iterator.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/signature.hpp"
@@ -40,9 +41,6 @@
#ifdef COMPILER2
#include "opto/optoreg.hpp"
#endif
-#ifdef SPARC
-#include "vmreg_sparc.inline.hpp"
-#endif
// OopMapStream
@@ -266,13 +264,6 @@ OopMap* OopMapSet::find_map_at_offset(int pc_offset) const {
return m;
}
-class DoNothingClosure: public OopClosure {
- public:
- void do_oop(oop* p) {}
- void do_oop(narrowOop* p) {}
-};
-static DoNothingClosure do_nothing;
-
static void add_derived_oop(oop* base, oop* derived) {
#if !defined(TIERED) && !defined(INCLUDE_JVMCI)
COMPILER1_PRESENT(ShouldNotReachHere();)
@@ -313,7 +304,7 @@ static void trace_codeblob_maps(const frame *fr, const RegisterMap *reg_map) {
void OopMapSet::oops_do(const frame *fr, const RegisterMap* reg_map, OopClosure* f) {
// add derived oops to a table
- all_do(fr, reg_map, f, add_derived_oop, &do_nothing);
+ all_do(fr, reg_map, f, add_derived_oop, &do_nothing_cl);
}
diff --git a/src/hotspot/share/gc/cms/cmsHeap.cpp b/src/hotspot/share/gc/cms/cmsHeap.cpp
new file mode 100644
index 00000000000..da344cce24d
--- /dev/null
+++ b/src/hotspot/share/gc/cms/cmsHeap.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/cms/concurrentMarkSweepThread.hpp"
+#include "gc/cms/cmsHeap.hpp"
+#include "gc/cms/vmCMSOperations.hpp"
+#include "gc/shared/genOopClosures.inline.hpp"
+#include "gc/shared/strongRootsScope.hpp"
+#include "gc/shared/workgroup.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/vmThread.hpp"
+#include "utilities/stack.inline.hpp"
+
+CMSHeap::CMSHeap(GenCollectorPolicy *policy) : GenCollectedHeap(policy) {
+ _workers = new WorkGang("GC Thread", ParallelGCThreads,
+ /* are_GC_task_threads */true,
+ /* are_ConcurrentGC_threads */false);
+ _workers->initialize_workers();
+}
+
+jint CMSHeap::initialize() {
+ jint status = GenCollectedHeap::initialize();
+ if (status != JNI_OK) return status;
+
+ // If we are running CMS, create the collector responsible
+ // for collecting the CMS generations.
+ assert(collector_policy()->is_concurrent_mark_sweep_policy(), "must be CMS policy");
+ if (!create_cms_collector()) {
+ return JNI_ENOMEM;
+ }
+
+ return JNI_OK;
+}
+
+void CMSHeap::check_gen_kinds() {
+ assert(young_gen()->kind() == Generation::ParNew,
+ "Wrong youngest generation type");
+ assert(old_gen()->kind() == Generation::ConcurrentMarkSweep,
+ "Wrong generation kind");
+}
+
+CMSHeap* CMSHeap::heap() {
+ CollectedHeap* heap = Universe::heap();
+ assert(heap != NULL, "Uninitialized access to CMSHeap::heap()");
+ assert(heap->kind() == CollectedHeap::CMSHeap, "Not a CMSHeap");
+ return (CMSHeap*) heap;
+}
+
+void CMSHeap::gc_threads_do(ThreadClosure* tc) const {
+ assert(workers() != NULL, "should have workers here");
+ workers()->threads_do(tc);
+ ConcurrentMarkSweepThread::threads_do(tc);
+}
+
+void CMSHeap::print_gc_threads_on(outputStream* st) const {
+ assert(workers() != NULL, "should have workers here");
+ workers()->print_worker_threads_on(st);
+ ConcurrentMarkSweepThread::print_all_on(st);
+}
+
+void CMSHeap::print_on_error(outputStream* st) const {
+ GenCollectedHeap::print_on_error(st);
+ st->cr();
+ CMSCollector::print_on_error(st);
+}
+
+bool CMSHeap::create_cms_collector() {
+ assert(old_gen()->kind() == Generation::ConcurrentMarkSweep,
+ "Unexpected generation kinds");
+ assert(gen_policy()->is_concurrent_mark_sweep_policy(), "Unexpected policy type");
+ CMSCollector* collector =
+ new CMSCollector((ConcurrentMarkSweepGeneration*) old_gen(),
+ rem_set(),
+ gen_policy()->as_concurrent_mark_sweep_policy());
+
+ if (collector == NULL || !collector->completed_initialization()) {
+ if (collector) {
+ delete collector; // Be nice in embedded situation
+ }
+ vm_shutdown_during_initialization("Could not create CMS collector");
+ return false;
+ }
+ return true; // success
+}
+
+void CMSHeap::collect(GCCause::Cause cause) {
+ if (should_do_concurrent_full_gc(cause)) {
+ // Mostly concurrent full collection.
+ collect_mostly_concurrent(cause);
+ } else {
+ GenCollectedHeap::collect(cause);
+ }
+}
+
+bool CMSHeap::should_do_concurrent_full_gc(GCCause::Cause cause) {
+ switch (cause) {
+ case GCCause::_gc_locker: return GCLockerInvokesConcurrent;
+ case GCCause::_java_lang_system_gc:
+ case GCCause::_dcmd_gc_run: return ExplicitGCInvokesConcurrent;
+ default: return false;
+ }
+}
+
+void CMSHeap::collect_mostly_concurrent(GCCause::Cause cause) {
+ assert(!Heap_lock->owned_by_self(), "Should not own Heap_lock");
+
+ MutexLocker ml(Heap_lock);
+ // Read the GC counts while holding the Heap_lock
+ unsigned int full_gc_count_before = total_full_collections();
+ unsigned int gc_count_before = total_collections();
+ {
+ MutexUnlocker mu(Heap_lock);
+ VM_GenCollectFullConcurrent op(gc_count_before, full_gc_count_before, cause);
+ VMThread::execute(&op);
+ }
+}
+
+void CMSHeap::stop() {
+ ConcurrentMarkSweepThread::cmst()->stop();
+}
+
+void CMSHeap::safepoint_synchronize_begin() {
+ ConcurrentMarkSweepThread::synchronize(false);
+}
+
+void CMSHeap::safepoint_synchronize_end() {
+ ConcurrentMarkSweepThread::desynchronize(false);
+}
+
+void CMSHeap::cms_process_roots(StrongRootsScope* scope,
+ bool young_gen_as_roots,
+ ScanningOption so,
+ bool only_strong_roots,
+ OopsInGenClosure* root_closure,
+ CLDClosure* cld_closure) {
+ MarkingCodeBlobClosure mark_code_closure(root_closure, !CodeBlobToOopClosure::FixRelocations);
+ OopsInGenClosure* weak_roots = only_strong_roots ? NULL : root_closure;
+ CLDClosure* weak_cld_closure = only_strong_roots ? NULL : cld_closure;
+
+ process_roots(scope, so, root_closure, weak_roots, cld_closure, weak_cld_closure, &mark_code_closure);
+ if (!only_strong_roots) {
+ process_string_table_roots(scope, root_closure);
+ }
+
+ if (young_gen_as_roots &&
+ !_process_strong_tasks->is_task_claimed(GCH_PS_younger_gens)) {
+ root_closure->set_generation(young_gen());
+ young_gen()->oop_iterate(root_closure);
+ root_closure->reset_generation();
+ }
+
+ _process_strong_tasks->all_tasks_completed(scope->n_threads());
+}
+
+void CMSHeap::gc_prologue(bool full) {
+ always_do_update_barrier = false;
+ GenCollectedHeap::gc_prologue(full);
+};
+
+void CMSHeap::gc_epilogue(bool full) {
+ GenCollectedHeap::gc_epilogue(full);
+ always_do_update_barrier = true;
+};
diff --git a/src/hotspot/share/gc/cms/cmsHeap.hpp b/src/hotspot/share/gc/cms/cmsHeap.hpp
new file mode 100644
index 00000000000..82fbd4107a5
--- /dev/null
+++ b/src/hotspot/share/gc/cms/cmsHeap.hpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_CMS_CMSHEAP_HPP
+#define SHARE_VM_GC_CMS_CMSHEAP_HPP
+
+#include "gc/cms/concurrentMarkSweepGeneration.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "gc/shared/gcCause.hpp"
+#include "gc/shared/genCollectedHeap.hpp"
+
+class CLDClosure;
+class GenCollectorPolicy;
+class OopsInGenClosure;
+class outputStream;
+class StrongRootsScope;
+class ThreadClosure;
+class WorkGang;
+
+class CMSHeap : public GenCollectedHeap { // GenCollectedHeap subclass for the "Concurrent Mark Sweep" collector
+public:
+ CMSHeap(GenCollectorPolicy *policy);
+
+ // Returns JNI_OK on success
+ virtual jint initialize();
+
+ virtual void check_gen_kinds(); // NOTE(review): presumably validates the generation kinds for this heap - confirm
+
+ // Convenience function to be used in situations where the heap type can be
+ // asserted to be this type.
+ static CMSHeap* heap();
+
+ virtual Name kind() const { // identifies this heap as CollectedHeap::CMSHeap
+ return CollectedHeap::CMSHeap;
+ }
+
+ virtual const char* name() const { // fixed, human-readable collector name
+ return "Concurrent Mark Sweep";
+ }
+
+ WorkGang* workers() const { return _workers; } // accessor for the _workers field
+
+ virtual void print_gc_threads_on(outputStream* st) const;
+ virtual void gc_threads_do(ThreadClosure* tc) const;
+ virtual void print_on_error(outputStream* st) const;
+
+ // Perform a full collection of the heap; intended for use in implementing
+ // "System.gc". This implies as full a collection as the CollectedHeap
+ // supports. Caller does not hold the Heap_lock on entry.
+ void collect(GCCause::Cause cause);
+
+ bool is_in_closed_subset(const void* p) const { // same answer as is_in_reserved(p)
+ return is_in_reserved(p);
+ }
+
+ bool card_mark_must_follow_store() const { // unconditionally true for this heap
+ return true;
+ }
+
+ void stop(); // calls stop() on ConcurrentMarkSweepThread::cmst()
+ void safepoint_synchronize_begin(); // calls ConcurrentMarkSweepThread::synchronize(false)
+ void safepoint_synchronize_end(); // calls ConcurrentMarkSweepThread::desynchronize(false)
+
+ // If "young_gen_as_roots" is false, younger generations are
+ // not scanned as roots; in this case, the caller must be arranging to
+ // scan the younger generations itself. (For example, a generation might
+ // explicitly mark reachable objects in younger generations, to avoid
+ // excess storage retention.)
+ void cms_process_roots(StrongRootsScope* scope,
+ bool young_gen_as_roots,
+ ScanningOption so,
+ bool only_strong_roots, // true: weak closures are passed as NULL and string-table roots are skipped
+ OopsInGenClosure* root_closure,
+ CLDClosure* cld_closure);
+
+private:
+ WorkGang* _workers; // returned by workers()
+
+ virtual void gc_prologue(bool full); // clears always_do_update_barrier, then runs GenCollectedHeap::gc_prologue
+ virtual void gc_epilogue(bool full); // runs GenCollectedHeap::gc_epilogue, then sets always_do_update_barrier
+
+ // Accessor for memory state verification support
+ NOT_PRODUCT(
+ virtual size_t skip_header_HeapWords() { return CMSCollector::skip_header_HeapWords(); }
+ )
+
+ // Returns success or failure.
+ bool create_cms_collector();
+
+ // In support of ExplicitGCInvokesConcurrent functionality
+ bool should_do_concurrent_full_gc(GCCause::Cause cause);
+
+ void collect_mostly_concurrent(GCCause::Cause cause); // runs a VM_GenCollectFullConcurrent operation via VMThread::execute
+};
+
+#endif // SHARE_VM_GC_CMS_CMSHEAP_HPP
diff --git a/src/hotspot/share/gc/cms/cmsOopClosures.hpp b/src/hotspot/share/gc/cms/cmsOopClosures.hpp
index 11416afebc9..ab29b0136cd 100644
--- a/src/hotspot/share/gc/cms/cmsOopClosures.hpp
+++ b/src/hotspot/share/gc/cms/cmsOopClosures.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -48,12 +48,7 @@ class ParMarkFromRootsClosure;
// because some CMS OopClosures derive from OopsInGenClosure. It would be
// good to get rid of them completely.
class MetadataAwareOopsInGenClosure: public OopsInGenClosure {
- KlassToOopClosure _klass_closure;
public:
- MetadataAwareOopsInGenClosure() {
- _klass_closure.initialize(this);
- }
-
virtual bool do_metadata() { return do_metadata_nv(); }
inline bool do_metadata_nv() { return true; }
diff --git a/src/hotspot/share/gc/cms/cmsOopClosures.inline.hpp b/src/hotspot/share/gc/cms/cmsOopClosures.inline.hpp
index 36a6e841cfc..ad01a9d45d7 100644
--- a/src/hotspot/share/gc/cms/cmsOopClosures.inline.hpp
+++ b/src/hotspot/share/gc/cms/cmsOopClosures.inline.hpp
@@ -40,10 +40,8 @@ inline void MetadataAwareOopsInGenClosure::do_klass_nv(Klass* k) {
inline void MetadataAwareOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
inline void MetadataAwareOopsInGenClosure::do_cld_nv(ClassLoaderData* cld) {
- assert(_klass_closure._oop_closure == this, "Must be");
-
bool claim = true; // Must claim the class loader data before processing.
- cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
+ cld->oops_do(this, claim);
}
// Decode the oop and call do_oop on it.
diff --git a/src/hotspot/share/gc/cms/compactibleFreeListSpace.cpp b/src/hotspot/share/gc/cms/compactibleFreeListSpace.cpp
index 3653c4df24f..34116c60433 100644
--- a/src/hotspot/share/gc/cms/compactibleFreeListSpace.cpp
+++ b/src/hotspot/share/gc/cms/compactibleFreeListSpace.cpp
@@ -23,13 +23,13 @@
*/
#include "precompiled.hpp"
+#include "gc/cms/cmsHeap.hpp"
#include "gc/cms/cmsLockVerifier.hpp"
#include "gc/cms/compactibleFreeListSpace.hpp"
#include "gc/cms/concurrentMarkSweepGeneration.inline.hpp"
#include "gc/cms/concurrentMarkSweepThread.hpp"
#include "gc/shared/blockOffsetTable.inline.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
-#include "gc/shared/genCollectedHeap.hpp"
#include "gc/shared/space.inline.hpp"
#include "gc/shared/spaceDecorator.hpp"
#include "logging/log.hpp"
@@ -154,7 +154,7 @@ HeapWord* CompactibleFreeListSpace::forward(oop q, size_t size,
cp->space->set_compaction_top(compact_top);
cp->space = cp->space->next_compaction_space();
if (cp->space == NULL) {
- cp->gen = GenCollectedHeap::heap()->young_gen();
+ cp->gen = CMSHeap::heap()->young_gen();
assert(cp->gen != NULL, "compaction must succeed");
cp->space = cp->gen->first_compaction_space();
assert(cp->space != NULL, "generation must have a first compaction space");
@@ -2298,7 +2298,7 @@ void CompactibleFreeListSpace::verify() const {
// Iterate over all oops in the heap. Uses the _no_header version
// since we are not interested in following the klass pointers.
- GenCollectedHeap::heap()->oop_iterate_no_header(&cl);
+ CMSHeap::heap()->oop_iterate_no_header(&cl);
}
if (VerifyObjectStartArray) {
diff --git a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp
index 22d5030ba72..0901117f646 100644
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.cpp
@@ -29,6 +29,7 @@
#include "classfile/systemDictionary.hpp"
#include "code/codeCache.hpp"
#include "gc/cms/cmsCollectorPolicy.hpp"
+#include "gc/cms/cmsHeap.hpp"
#include "gc/cms/cmsOopClosures.inline.hpp"
#include "gc/cms/compactibleFreeListSpace.hpp"
#include "gc/cms/concurrentMarkSweepGeneration.inline.hpp"
@@ -54,6 +55,7 @@
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/allocation.hpp"
@@ -298,14 +300,14 @@ void CMSCollector::ref_processor_init() {
}
AdaptiveSizePolicy* CMSCollector::size_policy() {
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- return gch->gen_policy()->size_policy();
+ CMSHeap* heap = CMSHeap::heap();
+ return heap->gen_policy()->size_policy();
}
void ConcurrentMarkSweepGeneration::initialize_performance_counters() {
const char* gen_name = "old";
- GenCollectorPolicy* gcp = GenCollectedHeap::heap()->gen_policy();
+ GenCollectorPolicy* gcp = CMSHeap::heap()->gen_policy();
// Generation Counters - generation 1, 1 subspace
_gen_counters = new GenerationCounters(gen_name, 1, 1,
gcp->min_old_size(), gcp->max_old_size(), &_virtual_space);
@@ -354,8 +356,8 @@ void CMSStats::adjust_cms_free_adjustment_factor(bool fail, size_t free) {
// young generation collection.
double CMSStats::time_until_cms_gen_full() const {
size_t cms_free = _cms_gen->cmsSpace()->free();
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- size_t expected_promotion = MIN2(gch->young_gen()->capacity(),
+ CMSHeap* heap = CMSHeap::heap();
+ size_t expected_promotion = MIN2(heap->young_gen()->capacity(),
(size_t) _cms_gen->gc_stats()->avg_promoted()->padded_average());
if (cms_free > expected_promotion) {
// Start a cms collection if there isn't enough space to promote
@@ -595,12 +597,12 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
assert(CGC_lock != NULL, "Where's the CGC_lock?");
// Support for parallelizing young gen rescan
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- assert(gch->young_gen()->kind() == Generation::ParNew, "CMS can only be used with ParNew");
- _young_gen = (ParNewGeneration*)gch->young_gen();
- if (gch->supports_inline_contig_alloc()) {
- _top_addr = gch->top_addr();
- _end_addr = gch->end_addr();
+ CMSHeap* heap = CMSHeap::heap();
+ assert(heap->young_gen()->kind() == Generation::ParNew, "CMS can only be used with ParNew");
+ _young_gen = (ParNewGeneration*)heap->young_gen();
+ if (heap->supports_inline_contig_alloc()) {
+ _top_addr = heap->top_addr();
+ _end_addr = heap->end_addr();
assert(_young_gen != NULL, "no _young_gen");
_eden_chunk_index = 0;
_eden_chunk_capacity = (_young_gen->max_capacity() + CMSSamplingGrain) / CMSSamplingGrain;
@@ -762,9 +764,9 @@ void ConcurrentMarkSweepGeneration::compute_new_size_free_list() {
log.trace(" Maximum free fraction %f", maximum_free_percentage);
log.trace(" Capacity " SIZE_FORMAT, capacity() / 1000);
log.trace(" Desired capacity " SIZE_FORMAT, desired_capacity / 1000);
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- assert(gch->is_old_gen(this), "The CMS generation should always be the old generation");
- size_t young_size = gch->young_gen()->capacity();
+ CMSHeap* heap = CMSHeap::heap();
+ assert(heap->is_old_gen(this), "The CMS generation should always be the old generation");
+ size_t young_size = heap->young_gen()->capacity();
log.trace(" Young gen size " SIZE_FORMAT, young_size / 1000);
log.trace(" unsafe_max_alloc_nogc " SIZE_FORMAT, unsafe_max_alloc_nogc() / 1000);
log.trace(" contiguous available " SIZE_FORMAT, contiguous_available() / 1000);
@@ -923,7 +925,7 @@ oop ConcurrentMarkSweepGeneration::promote(oop obj, size_t obj_size) {
assert_lock_strong(freelistLock());
#ifndef PRODUCT
- if (GenCollectedHeap::heap()->promotion_should_fail()) {
+ if (CMSHeap::heap()->promotion_should_fail()) {
return NULL;
}
#endif // #ifndef PRODUCT
@@ -1000,7 +1002,7 @@ ConcurrentMarkSweepGeneration::par_promote(int thread_num,
oop old, markOop m,
size_t word_sz) {
#ifndef PRODUCT
- if (GenCollectedHeap::heap()->promotion_should_fail()) {
+ if (CMSHeap::heap()->promotion_should_fail()) {
return NULL;
}
#endif // #ifndef PRODUCT
@@ -1075,8 +1077,8 @@ ConcurrentMarkSweepGeneration::par_promote(int thread_num,
obj_ptr, old->is_objArray(), word_sz);
NOT_PRODUCT(
- Atomic::inc_ptr(&_numObjectsPromoted);
- Atomic::add_ptr(alloc_sz, &_numWordsPromoted);
+ Atomic::inc(&_numObjectsPromoted);
+ Atomic::add(alloc_sz, &_numWordsPromoted);
)
return obj;
@@ -1179,10 +1181,10 @@ bool CMSCollector::shouldConcurrentCollect() {
// We start a collection if we believe an incremental collection may fail;
// this is not likely to be productive in practice because it's probably too
// late anyway.
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- assert(gch->collector_policy()->is_generation_policy(),
+ CMSHeap* heap = CMSHeap::heap();
+ assert(heap->collector_policy()->is_generation_policy(),
"You may want to check the correctness of the following");
- if (gch->incremental_collection_will_fail(true /* consult_young */)) {
+ if (heap->incremental_collection_will_fail(true /* consult_young */)) {
log.print("CMSCollector: collect because incremental collection will fail ");
return true;
}
@@ -1294,8 +1296,8 @@ void CMSCollector::collect(bool full,
}
void CMSCollector::request_full_gc(unsigned int full_gc_count, GCCause::Cause cause) {
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- unsigned int gc_count = gch->total_full_collections();
+ CMSHeap* heap = CMSHeap::heap();
+ unsigned int gc_count = heap->total_full_collections();
if (gc_count == full_gc_count) {
MutexLockerEx y(CGC_lock, Mutex::_no_safepoint_check_flag);
_full_gc_requested = true;
@@ -1307,7 +1309,7 @@ void CMSCollector::request_full_gc(unsigned int full_gc_count, GCCause::Cause ca
}
bool CMSCollector::is_external_interruption() {
- GCCause::Cause cause = GenCollectedHeap::heap()->gc_cause();
+ GCCause::Cause cause = CMSHeap::heap()->gc_cause();
return GCCause::is_user_requested_gc(cause) ||
GCCause::is_serviceability_requested_gc(cause);
}
@@ -1456,8 +1458,8 @@ void CMSCollector::acquire_control_and_collect(bool full,
// Inform cms gen if this was due to partial collection failing.
// The CMS gen may use this fact to determine its expansion policy.
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- if (gch->incremental_collection_will_fail(false /* don't consult_young */)) {
+ CMSHeap* heap = CMSHeap::heap();
+ if (heap->incremental_collection_will_fail(false /* don't consult_young */)) {
assert(!_cmsGen->incremental_collection_failed(),
"Should have been noticed, reacted to and cleared");
_cmsGen->set_incremental_collection_failed();
@@ -1489,14 +1491,14 @@ void CMSCollector::acquire_control_and_collect(bool full,
// Has the GC time limit been exceeded?
size_t max_eden_size = _young_gen->max_eden_size();
- GCCause::Cause gc_cause = gch->gc_cause();
+ GCCause::Cause gc_cause = heap->gc_cause();
size_policy()->check_gc_overhead_limit(_young_gen->used(),
_young_gen->eden()->used(),
_cmsGen->max_capacity(),
max_eden_size,
full,
gc_cause,
- gch->collector_policy());
+ heap->collector_policy());
// Reset the expansion cause, now that we just completed
// a collection cycle.
@@ -1518,21 +1520,21 @@ void CMSCollector::compute_new_size() {
// A work method used by the foreground collector to do
// a mark-sweep-compact.
void CMSCollector::do_compaction_work(bool clear_all_soft_refs) {
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
STWGCTimer* gc_timer = GenMarkSweep::gc_timer();
gc_timer->register_gc_start();
SerialOldTracer* gc_tracer = GenMarkSweep::gc_tracer();
- gc_tracer->report_gc_start(gch->gc_cause(), gc_timer->gc_start());
+ gc_tracer->report_gc_start(heap->gc_cause(), gc_timer->gc_start());
- gch->pre_full_gc_dump(gc_timer);
+ heap->pre_full_gc_dump(gc_timer);
GCTraceTime(Trace, gc, phases) t("CMS:MSC");
// Temporarily widen the span of the weak reference processing to
// the entire heap.
- MemRegion new_span(GenCollectedHeap::heap()->reserved_region());
+ MemRegion new_span(CMSHeap::heap()->reserved_region());
ReferenceProcessorSpanMutator rp_mut_span(ref_processor(), new_span);
// Temporarily, clear the "is_alive_non_header" field of the
// reference processor.
@@ -1553,9 +1555,10 @@ void CMSCollector::do_compaction_work(bool clear_all_soft_refs) {
assert(_collectorState != Idling || _modUnionTable.isAllClear(),
"_modUnionTable should be clear if the baton was not passed");
_modUnionTable.clear_all();
- assert(_collectorState != Idling || _ct->klass_rem_set()->mod_union_is_clear(),
+ assert(_collectorState != Idling || _ct->cld_rem_set()->mod_union_is_clear(),
"mod union for klasses should be clear if the baton was passed");
- _ct->klass_rem_set()->clear_mod_union();
+ _ct->cld_rem_set()->clear_mod_union();
+
// We must adjust the allocation statistics being maintained
// in the free list space. We do so by reading and clearing
@@ -1607,7 +1610,7 @@ void CMSCollector::do_compaction_work(bool clear_all_soft_refs) {
// No longer a need to do a concurrent collection for Metaspace.
MetaspaceGC::set_should_concurrent_collect(false);
- gch->post_full_gc_dump(gc_timer);
+ heap->post_full_gc_dump(gc_timer);
gc_timer->register_gc_end();
@@ -1701,7 +1704,7 @@ void CMSCollector::collect_in_background(GCCause::Cause cause) {
assert(Thread::current()->is_ConcurrentGC_thread(),
"A CMS asynchronous collection is only allowed on a CMS thread.");
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
{
bool safepoint_check = Mutex::_no_safepoint_check_flag;
MutexLockerEx hl(Heap_lock, safepoint_check);
@@ -1730,8 +1733,8 @@ void CMSCollector::collect_in_background(GCCause::Cause cause) {
_full_gc_requested = false; // acks all outstanding full gc requests
_full_gc_cause = GCCause::_no_gc;
// Signal that we are about to start a collection
- gch->increment_total_full_collections(); // ... starting a collection cycle
- _collection_count_start = gch->total_full_collections();
+ heap->increment_total_full_collections(); // ... starting a collection cycle
+ _collection_count_start = heap->total_full_collections();
}
size_t prev_used = _cmsGen->used();
@@ -1924,9 +1927,9 @@ void CMSCollector::register_gc_end() {
}
void CMSCollector::save_heap_summary() {
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- _last_heap_summary = gch->create_heap_summary();
- _last_metaspace_summary = gch->create_metaspace_summary();
+ CMSHeap* heap = CMSHeap::heap();
+ _last_heap_summary = heap->create_heap_summary();
+ _last_metaspace_summary = heap->create_metaspace_summary();
}
void CMSCollector::report_heap_summary(GCWhen::Type when) {
@@ -2025,7 +2028,7 @@ void CMSCollector::gc_prologue(bool full) {
// that information. Tell the young collection to save the union of all
// modified klasses.
if (duringMarking) {
- _ct->klass_rem_set()->set_accumulate_modified_oops(true);
+ _ct->cld_rem_set()->set_accumulate_modified_oops(true);
}
bool registerClosure = duringMarking;
@@ -2101,7 +2104,7 @@ void CMSCollector::gc_epilogue(bool full) {
assert(haveFreelistLocks(), "must have freelist locks");
assert_lock_strong(bitMapLock());
- _ct->klass_rem_set()->set_accumulate_modified_oops(false);
+ _ct->cld_rem_set()->set_accumulate_modified_oops(false);
_cmsGen->gc_epilogue_work(full);
@@ -2302,10 +2305,10 @@ bool CMSCollector::verify_after_remark() {
assert(verification_mark_stack()->isEmpty(), "markStack should be empty");
verify_work_stacks_empty();
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- gch->ensure_parsability(false); // fill TLABs, but no need to retire them
+ CMSHeap* heap = CMSHeap::heap();
+ heap->ensure_parsability(false); // fill TLABs, but no need to retire them
// Update the saved marks which may affect the root scans.
- gch->save_marks();
+ heap->save_marks();
if (CMSRemarkVerifyVariant == 1) {
// In this first variant of verification, we complete
@@ -2328,19 +2331,19 @@ bool CMSCollector::verify_after_remark() {
void CMSCollector::verify_after_remark_work_1() {
ResourceMark rm;
HandleMark hm;
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
// Get a clear set of claim bits for the roots processing to work with.
ClassLoaderDataGraph::clear_claimed_marks();
// Mark from roots one level into CMS
MarkRefsIntoClosure notOlder(_span, verification_mark_bm());
- gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
+ heap->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
{
StrongRootsScope srs(1);
- gch->cms_process_roots(&srs,
+ heap->cms_process_roots(&srs,
true, // young gen as roots
GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(),
@@ -2375,30 +2378,30 @@ void CMSCollector::verify_after_remark_work_1() {
log.error("Failed marking verification after remark");
ResourceMark rm;
LogStream ls(log.error());
- gch->print_on(&ls);
+ heap->print_on(&ls);
fatal("CMS: failed marking verification after remark");
}
}
-class VerifyKlassOopsKlassClosure : public KlassClosure {
- class VerifyKlassOopsClosure : public OopClosure {
+class VerifyCLDOopsCLDClosure : public CLDClosure {
+ class VerifyCLDOopsClosure : public OopClosure {
CMSBitMap* _bitmap;
public:
- VerifyKlassOopsClosure(CMSBitMap* bitmap) : _bitmap(bitmap) { }
+ VerifyCLDOopsClosure(CMSBitMap* bitmap) : _bitmap(bitmap) { }
void do_oop(oop* p) { guarantee(*p == NULL || _bitmap->isMarked((HeapWord*) *p), "Should be marked"); }
void do_oop(narrowOop* p) { ShouldNotReachHere(); }
} _oop_closure;
public:
- VerifyKlassOopsKlassClosure(CMSBitMap* bitmap) : _oop_closure(bitmap) {}
- void do_klass(Klass* k) {
- k->oops_do(&_oop_closure);
+ VerifyCLDOopsCLDClosure(CMSBitMap* bitmap) : _oop_closure(bitmap) {}
+ void do_cld(ClassLoaderData* cld) {
+ cld->oops_do(&_oop_closure, false, false);
}
};
void CMSCollector::verify_after_remark_work_2() {
ResourceMark rm;
HandleMark hm;
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
// Get a clear set of claim bits for the roots processing to work with.
ClassLoaderDataGraph::clear_claimed_marks();
@@ -2408,12 +2411,12 @@ void CMSCollector::verify_after_remark_work_2() {
markBitMap());
 CLDToOopClosure cld_closure(&notOlder, true);
- gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
+ heap->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
{
StrongRootsScope srs(1);
- gch->cms_process_roots(&srs,
+ heap->cms_process_roots(&srs,
true, // young gen as roots
GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(),
@@ -2437,8 +2440,8 @@ void CMSCollector::verify_after_remark_work_2() {
assert(verification_mark_stack()->isEmpty(), "Should have been drained");
verify_work_stacks_empty();
- VerifyKlassOopsKlassClosure verify_klass_oops(verification_mark_bm());
- ClassLoaderDataGraph::classes_do(&verify_klass_oops);
+ VerifyCLDOopsCLDClosure verify_cld_oops(verification_mark_bm());
+ ClassLoaderDataGraph::cld_do(&verify_cld_oops);
// Marking completed -- now verify that each bit marked in
// verification_mark_bm() is also marked in markBitMap(); flag all
@@ -2802,7 +2805,7 @@ class CMSParInitialMarkTask: public CMSParMarkTask {
void CMSCollector::checkpointRootsInitial() {
assert(_collectorState == InitialMarking, "Wrong collector state");
check_correct_thread_executing();
- TraceCMSMemoryManagerStats tms(_collectorState,GenCollectedHeap::heap()->gc_cause());
+ TraceCMSMemoryManagerStats tms(_collectorState, CMSHeap::heap()->gc_cause());
save_heap_summary();
report_heap_summary(GCWhen::BeforeGC);
@@ -2843,14 +2846,14 @@ void CMSCollector::checkpointRootsInitialWork() {
HandleMark hm;
MarkRefsIntoClosure notOlder(_span, &_markBitMap);
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
verify_work_stacks_empty();
verify_overflow_empty();
- gch->ensure_parsability(false); // fill TLABs, but no need to retire them
+ heap->ensure_parsability(false); // fill TLABs, but no need to retire them
// Update the saved marks which may affect the root scans.
- gch->save_marks();
+ heap->save_marks();
// weak reference processing has not started yet.
ref_processor()->set_enqueuing_is_done(false);
@@ -2871,7 +2874,7 @@ void CMSCollector::checkpointRootsInitialWork() {
#endif
if (CMSParallelInitialMarkEnabled) {
// The parallel version.
- WorkGang* workers = gch->workers();
+ WorkGang* workers = heap->workers();
assert(workers != NULL, "Need parallel worker threads.");
uint n_workers = workers->active_workers();
@@ -2890,11 +2893,11 @@ void CMSCollector::checkpointRootsInitialWork() {
} else {
// The serial version.
 CLDToOopClosure cld_closure(&notOlder, true);
- gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
+ heap->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
StrongRootsScope srs(1);
- gch->cms_process_roots(&srs,
+ heap->cms_process_roots(&srs,
true, // young gen as roots
GenCollectedHeap::ScanningOption(roots_scanning_options()),
should_unload_classes(),
@@ -2911,7 +2914,7 @@ void CMSCollector::checkpointRootsInitialWork() {
" or no bits are set in the gc_prologue before the start of the next "
"subsequent marking phase.");
- assert(_ct->klass_rem_set()->mod_union_is_clear(), "Must be");
+ assert(_ct->cld_rem_set()->mod_union_is_clear(), "Must be");
// Save the end of the used_region of the constituent generations
// to be used to limit the extent of sweep in each generation.
@@ -3178,7 +3181,7 @@ void CMSConcMarkingTask::bump_global_finger(HeapWord* f) {
HeapWord* cur = read;
while (f > read) {
cur = read;
- read = (HeapWord*) Atomic::cmpxchg_ptr(f, &_global_finger, cur);
+ read = Atomic::cmpxchg(f, &_global_finger, cur);
if (cur == read) {
// our cas succeeded
assert(_global_finger >= f, "protocol consistency");
@@ -3799,7 +3802,7 @@ size_t CMSCollector::preclean_work(bool clean_refs, bool clean_survivor) {
bitMapLock());
startTimer();
unsigned int before_count =
- GenCollectedHeap::heap()->total_collections();
+ CMSHeap::heap()->total_collections();
SurvivorSpacePrecleanClosure
sss_cl(this, _span, &_markBitMap, &_markStack,
&pam_cl, before_count, CMSYield);
@@ -3848,7 +3851,7 @@ size_t CMSCollector::preclean_work(bool clean_refs, bool clean_survivor) {
}
}
- preclean_klasses(&mrias_cl, _cmsGen->freelistLock());
+ preclean_cld(&mrias_cl, _cmsGen->freelistLock());
curNumCards = preclean_card_table(_cmsGen, &smoac_cl);
cumNumCards += curNumCards;
@@ -4067,21 +4070,21 @@ size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* old_gen,
return cumNumDirtyCards;
}
-class PrecleanKlassClosure : public KlassClosure {
- KlassToOopClosure _cm_klass_closure;
+class PrecleanCLDClosure : public CLDClosure {
+ MetadataAwareOopsInGenClosure* _cm_closure;
public:
- PrecleanKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
- void do_klass(Klass* k) {
- if (k->has_accumulated_modified_oops()) {
- k->clear_accumulated_modified_oops();
+ PrecleanCLDClosure(MetadataAwareOopsInGenClosure* oop_closure) : _cm_closure(oop_closure) {}
+ void do_cld(ClassLoaderData* cld) {
+ if (cld->has_accumulated_modified_oops()) {
+ cld->clear_accumulated_modified_oops();
- _cm_klass_closure.do_klass(k);
+ _cm_closure->do_cld(cld);
}
}
};
// The freelist lock is needed to prevent asserts, is it really needed?
-void CMSCollector::preclean_klasses(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock) {
+void CMSCollector::preclean_cld(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock) {
cl->set_freelistLock(freelistLock);
@@ -4089,8 +4092,8 @@ void CMSCollector::preclean_klasses(MarkRefsIntoAndScanClosure* cl, Mutex* freel
// SSS: Add equivalent to ScanMarkedObjectsAgainCarefullyClosure::do_yield_check and should_abort_preclean?
// SSS: We should probably check if precleaning should be aborted, at suitable intervals?
- PrecleanKlassClosure preclean_klass_closure(cl);
- ClassLoaderDataGraph::classes_do(&preclean_klass_closure);
+ PrecleanCLDClosure preclean_closure(cl);
+ ClassLoaderDataGraph::cld_do(&preclean_closure);
verify_work_stacks_empty();
verify_overflow_empty();
@@ -4102,7 +4105,7 @@ void CMSCollector::checkpointRootsFinal() {
// world is stopped at this checkpoint
assert(SafepointSynchronize::is_at_safepoint(),
"world should be stopped");
- TraceCMSMemoryManagerStats tms(_collectorState,GenCollectedHeap::heap()->gc_cause());
+ TraceCMSMemoryManagerStats tms(_collectorState, CMSHeap::heap()->gc_cause());
verify_work_stacks_empty();
verify_overflow_empty();
@@ -4111,16 +4114,16 @@ void CMSCollector::checkpointRootsFinal() {
_young_gen->used() / K, _young_gen->capacity() / K);
{
if (CMSScavengeBeforeRemark) {
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
// Temporarily set flag to false, GCH->do_collection will
// expect it to be false and set to true
- FlagSetting fl(gch->_is_gc_active, false);
+ FlagSetting fl(heap->_is_gc_active, false);
- gch->do_collection(true, // full (i.e. force, see below)
- false, // !clear_all_soft_refs
- 0, // size
- false, // is_tlab
- GenCollectedHeap::YoungGen // type
+ heap->do_collection(true, // full (i.e. force, see below)
+ false, // !clear_all_soft_refs
+ 0, // size
+ false, // is_tlab
+ GenCollectedHeap::YoungGen // type
);
}
FreelistLocker x(this);
@@ -4141,7 +4144,7 @@ void CMSCollector::checkpointRootsFinalWork() {
ResourceMark rm;
HandleMark hm;
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
if (should_unload_classes()) {
CodeCache::gc_prologue();
@@ -4161,9 +4164,9 @@ void CMSCollector::checkpointRootsFinalWork() {
// or of an indication of whether the scavenge did indeed occur,
// we cannot rely on TLAB's having been filled and must do
// so here just in case a scavenge did not happen.
- gch->ensure_parsability(false); // fill TLAB's, but no need to retire them
+ heap->ensure_parsability(false); // fill TLAB's, but no need to retire them
// Update the saved marks which may affect the root scans.
- gch->save_marks();
+ heap->save_marks();
print_eden_and_survivor_chunk_arrays();
@@ -4239,7 +4242,7 @@ void CMSCollector::checkpointRootsFinalWork() {
_markStack._failed_double = 0;
if ((VerifyAfterGC || VerifyDuringGC) &&
- GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
+ CMSHeap::heap()->total_collections() >= VerifyGCStartAt) {
verify_after_remark();
}
@@ -4250,7 +4253,7 @@ void CMSCollector::checkpointRootsFinalWork() {
// Call isAllClear() under bitMapLock
assert(_modUnionTable.isAllClear(),
"Should be clear by end of the final marking");
- assert(_ct->klass_rem_set()->mod_union_is_clear(),
+ assert(_ct->cld_rem_set()->mod_union_is_clear(),
"Should be clear by end of the final marking");
}
@@ -4261,7 +4264,7 @@ void CMSParInitialMarkTask::work(uint worker_id) {
// ---------- scan from roots --------------
_timer.start();
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
ParMarkRefsIntoClosure par_mri_cl(_collector->_span, &(_collector->_markBitMap));
// ---------- young gen roots --------------
@@ -4277,12 +4280,12 @@ void CMSParInitialMarkTask::work(uint worker_id) {
CLDToOopClosure cld_closure(&par_mri_cl, true);
- gch->cms_process_roots(_strong_roots_scope,
- false, // yg was scanned above
- GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
- _collector->should_unload_classes(),
- &par_mri_cl,
- &cld_closure);
+ heap->cms_process_roots(_strong_roots_scope,
+ false, // yg was scanned above
+ GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+ _collector->should_unload_classes(),
+ &par_mri_cl,
+ &cld_closure);
assert(_collector->should_unload_classes()
|| (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
"if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
@@ -4332,26 +4335,26 @@ class CMSParRemarkTask: public CMSParMarkTask {
void do_work_steal(int i, ParMarkRefsIntoAndScanClosure* cl, int* seed);
};
-class RemarkKlassClosure : public KlassClosure {
- KlassToOopClosure _cm_klass_closure;
+class RemarkCLDClosure : public CLDClosure {
+ CLDToOopClosure _cm_closure;
public:
- RemarkKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
- void do_klass(Klass* k) {
- // Check if we have modified any oops in the Klass during the concurrent marking.
- if (k->has_accumulated_modified_oops()) {
- k->clear_accumulated_modified_oops();
+ RemarkCLDClosure(OopClosure* oop_closure) : _cm_closure(oop_closure) {}
+ void do_cld(ClassLoaderData* cld) {
+ // Check if we have modified any oops in the CLD during the concurrent marking.
+ if (cld->has_accumulated_modified_oops()) {
+ cld->clear_accumulated_modified_oops();
// We could have transfered the current modified marks to the accumulated marks,
// like we do with the Card Table to Mod Union Table. But it's not really necessary.
- } else if (k->has_modified_oops()) {
+ } else if (cld->has_modified_oops()) {
// Don't clear anything, this info is needed by the next young collection.
} else {
- // No modified oops in the Klass.
+ // No modified oops in the ClassLoaderData.
return;
}
// The klass has modified fields, need to scan the klass.
- _cm_klass_closure.do_klass(k);
+ _cm_closure.do_cld(cld);
}
};
@@ -4386,7 +4389,7 @@ void CMSParRemarkTask::work(uint worker_id) {
// ---------- rescan from roots --------------
_timer.start();
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
ParMarkRefsIntoAndScanClosure par_mrias_cl(_collector,
_collector->_span, _collector->ref_processor(),
&(_collector->_markBitMap),
@@ -4406,12 +4409,12 @@ void CMSParRemarkTask::work(uint worker_id) {
// ---------- remaining roots --------------
_timer.reset();
_timer.start();
- gch->cms_process_roots(_strong_roots_scope,
- false, // yg was scanned above
- GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
- _collector->should_unload_classes(),
- &par_mrias_cl,
- NULL); // The dirty klasses will be handled below
+ heap->cms_process_roots(_strong_roots_scope,
+ false, // yg was scanned above
+ GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+ _collector->should_unload_classes(),
+ &par_mrias_cl,
+ NULL); // The dirty CLDs will be handled below
assert(_collector->should_unload_classes()
|| (_collector->CMSCollector::roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
@@ -4439,24 +4442,24 @@ void CMSParRemarkTask::work(uint worker_id) {
log_trace(gc, task)("Finished unhandled CLD scanning work in %dth thread: %3.3f sec", worker_id, _timer.seconds());
}
- // ---------- dirty klass scanning ----------
+ // We might have added oops to ClassLoaderData::_handles during the
+ // concurrent marking phase. These oops do not always point to newly allocated objects
+ // that are guaranteed to be kept alive. Hence,
+ // we do have to revisit the _handles block during the remark phase.
+
+ // ---------- dirty CLD scanning ----------
if (worker_id == 0) { // Single threaded at the moment.
_timer.reset();
_timer.start();
// Scan all classes that was dirtied during the concurrent marking phase.
- RemarkKlassClosure remark_klass_closure(&par_mrias_cl);
- ClassLoaderDataGraph::classes_do(&remark_klass_closure);
+ RemarkCLDClosure remark_closure(&par_mrias_cl);
+ ClassLoaderDataGraph::cld_do(&remark_closure);
_timer.stop();
- log_trace(gc, task)("Finished dirty klass scanning work in %dth thread: %3.3f sec", worker_id, _timer.seconds());
+ log_trace(gc, task)("Finished dirty CLD scanning work in %dth thread: %3.3f sec", worker_id, _timer.seconds());
}
- // We might have added oops to ClassLoaderData::_handles during the
- // concurrent marking phase. These oops point to newly allocated objects
- // that are guaranteed to be kept alive. Either by the direct allocation
- // code, or when the young collector processes the roots. Hence,
- // we don't have to revisit the _handles block during the remark phase.
// ---------- rescan dirty cards ------------
_timer.reset();
@@ -4838,8 +4841,8 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
// Parallel version of remark
void CMSCollector::do_remark_parallel() {
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- WorkGang* workers = gch->workers();
+ CMSHeap* heap = CMSHeap::heap();
+ WorkGang* workers = heap->workers();
assert(workers != NULL, "Need parallel worker threads.");
// Choose to use the number of GC workers most recently set
// into "active_workers".
@@ -4855,7 +4858,7 @@ void CMSCollector::do_remark_parallel() {
// the younger_gen cards, so we shouldn't call the following else
// the verification code as well as subsequent younger_refs_iterate
// code would get confused. XXX
- // gch->rem_set()->prepare_for_younger_refs_iterate(true); // parallel
+ // heap->rem_set()->prepare_for_younger_refs_iterate(true); // parallel
// The young gen rescan work will not be done as part of
// process_roots (which currently doesn't know how to
@@ -4897,7 +4900,7 @@ void CMSCollector::do_remark_parallel() {
void CMSCollector::do_remark_non_parallel() {
ResourceMark rm;
HandleMark hm;
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
MarkRefsIntoAndScanClosure
@@ -4938,7 +4941,7 @@ void CMSCollector::do_remark_non_parallel() {
}
}
if (VerifyDuringGC &&
- GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
+ CMSHeap::heap()->total_collections() >= VerifyGCStartAt) {
HandleMark hm; // Discard invalid handles created during verification
Universe::verify();
}
@@ -4947,15 +4950,15 @@ void CMSCollector::do_remark_non_parallel() {
verify_work_stacks_empty();
- gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
+ heap->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
StrongRootsScope srs(1);
- gch->cms_process_roots(&srs,
- true, // young gen as roots
- GenCollectedHeap::ScanningOption(roots_scanning_options()),
- should_unload_classes(),
- &mrias_cl,
- NULL); // The dirty klasses will be handled below
+ heap->cms_process_roots(&srs,
+ true, // young gen as roots
+ GenCollectedHeap::ScanningOption(roots_scanning_options()),
+ should_unload_classes(),
+ &mrias_cl,
+ NULL); // The dirty CLDs will be handled below
assert(should_unload_classes()
|| (roots_scanning_options() & GenCollectedHeap::SO_AllCodeCache),
@@ -4981,23 +4984,21 @@ void CMSCollector::do_remark_non_parallel() {
verify_work_stacks_empty();
}
+ // We might have added oops to ClassLoaderData::_handles during the
+ // concurrent marking phase. These oops do not always point to newly allocated objects
+ // that are guaranteed to be kept alive. Hence,
+ // we do have to revisit the _handles block during the remark phase.
{
- GCTraceTime(Trace, gc, phases) t("Dirty Klass Scan", _gc_timer_cm);
+ GCTraceTime(Trace, gc, phases) t("Dirty CLD Scan", _gc_timer_cm);
verify_work_stacks_empty();
- RemarkKlassClosure remark_klass_closure(&mrias_cl);
- ClassLoaderDataGraph::classes_do(&remark_klass_closure);
+ RemarkCLDClosure remark_closure(&mrias_cl);
+ ClassLoaderDataGraph::cld_do(&remark_closure);
verify_work_stacks_empty();
}
- // We might have added oops to ClassLoaderData::_handles during the
- // concurrent marking phase. These oops point to newly allocated objects
- // that are guaranteed to be kept alive. Either by the direct allocation
- // code, or when the young collector processes the roots. Hence,
- // we don't have to revisit the _handles block during the remark phase.
-
verify_work_stacks_empty();
// Restore evacuated mark words, if any, used for overflow list links
restore_preserved_marks_if_any();
@@ -5149,8 +5150,8 @@ void CMSRefProcTaskProxy::do_work_steal(int i,
void CMSRefProcTaskExecutor::execute(ProcessTask& task)
{
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- WorkGang* workers = gch->workers();
+ CMSHeap* heap = CMSHeap::heap();
+ WorkGang* workers = heap->workers();
assert(workers != NULL, "Need parallel worker threads.");
CMSRefProcTaskProxy rp_task(task, &_collector,
_collector.ref_processor()->span(),
@@ -5162,8 +5163,8 @@ void CMSRefProcTaskExecutor::execute(ProcessTask& task)
void CMSRefProcTaskExecutor::execute(EnqueueTask& task)
{
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- WorkGang* workers = gch->workers();
+ CMSHeap* heap = CMSHeap::heap();
+ WorkGang* workers = heap->workers();
assert(workers != NULL, "Need parallel worker threads.");
CMSRefEnqueueTaskProxy enq_task(task);
workers->run_task(&enq_task);
@@ -5180,15 +5181,17 @@ void CMSCollector::refProcessingWork() {
rp->setup_policy(false);
verify_work_stacks_empty();
- CMSKeepAliveClosure cmsKeepAliveClosure(this, _span, &_markBitMap,
- &_markStack, false /* !preclean */);
- CMSDrainMarkingStackClosure cmsDrainMarkingStackClosure(this,
- _span, &_markBitMap, &_markStack,
- &cmsKeepAliveClosure, false /* !preclean */);
ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->num_q());
{
GCTraceTime(Debug, gc, phases) t("Reference Processing", _gc_timer_cm);
+ // Setup keep_alive and complete closures.
+ CMSKeepAliveClosure cmsKeepAliveClosure(this, _span, &_markBitMap,
+ &_markStack, false /* !preclean */);
+ CMSDrainMarkingStackClosure cmsDrainMarkingStackClosure(this,
+ _span, &_markBitMap, &_markStack,
+ &cmsKeepAliveClosure, false /* !preclean */);
+
ReferenceProcessorStats stats;
if (rp->processing_is_mt()) {
// Set the degree of MT here. If the discovery is done MT, there
@@ -5196,9 +5199,9 @@ void CMSCollector::refProcessingWork() {
// and a different number of discovered lists may have Ref objects.
// That is OK as long as the Reference lists are balanced (see
// balance_all_queues() and balance_queues()).
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
uint active_workers = ParallelGCThreads;
- WorkGang* workers = gch->workers();
+ WorkGang* workers = heap->workers();
if (workers != NULL) {
active_workers = workers->active_workers();
// The expectation is that active_workers will have already
@@ -5227,6 +5230,11 @@ void CMSCollector::refProcessingWork() {
// This is the point where the entire marking should have completed.
verify_work_stacks_empty();
+ {
+ GCTraceTime(Debug, gc, phases) t("Weak Processing", _gc_timer_cm);
+ WeakProcessor::weak_oops_do(&_is_alive_closure, &do_nothing_cl);
+ }
+
if (should_unload_classes()) {
{
GCTraceTime(Debug, gc, phases) t("Class Unloading", _gc_timer_cm);
@@ -5306,7 +5314,7 @@ void CMSCollector::sweep() {
verify_work_stacks_empty();
verify_overflow_empty();
increment_sweep_count();
- TraceCMSMemoryManagerStats tms(_collectorState,GenCollectedHeap::heap()->gc_cause());
+ TraceCMSMemoryManagerStats tms(_collectorState, CMSHeap::heap()->gc_cause());
_inter_sweep_timer.stop();
_inter_sweep_estimate.sample(_inter_sweep_timer.seconds());
@@ -5379,9 +5387,9 @@ void CMSCollector::sweep() {
// this generation. If such a promotion may still fail,
// the flag will be set again when a young collection is
// attempted.
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- gch->clear_incremental_collection_failed(); // Worth retrying as fresh space may have been freed up
- gch->update_full_collections_completed(_collection_count_start);
+ CMSHeap* heap = CMSHeap::heap();
+ heap->clear_incremental_collection_failed(); // Worth retrying as fresh space may have been freed up
+ heap->update_full_collections_completed(_collection_count_start);
}
// FIX ME!!! Looks like this belongs in CFLSpace, with
@@ -5416,7 +5424,7 @@ void ConcurrentMarkSweepGeneration::update_gc_stats(Generation* current_generati
bool full) {
// If the young generation has been collected, gather any statistics
// that are of interest at this point.
- bool current_is_young = GenCollectedHeap::heap()->is_young_gen(current_generation);
+ bool current_is_young = CMSHeap::heap()->is_young_gen(current_generation);
if (!full && current_is_young) {
// Gather statistics on the young generation collection.
collector()->stats().record_gc0_end(used());
@@ -6189,7 +6197,7 @@ size_t SurvivorSpacePrecleanClosure::do_object_careful(oop p) {
do_yield_check();
}
unsigned int after_count =
- GenCollectedHeap::heap()->total_collections();
+ CMSHeap::heap()->total_collections();
bool abort = (_before_count != after_count) ||
_collector->should_abort_preclean();
return abort ? 0 : size;
@@ -7853,7 +7861,7 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
return false;
}
// Grab the entire list; we'll put back a suffix
- oop prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
+ oop prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
Thread* tid = Thread::current();
// Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
// set to ParallelGCThreads.
@@ -7868,7 +7876,7 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
return false;
} else if (_overflow_list != BUSY) {
// Try and grab the prefix
- prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
+ prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
}
}
// If the list was found to be empty, or we spun long
@@ -7881,7 +7889,7 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
if (prefix == NULL) {
// Write back the NULL in case we overwrote it with BUSY above
// and it is still the same value.
- (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+ Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
}
return false;
}
@@ -7896,7 +7904,7 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
// Write back the NULL in lieu of the BUSY we wrote
// above, if it is still the same value.
if (_overflow_list == BUSY) {
- (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+ Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
}
} else {
// Chop off the suffix and return it to the global list.
@@ -7912,7 +7920,7 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
bool attached = false;
while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
observed_overflow_list =
- (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
+ Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
if (cur_overflow_list == observed_overflow_list) {
attached = true;
break;
@@ -7937,7 +7945,7 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
}
// ... and try to place spliced list back on overflow_list ...
observed_overflow_list =
- (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
+ Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
} while (cur_overflow_list != observed_overflow_list);
// ... until we have succeeded in doing so.
}
@@ -7958,7 +7966,7 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
}
#ifndef PRODUCT
assert(_num_par_pushes >= n, "Too many pops?");
- Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
+ Atomic::sub(n, &_num_par_pushes);
#endif
return true;
}
@@ -7974,7 +7982,7 @@ void CMSCollector::push_on_overflow_list(oop p) {
// Multi-threaded; use CAS to prepend to overflow list
void CMSCollector::par_push_on_overflow_list(oop p) {
- NOT_PRODUCT(Atomic::inc_ptr(&_num_par_pushes);)
+ NOT_PRODUCT(Atomic::inc(&_num_par_pushes);)
assert(oopDesc::is_oop(p), "Not an oop");
par_preserve_mark_if_necessary(p);
oop observed_overflow_list = _overflow_list;
@@ -7987,7 +7995,7 @@ void CMSCollector::par_push_on_overflow_list(oop p) {
p->set_mark(NULL);
}
observed_overflow_list =
- (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
+ Atomic::cmpxchg((oopDesc*)p, &_overflow_list, (oopDesc*)cur_overflow_list);
} while (cur_overflow_list != observed_overflow_list);
}
#undef BUSY
diff --git a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp
index 978310aaade..aebaae75705 100644
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.hpp
@@ -777,7 +777,7 @@ class CMSCollector: public CHeapObj {
// Does precleaning work, returning a quantity indicative of
// the amount of "useful work" done.
size_t preclean_work(bool clean_refs, bool clean_survivors);
- void preclean_klasses(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock);
+ void preclean_cld(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock);
void abortable_preclean(); // Preclean while looking for possible abort
void initialize_sequential_subtasks_for_young_gen_rescan(int i);
// Helper function for above; merge-sorts the per-thread plab samples
diff --git a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.inline.hpp b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.inline.hpp
index 503ef69eff0..ca61c8fd45c 100644
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.inline.hpp
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepGeneration.inline.hpp
@@ -25,13 +25,13 @@
#ifndef SHARE_VM_GC_CMS_CONCURRENTMARKSWEEPGENERATION_INLINE_HPP
#define SHARE_VM_GC_CMS_CONCURRENTMARKSWEEPGENERATION_INLINE_HPP
+#include "gc/cms/cmsHeap.hpp"
#include "gc/cms/cmsLockVerifier.hpp"
#include "gc/cms/compactibleFreeListSpace.hpp"
#include "gc/cms/concurrentMarkSweepGeneration.hpp"
#include "gc/cms/concurrentMarkSweepThread.hpp"
#include "gc/cms/parNewGeneration.hpp"
#include "gc/shared/gcUtil.hpp"
-#include "gc/shared/genCollectedHeap.hpp"
#include "utilities/align.hpp"
#include "utilities/bitMap.inline.hpp"
@@ -256,7 +256,7 @@ inline bool CMSCollector::should_abort_preclean() const {
// scavenge is done or foreground GC wants to take over collection
return _collectorState == AbortablePreclean &&
(_abort_preclean || _foregroundGCIsActive ||
- GenCollectedHeap::heap()->incremental_collection_will_fail(true /* consult_young */));
+ CMSHeap::heap()->incremental_collection_will_fail(true /* consult_young */));
}
inline size_t CMSCollector::get_eden_used() const {
diff --git a/src/hotspot/share/gc/cms/concurrentMarkSweepThread.cpp b/src/hotspot/share/gc/cms/concurrentMarkSweepThread.cpp
index 43a32e913e8..a4de148bc1a 100644
--- a/src/hotspot/share/gc/cms/concurrentMarkSweepThread.cpp
+++ b/src/hotspot/share/gc/cms/concurrentMarkSweepThread.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -24,10 +24,10 @@
#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
+#include "gc/cms/cmsHeap.hpp"
#include "gc/cms/concurrentMarkSweepGeneration.inline.hpp"
#include "gc/cms/concurrentMarkSweepThread.hpp"
#include "gc/shared/gcId.hpp"
-#include "gc/shared/genCollectedHeap.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/init.hpp"
#include "runtime/interfaceSupport.hpp"
@@ -225,7 +225,7 @@ void ConcurrentMarkSweepThread::wait_on_cms_lock_for_scavenge(long t_millis) {
// Wait time in millis or 0 value representing infinite wait for a scavenge
assert(t_millis >= 0, "Wait time for scavenge should be 0 or positive");
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
double start_time_secs = os::elapsedTime();
double end_time_secs = start_time_secs + (t_millis / ((double) MILLIUNITS));
@@ -233,7 +233,7 @@ void ConcurrentMarkSweepThread::wait_on_cms_lock_for_scavenge(long t_millis) {
unsigned int before_count;
{
MutexLockerEx hl(Heap_lock, Mutex::_no_safepoint_check_flag);
- before_count = gch->total_collections();
+ before_count = heap->total_collections();
}
unsigned int loop_count = 0;
@@ -279,7 +279,7 @@ void ConcurrentMarkSweepThread::wait_on_cms_lock_for_scavenge(long t_millis) {
unsigned int after_count;
{
MutexLockerEx hl(Heap_lock, Mutex::_no_safepoint_check_flag);
- after_count = gch->total_collections();
+ after_count = heap->total_collections();
}
if(before_count != after_count) {
diff --git a/src/hotspot/share/gc/cms/parCardTableModRefBS.cpp b/src/hotspot/share/gc/cms/parCardTableModRefBS.cpp
index 4456fe6fc7d..085b94cb388 100644
--- a/src/hotspot/share/gc/cms/parCardTableModRefBS.cpp
+++ b/src/hotspot/share/gc/cms/parCardTableModRefBS.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,10 +23,10 @@
*/
#include "precompiled.hpp"
+#include "gc/cms/cmsHeap.hpp"
#include "gc/shared/cardTableModRefBS.hpp"
#include "gc/shared/cardTableRS.hpp"
#include "gc/shared/collectedHeap.hpp"
-#include "gc/shared/genCollectedHeap.hpp"
#include "gc/shared/space.inline.hpp"
#include "memory/allocation.inline.hpp"
#include "memory/virtualspace.hpp"
@@ -394,7 +394,7 @@ get_LNC_array_for_space(Space* sp,
// Do a dirty read here. If we pass the conditional then take the rare
// event lock and do the read again in case some other thread had already
// succeeded and done the resize.
- int cur_collection = GenCollectedHeap::heap()->total_collections();
+ int cur_collection = CMSHeap::heap()->total_collections();
// Updated _last_LNC_resizing_collection[i] must not be visible before
// _lowest_non_clean and friends are visible. Therefore use acquire/release
// to guarantee this on non TSO architecures.
diff --git a/src/hotspot/share/gc/cms/parNewGeneration.cpp b/src/hotspot/share/gc/cms/parNewGeneration.cpp
index 5d651e0507e..74bb6c96e3f 100644
--- a/src/hotspot/share/gc/cms/parNewGeneration.cpp
+++ b/src/hotspot/share/gc/cms/parNewGeneration.cpp
@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
+#include "gc/cms/cmsHeap.hpp"
#include "gc/cms/compactibleFreeListSpace.hpp"
#include "gc/cms/concurrentMarkSweepGeneration.hpp"
#include "gc/cms/parNewGeneration.inline.hpp"
@@ -45,6 +46,7 @@
#include "gc/shared/spaceDecorator.hpp"
#include "gc/shared/strongRootsScope.hpp"
#include "gc/shared/taskqueue.inline.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "gc/shared/workgroup.hpp"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
@@ -124,7 +126,7 @@ bool ParScanThreadState::should_be_partially_scanned(oop new_obj, oop old_obj) c
void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) {
assert(old->is_objArray(), "must be obj array");
assert(old->is_forwarded(), "must be forwarded");
- assert(GenCollectedHeap::heap()->is_in_reserved(old), "must be in heap.");
+ assert(CMSHeap::heap()->is_in_reserved(old), "must be in heap.");
assert(!old_gen()->is_in(old), "must be in young generation.");
objArrayOop obj = objArrayOop(old->forwardee());
@@ -205,9 +207,9 @@ bool ParScanThreadState::take_from_overflow_stack() {
for (size_t i = 0; i != num_take_elems; i++) {
oop cur = of_stack->pop();
oop obj_to_push = cur->forwardee();
- assert(GenCollectedHeap::heap()->is_in_reserved(cur), "Should be in heap");
+ assert(CMSHeap::heap()->is_in_reserved(cur), "Should be in heap");
assert(!old_gen()->is_in_reserved(cur), "Should be in young gen");
- assert(GenCollectedHeap::heap()->is_in_reserved(obj_to_push), "Should be in heap");
+ assert(CMSHeap::heap()->is_in_reserved(obj_to_push), "Should be in heap");
if (should_be_partially_scanned(obj_to_push, cur)) {
assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
obj_to_push = cur;
@@ -493,7 +495,7 @@ void ParScanThreadStateSet::flush() {
ParScanClosure::ParScanClosure(ParNewGeneration* g,
ParScanThreadState* par_scan_state) :
- OopsInKlassOrGenClosure(g), _par_scan_state(par_scan_state), _g(g) {
+ OopsInClassLoaderDataOrGenClosure(g), _par_scan_state(par_scan_state), _g(g) {
_boundary = _g->reserved().end();
}
@@ -590,7 +592,7 @@ ParNewGenTask::ParNewGenTask(ParNewGeneration* young_gen,
{}
void ParNewGenTask::work(uint worker_id) {
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
// Since this is being done in a separate thread, need new resource
// and handle marks.
ResourceMark rm;
@@ -601,14 +603,11 @@ void ParNewGenTask::work(uint worker_id) {
par_scan_state.set_young_old_boundary(_young_old_boundary);
- KlassScanClosure klass_scan_closure(&par_scan_state.to_space_root_closure(),
- gch->rem_set()->klass_rem_set());
- CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
- &par_scan_state.to_space_root_closure(),
- false);
+ CLDScanClosure cld_scan_closure(&par_scan_state.to_space_root_closure(),
+ heap->rem_set()->cld_rem_set()->accumulate_modified_oops());
par_scan_state.start_strong_roots();
- gch->young_process_roots(_strong_roots_scope,
+ heap->young_process_roots(_strong_roots_scope,
&par_scan_state.to_space_root_closure(),
&par_scan_state.older_gen_closure(),
&cld_scan_closure);
@@ -690,7 +689,7 @@ void /*ParNewGeneration::*/ParKeepAliveClosure::do_oop_work(T* p) {
_par_cl->do_oop_nv(p);
- if (GenCollectedHeap::heap()->is_in_reserved(p)) {
+ if (CMSHeap::heap()->is_in_reserved(p)) {
oop obj = oopDesc::load_decode_heap_oop_not_null(p);
_rs->write_ref_field_gc_par(p, obj);
}
@@ -717,7 +716,7 @@ void /*ParNewGeneration::*/KeepAliveClosure::do_oop_work(T* p) {
_cl->do_oop_nv(p);
- if (GenCollectedHeap::heap()->is_in_reserved(p)) {
+ if (CMSHeap::heap()->is_in_reserved(p)) {
oop obj = oopDesc::load_decode_heap_oop_not_null(p);
_rs->write_ref_field_gc_par(p, obj);
}
@@ -807,7 +806,7 @@ public:
};
void ParNewRefProcTaskExecutor::execute(ProcessTask& task) {
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* gch = CMSHeap::heap();
WorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
_state_set.reset(workers->active_workers(), _young_gen.promotion_failed());
@@ -819,7 +818,7 @@ void ParNewRefProcTaskExecutor::execute(ProcessTask& task) {
}
void ParNewRefProcTaskExecutor::execute(EnqueueTask& task) {
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* gch = CMSHeap::heap();
WorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
ParNewRefEnqueueTaskProxy enq_task(task);
@@ -828,8 +827,8 @@ void ParNewRefProcTaskExecutor::execute(EnqueueTask& task) {
void ParNewRefProcTaskExecutor::set_single_threaded_mode() {
_state_set.flush();
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- gch->save_marks();
+ CMSHeap* heap = CMSHeap::heap();
+ heap->save_marks();
}
ScanClosureWithParBarrier::
@@ -838,10 +837,10 @@ ScanClosureWithParBarrier(ParNewGeneration* g, bool gc_barrier) :
{ }
EvacuateFollowersClosureGeneral::
-EvacuateFollowersClosureGeneral(GenCollectedHeap* gch,
+EvacuateFollowersClosureGeneral(CMSHeap* heap,
OopsInGenClosure* cur,
OopsInGenClosure* older) :
- _gch(gch),
+ _heap(heap),
_scan_cur_or_nonheap(cur), _scan_older(older)
{ }
@@ -849,15 +848,15 @@ void EvacuateFollowersClosureGeneral::do_void() {
do {
// Beware: this call will lead to closure applications via virtual
// calls.
- _gch->oop_since_save_marks_iterate(GenCollectedHeap::YoungGen,
- _scan_cur_or_nonheap,
- _scan_older);
- } while (!_gch->no_allocs_since_save_marks());
+ _heap->oop_since_save_marks_iterate(GenCollectedHeap::YoungGen,
+ _scan_cur_or_nonheap,
+ _scan_older);
+ } while (!_heap->no_allocs_since_save_marks());
}
// A Generation that does parallel young-gen collection.
-void ParNewGeneration::handle_promotion_failed(GenCollectedHeap* gch, ParScanThreadStateSet& thread_state_set) {
+void ParNewGeneration::handle_promotion_failed(CMSHeap* gch, ParScanThreadStateSet& thread_state_set) {
assert(_promo_failure_scan_stack.is_empty(), "post condition");
_promo_failure_scan_stack.clear(true); // Clear cached segments.
@@ -886,7 +885,7 @@ void ParNewGeneration::collect(bool full,
bool is_tlab) {
assert(full || size > 0, "otherwise we don't want to collect");
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* gch = CMSHeap::heap();
_gc_timer->register_gc_start();
@@ -1001,6 +1000,14 @@ void ParNewGeneration::collect(bool full,
_gc_tracer.report_tenuring_threshold(tenuring_threshold());
pt.print_all_references();
+ assert(gch->no_allocs_since_save_marks(), "evacuation should be done at this point");
+
+ WeakProcessor::weak_oops_do(&is_alive, &keep_alive);
+
+ // Verify that the usage of keep_alive only forwarded
+ // the oops and did not find anything new to copy.
+ assert(gch->no_allocs_since_save_marks(), "unexpectedly copied objects");
+
if (!promotion_failed()) {
// Swap the survivor spaces.
eden()->clear(SpaceDecorator::Mangle);
@@ -1067,7 +1074,7 @@ void ParNewGeneration::collect(bool full,
}
size_t ParNewGeneration::desired_plab_sz() {
- return _plab_stats.desired_plab_sz(GenCollectedHeap::heap()->workers()->active_workers());
+ return _plab_stats.desired_plab_sz(CMSHeap::heap()->workers()->active_workers());
}
static int sum;
@@ -1171,7 +1178,7 @@ oop ParNewGeneration::copy_to_survivor_space(ParScanThreadState* par_scan_state,
} else {
// Is in to-space; do copying ourselves.
Copy::aligned_disjoint_words((HeapWord*)old, (HeapWord*)new_obj, sz);
- assert(GenCollectedHeap::heap()->is_in_reserved(new_obj), "illegal forwarding pointer value.");
+ assert(CMSHeap::heap()->is_in_reserved(new_obj), "illegal forwarding pointer value.");
forward_ptr = old->forward_to_atomic(new_obj);
// Restore the mark word copied above.
new_obj->set_mark(m);
@@ -1281,7 +1288,7 @@ void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadSt
// XXX This is horribly inefficient when a promotion failure occurs
// and should be fixed. XXX FIX ME !!!
#ifndef PRODUCT
- Atomic::inc_ptr(&_num_par_pushes);
+ Atomic::inc(&_num_par_pushes);
assert(_num_par_pushes > 0, "Tautology");
#endif
if (from_space_obj->forwardee() == from_space_obj) {
@@ -1299,7 +1306,7 @@ void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadSt
from_space_obj->set_klass_to_list_ptr(NULL);
}
observed_overflow_list =
- (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
+ Atomic::cmpxchg((oopDesc*)from_space_obj, &_overflow_list, (oopDesc*)cur_overflow_list);
} while (cur_overflow_list != observed_overflow_list);
}
}
@@ -1342,7 +1349,7 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
if (_overflow_list == NULL) return false;
// Otherwise, there was something there; try claiming the list.
- oop prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
+ oop prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
// Trim off a prefix of at most objsFromOverflow items
Thread* tid = Thread::current();
size_t spin_count = ParallelGCThreads;
@@ -1356,7 +1363,7 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
return false;
} else if (_overflow_list != BUSY) {
// try and grab the prefix
- prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
+ prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
}
}
if (prefix == NULL || prefix == BUSY) {
@@ -1364,7 +1371,7 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
if (prefix == NULL) {
// Write back the NULL in case we overwrote it with BUSY above
// and it is still the same value.
- (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+ (void) Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
}
return false;
}
@@ -1383,7 +1390,7 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
// Write back the NULL in lieu of the BUSY we wrote
// above and it is still the same value.
if (_overflow_list == BUSY) {
- (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+ (void) Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
}
} else {
assert(suffix != BUSY, "Error");
@@ -1397,7 +1404,7 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
bool attached = false;
while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
observed_overflow_list =
- (oop) Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+ Atomic::cmpxchg((oopDesc*)suffix, &_overflow_list, (oopDesc*)cur_overflow_list);
if (cur_overflow_list == observed_overflow_list) {
attached = true;
break;
@@ -1423,7 +1430,7 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
last->set_klass_to_list_ptr(NULL);
}
observed_overflow_list =
- (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+ Atomic::cmpxchg((oopDesc*)suffix, &_overflow_list, (oopDesc*)cur_overflow_list);
} while (cur_overflow_list != observed_overflow_list);
}
}
@@ -1455,7 +1462,7 @@ bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan
TASKQUEUE_STATS_ONLY(par_scan_state->note_overflow_refill(n));
#ifndef PRODUCT
assert(_num_par_pushes >= n, "Too many pops?");
- Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
+ Atomic::sub(n, &_num_par_pushes);
#endif
return true;
}
@@ -1478,3 +1485,9 @@ void ParNewGeneration::ref_processor_init() {
const char* ParNewGeneration::name() const {
return "par new generation";
}
+
+void ParNewGeneration::restore_preserved_marks() {
+ SharedRestorePreservedMarksTaskExecutor task_executor(CMSHeap::heap()->workers());
+ _preserved_marks_set.restore(&task_executor);
+}
+
diff --git a/src/hotspot/share/gc/cms/parNewGeneration.hpp b/src/hotspot/share/gc/cms/parNewGeneration.hpp
index 0576e05cebd..a27c7c36c43 100644
--- a/src/hotspot/share/gc/cms/parNewGeneration.hpp
+++ b/src/hotspot/share/gc/cms/parNewGeneration.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,7 @@
#include "memory/padded.hpp"
class ChunkArray;
+class CMSHeap;
class ParScanWithoutBarrierClosure;
class ParScanWithBarrierClosure;
class ParRootScanWithoutBarrierClosure;
@@ -259,11 +260,11 @@ class KeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
class EvacuateFollowersClosureGeneral: public VoidClosure {
private:
- GenCollectedHeap* _gch;
+ CMSHeap* _heap;
OopsInGenClosure* _scan_cur_or_nonheap;
OopsInGenClosure* _scan_older;
public:
- EvacuateFollowersClosureGeneral(GenCollectedHeap* gch,
+ EvacuateFollowersClosureGeneral(CMSHeap* heap,
OopsInGenClosure* cur,
OopsInGenClosure* older);
virtual void do_void();
@@ -336,7 +337,7 @@ class ParNewGeneration: public DefNewGeneration {
static oop real_forwardee_slow(oop obj);
static void waste_some_time();
- void handle_promotion_failed(GenCollectedHeap* gch, ParScanThreadStateSet& thread_state_set);
+ void handle_promotion_failed(CMSHeap* gch, ParScanThreadStateSet& thread_state_set);
protected:
@@ -345,6 +346,8 @@ class ParNewGeneration: public DefNewGeneration {
bool survivor_overflow() { return _survivor_overflow; }
void set_survivor_overflow(bool v) { _survivor_overflow = v; }
+ void restore_preserved_marks();
+
public:
ParNewGeneration(ReservedSpace rs, size_t initial_byte_size);
diff --git a/src/hotspot/share/gc/cms/parOopClosures.hpp b/src/hotspot/share/gc/cms/parOopClosures.hpp
index daf95f65785..87b5d98d975 100644
--- a/src/hotspot/share/gc/cms/parOopClosures.hpp
+++ b/src/hotspot/share/gc/cms/parOopClosures.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -37,7 +37,7 @@ typedef Padded ObjToScanQueue;
typedef GenericTaskQueueSet ObjToScanQueueSet;
class ParallelTaskTerminator;
-class ParScanClosure: public OopsInKlassOrGenClosure {
+class ParScanClosure: public OopsInClassLoaderDataOrGenClosure {
protected:
ParScanThreadState* _par_scan_state;
ParNewGeneration* _g;
diff --git a/src/hotspot/share/gc/cms/parOopClosures.inline.hpp b/src/hotspot/share/gc/cms/parOopClosures.inline.hpp
index 12092b62be9..a24f004d45a 100644
--- a/src/hotspot/share/gc/cms/parOopClosures.inline.hpp
+++ b/src/hotspot/share/gc/cms/parOopClosures.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,10 @@
#ifndef SHARE_VM_GC_CMS_PAROOPCLOSURES_INLINE_HPP
#define SHARE_VM_GC_CMS_PAROOPCLOSURES_INLINE_HPP
+#include "gc/cms/cmsHeap.hpp"
#include "gc/cms/parNewGeneration.hpp"
#include "gc/cms/parOopClosures.hpp"
#include "gc/shared/cardTableRS.hpp"
-#include "gc/shared/genCollectedHeap.hpp"
#include "gc/shared/genOopClosures.inline.hpp"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
@@ -72,9 +72,9 @@ template
inline void ParScanClosure::do_oop_work(T* p,
bool gc_barrier,
bool root_scan) {
- assert((!GenCollectedHeap::heap()->is_in_reserved(p) ||
+ assert((!CMSHeap::heap()->is_in_reserved(p) ||
generation()->is_in_reserved(p))
- && (GenCollectedHeap::heap()->is_young_gen(generation()) || gc_barrier),
+ && (CMSHeap::heap()->is_young_gen(generation()) || gc_barrier),
"The gen must be right, and we must be doing the barrier "
"in older generations.");
T heap_oop = oopDesc::load_heap_oop(p);
@@ -85,8 +85,8 @@ inline void ParScanClosure::do_oop_work(T* p,
if (_g->to()->is_in_reserved(obj)) {
Log(gc) log;
log.error("Scanning field (" PTR_FORMAT ") twice?", p2i(p));
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- Space* sp = gch->space_containing(p);
+ CMSHeap* heap = CMSHeap::heap();
+ Space* sp = heap->space_containing(p);
oop obj = oop(sp->block_start(p));
assert((HeapWord*)obj < (HeapWord*)p, "Error");
log.error("Object: " PTR_FORMAT, p2i((void *)obj));
@@ -96,7 +96,7 @@ inline void ParScanClosure::do_oop_work(T* p,
log.error("-----");
log.error("Heap:");
log.error("-----");
- gch->print_on(&ls);
+ heap->print_on(&ls);
ShouldNotReachHere();
}
#endif
@@ -126,8 +126,8 @@ inline void ParScanClosure::do_oop_work(T* p,
(void)_par_scan_state->trim_queues(10 * ParallelGCThreads);
}
}
- if (is_scanning_a_klass()) {
- do_klass_barrier();
+ if (is_scanning_a_cld()) {
+ do_cld_barrier();
} else if (gc_barrier) {
// Now call parent closure
par_do_barrier(p);
diff --git a/src/hotspot/share/gc/cms/vmCMSOperations.cpp b/src/hotspot/share/gc/cms/vmCMSOperations.cpp
index 3aa8a0ccd6b..1c2e6a51d10 100644
--- a/src/hotspot/share/gc/cms/vmCMSOperations.cpp
+++ b/src/hotspot/share/gc/cms/vmCMSOperations.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
+#include "gc/cms/cmsHeap.hpp"
#include "gc/cms/concurrentMarkSweepGeneration.inline.hpp"
#include "gc/cms/concurrentMarkSweepThread.hpp"
#include "gc/cms/vmCMSOperations.hpp"
@@ -39,19 +40,19 @@
//////////////////////////////////////////////////////////
void VM_CMS_Operation::verify_before_gc() {
if (VerifyBeforeGC &&
- GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
+ CMSHeap::heap()->total_collections() >= VerifyGCStartAt) {
GCTraceTime(Info, gc, phases, verify) tm("Verify Before", _collector->_gc_timer_cm);
HandleMark hm;
FreelistLocker x(_collector);
MutexLockerEx y(_collector->bitMapLock(), Mutex::_no_safepoint_check_flag);
- GenCollectedHeap::heap()->prepare_for_verify();
+ CMSHeap::heap()->prepare_for_verify();
Universe::verify();
}
}
void VM_CMS_Operation::verify_after_gc() {
if (VerifyAfterGC &&
- GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
+ CMSHeap::heap()->total_collections() >= VerifyGCStartAt) {
GCTraceTime(Info, gc, phases, verify) tm("Verify After", _collector->_gc_timer_cm);
HandleMark hm;
FreelistLocker x(_collector);
@@ -112,13 +113,13 @@ void VM_CMS_Initial_Mark::doit() {
_collector->_gc_timer_cm->register_gc_pause_start("Initial Mark");
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- GCCauseSetter gccs(gch, GCCause::_cms_initial_mark);
+ CMSHeap* heap = CMSHeap::heap();
+ GCCauseSetter gccs(heap, GCCause::_cms_initial_mark);
VM_CMS_Operation::verify_before_gc();
IsGCActiveMark x; // stop-world GC active
- _collector->do_CMS_operation(CMSCollector::CMS_op_checkpointRootsInitial, gch->gc_cause());
+ _collector->do_CMS_operation(CMSCollector::CMS_op_checkpointRootsInitial, heap->gc_cause());
VM_CMS_Operation::verify_after_gc();
@@ -140,13 +141,13 @@ void VM_CMS_Final_Remark::doit() {
_collector->_gc_timer_cm->register_gc_pause_start("Final Mark");
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- GCCauseSetter gccs(gch, GCCause::_cms_final_remark);
+ CMSHeap* heap = CMSHeap::heap();
+ GCCauseSetter gccs(heap, GCCause::_cms_final_remark);
VM_CMS_Operation::verify_before_gc();
IsGCActiveMark x; // stop-world GC active
- _collector->do_CMS_operation(CMSCollector::CMS_op_checkpointRootsFinal, gch->gc_cause());
+ _collector->do_CMS_operation(CMSCollector::CMS_op_checkpointRootsFinal, heap->gc_cause());
VM_CMS_Operation::verify_after_gc();
@@ -162,8 +163,8 @@ void VM_GenCollectFullConcurrent::doit() {
assert(Thread::current()->is_VM_thread(), "Should be VM thread");
assert(GCLockerInvokesConcurrent || ExplicitGCInvokesConcurrent, "Unexpected");
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- if (_gc_count_before == gch->total_collections()) {
+ CMSHeap* heap = CMSHeap::heap();
+ if (_gc_count_before == heap->total_collections()) {
// The "full" of do_full_collection call below "forces"
// a collection; the second arg, 0, below ensures that
// only the young gen is collected. XXX In the future,
@@ -173,21 +174,21 @@ void VM_GenCollectFullConcurrent::doit() {
// for the future.
assert(SafepointSynchronize::is_at_safepoint(),
"We can only be executing this arm of if at a safepoint");
- GCCauseSetter gccs(gch, _gc_cause);
- gch->do_full_collection(gch->must_clear_all_soft_refs(), GenCollectedHeap::YoungGen);
+ GCCauseSetter gccs(heap, _gc_cause);
+ heap->do_full_collection(heap->must_clear_all_soft_refs(), GenCollectedHeap::YoungGen);
} // Else no need for a foreground young gc
- assert((_gc_count_before < gch->total_collections()) ||
+ assert((_gc_count_before < heap->total_collections()) ||
(GCLocker::is_active() /* gc may have been skipped */
- && (_gc_count_before == gch->total_collections())),
+ && (_gc_count_before == heap->total_collections())),
"total_collections() should be monotonically increasing");
MutexLockerEx x(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
- assert(_full_gc_count_before <= gch->total_full_collections(), "Error");
- if (gch->total_full_collections() == _full_gc_count_before) {
+ assert(_full_gc_count_before <= heap->total_full_collections(), "Error");
+ if (heap->total_full_collections() == _full_gc_count_before) {
// Nudge the CMS thread to start a concurrent collection.
CMSCollector::request_full_gc(_full_gc_count_before, _gc_cause);
} else {
- assert(_full_gc_count_before < gch->total_full_collections(), "Error");
+ assert(_full_gc_count_before < heap->total_full_collections(), "Error");
FullGCCount_lock->notify_all(); // Inform the Java thread its work is done
}
}
@@ -197,11 +198,11 @@ bool VM_GenCollectFullConcurrent::evaluate_at_safepoint() const {
assert(thr != NULL, "Unexpected tid");
if (!thr->is_Java_thread()) {
assert(thr->is_VM_thread(), "Expected to be evaluated by VM thread");
- GenCollectedHeap* gch = GenCollectedHeap::heap();
- if (_gc_count_before != gch->total_collections()) {
+ CMSHeap* heap = CMSHeap::heap();
+ if (_gc_count_before != heap->total_collections()) {
// No need to do a young gc, we'll just nudge the CMS thread
// in the doit() method above, to be executed soon.
- assert(_gc_count_before < gch->total_collections(),
+ assert(_gc_count_before < heap->total_collections(),
"total_collections() should be monotonically increasing");
return false; // no need for foreground young gc
}
@@ -227,9 +228,9 @@ void VM_GenCollectFullConcurrent::doit_epilogue() {
// count overflows and wraps around. XXX fix me !!!
// e.g. at the rate of 1 full gc per ms, this could
// overflow in about 1000 years.
- GenCollectedHeap* gch = GenCollectedHeap::heap();
+ CMSHeap* heap = CMSHeap::heap();
if (_gc_cause != GCCause::_gc_locker &&
- gch->total_full_collections_completed() <= _full_gc_count_before) {
+ heap->total_full_collections_completed() <= _full_gc_count_before) {
// maybe we should change the condition to test _gc_cause ==
// GCCause::_java_lang_system_gc or GCCause::_dcmd_gc_run,
// instead of _gc_cause != GCCause::_gc_locker
@@ -245,7 +246,7 @@ void VM_GenCollectFullConcurrent::doit_epilogue() {
MutexLockerEx ml(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
// Either a concurrent or a stop-world full gc is sufficient
// witness to our request.
- while (gch->total_full_collections_completed() <= _full_gc_count_before) {
+ while (heap->total_full_collections_completed() <= _full_gc_count_before) {
FullGCCount_lock->wait(Mutex::_no_safepoint_check_flag);
}
}
diff --git a/src/hotspot/share/gc/g1/concurrentG1RefineThread.cpp b/src/hotspot/share/gc/g1/concurrentG1RefineThread.cpp
index 93903159139..8af13dc078d 100644
--- a/src/hotspot/share/gc/g1/concurrentG1RefineThread.cpp
+++ b/src/hotspot/share/gc/g1/concurrentG1RefineThread.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,7 +27,7 @@
#include "gc/g1/concurrentG1RefineThread.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1RemSet.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/handles.inline.hpp"
diff --git a/src/hotspot/share/gc/g1/concurrentMarkThread.cpp b/src/hotspot/share/gc/g1/concurrentMarkThread.cpp
index d6930853fd4..feb287a0135 100644
--- a/src/hotspot/share/gc/g1/concurrentMarkThread.cpp
+++ b/src/hotspot/share/gc/g1/concurrentMarkThread.cpp
@@ -30,12 +30,12 @@
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1MMUTracker.hpp"
#include "gc/g1/g1Policy.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/g1/vm_operations_g1.hpp"
#include "gc/shared/concurrentGCPhaseManager.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTrace.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/vmThread.hpp"
@@ -95,7 +95,7 @@ public:
_cm(cm) {}
void do_void(){
- _cm->checkpointRootsFinal(false); // !clear_all_soft_refs
+ _cm->checkpoint_roots_final(false); // !clear_all_soft_refs
}
};
@@ -429,7 +429,7 @@ void ConcurrentMarkThread::run_service() {
G1ConcPhase p(G1ConcurrentPhase::CLEANUP_FOR_NEXT_MARK, this);
_cm->cleanup_for_next_mark();
} else {
- assert(!G1VerifyBitmaps || _cm->nextMarkBitmapIsClear(), "Next mark bitmap must be clear");
+ assert(!G1VerifyBitmaps || _cm->next_mark_bitmap_is_clear(), "Next mark bitmap must be clear");
}
}
diff --git a/src/hotspot/share/gc/g1/dirtyCardQueue.cpp b/src/hotspot/share/gc/g1/dirtyCardQueue.cpp
index 9b134474e6f..11c2da764e4 100644
--- a/src/hotspot/share/gc/g1/dirtyCardQueue.cpp
+++ b/src/hotspot/share/gc/g1/dirtyCardQueue.cpp
@@ -280,13 +280,13 @@ void DirtyCardQueueSet::par_apply_closure_to_all_completed_buffers(CardTableEntr
BufferNode* nd = _cur_par_buffer_node;
while (nd != NULL) {
BufferNode* next = nd->next();
- void* actual = Atomic::cmpxchg_ptr(next, &_cur_par_buffer_node, nd);
+ BufferNode* actual = Atomic::cmpxchg(next, &_cur_par_buffer_node, nd);
if (actual == nd) {
bool b = apply_closure_to_buffer(cl, nd, false);
guarantee(b, "Should not stop early.");
nd = next;
} else {
- nd = static_cast<BufferNode*>(actual);
+ nd = actual;
}
}
}
diff --git a/src/hotspot/share/gc/g1/g1CardLiveData.cpp b/src/hotspot/share/gc/g1/g1CardLiveData.cpp
index 040bf8b6ca5..0357a84c47c 100644
--- a/src/hotspot/share/gc/g1/g1CardLiveData.cpp
+++ b/src/hotspot/share/gc/g1/g1CardLiveData.cpp
@@ -26,7 +26,7 @@
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1ConcurrentMark.inline.hpp"
#include "gc/g1/g1CardLiveData.inline.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/workgroup.hpp"
#include "logging/log.hpp"
#include "memory/universe.hpp"
@@ -313,7 +313,7 @@ public:
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1ConcurrentMark* cm = g1h->concurrent_mark();
- G1CreateLiveDataClosure cl(g1h, cm, cm->nextMarkBitMap(), _live_data);
+ G1CreateLiveDataClosure cl(g1h, cm, cm->next_mark_bitmap(), _live_data);
g1h->heap_region_par_iterate(&cl, worker_id, &_hr_claimer);
}
};
diff --git a/src/hotspot/share/gc/g1/g1CodeCacheRemSet.cpp b/src/hotspot/share/gc/g1/g1CodeCacheRemSet.cpp
index 88611f472ca..bd18ae29621 100644
--- a/src/hotspot/share/gc/g1/g1CodeCacheRemSet.cpp
+++ b/src/hotspot/share/gc/g1/g1CodeCacheRemSet.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -155,19 +155,19 @@ G1CodeRootSet::~G1CodeRootSet() {
}
G1CodeRootSetTable* G1CodeRootSet::load_acquire_table() {
- return (G1CodeRootSetTable*) OrderAccess::load_ptr_acquire(&_table);
+ return OrderAccess::load_acquire(&_table);
}
void G1CodeRootSet::allocate_small_table() {
G1CodeRootSetTable* temp = new G1CodeRootSetTable(SmallSize);
- OrderAccess::release_store_ptr(&_table, temp);
+ OrderAccess::release_store(&_table, temp);
}
void G1CodeRootSetTable::purge_list_append(G1CodeRootSetTable* table) {
for (;;) {
table->_purge_next = _purge_list;
- G1CodeRootSetTable* old = (G1CodeRootSetTable*) Atomic::cmpxchg_ptr(table, &_purge_list, table->_purge_next);
+ G1CodeRootSetTable* old = Atomic::cmpxchg(table, &_purge_list, table->_purge_next);
if (old == table->_purge_next) {
break;
}
@@ -191,7 +191,7 @@ void G1CodeRootSet::move_to_large() {
G1CodeRootSetTable::purge_list_append(_table);
- OrderAccess::release_store_ptr(&_table, temp);
+ OrderAccess::release_store(&_table, temp);
}
void G1CodeRootSet::purge() {
diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
index 19e2998a50d..006c09fa490 100644
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp
@@ -57,7 +57,6 @@
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/g1/vm_operations_g1.hpp"
#include "gc/shared/gcHeapSummary.hpp"
#include "gc/shared/gcId.hpp"
@@ -68,8 +67,10 @@
#include "gc/shared/generationSpec.hpp"
#include "gc/shared/isGCActiveMark.hpp"
#include "gc/shared/preservedMarks.inline.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/referenceProcessor.inline.hpp"
#include "gc/shared/taskqueue.inline.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/iterator.hpp"
@@ -141,13 +142,6 @@ void G1RegionMappingChangedListener::on_commit(uint start_idx, size_t num_region
reset_from_card_cache(start_idx, num_regions);
}
-// Returns true if the reference points to an object that
-// can move in an incremental collection.
-bool G1CollectedHeap::is_scavengable(const void* p) {
- HeapRegion* hr = heap_region_containing(p);
- return !hr->is_pinned();
-}
-
// Private methods.
HeapRegion*
@@ -1774,7 +1768,7 @@ jint G1CollectedHeap::initialize() {
vm_shutdown_during_initialization("Could not create/initialize G1ConcurrentMark");
return JNI_ENOMEM;
}
- _cmThread = _cm->cmThread();
+ _cmThread = _cm->cm_thread();
// Now expand into the initial heap size.
if (!expand(init_byte_size, _workers)) {
@@ -1849,6 +1843,14 @@ void G1CollectedHeap::stop() {
}
}
+void G1CollectedHeap::safepoint_synchronize_begin() {
+ SuspendibleThreadSet::synchronize();
+}
+
+void G1CollectedHeap::safepoint_synchronize_end() {
+ SuspendibleThreadSet::desynchronize();
+}
+
size_t G1CollectedHeap::conservative_max_heap_alignment() {
return HeapRegion::max_region_size();
}
@@ -3029,7 +3031,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
g1_policy()->record_collection_pause_start(sample_start_time_sec);
if (collector_state()->during_initial_mark_pause()) {
- concurrent_mark()->checkpointRootsInitialPre();
+ concurrent_mark()->checkpoint_roots_initial_pre();
}
g1_policy()->finalize_collection_set(target_pause_time_ms, &_survivor);
@@ -3100,7 +3102,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
// We have to do this before we notify the CM threads that
// they can start working to make sure that all the
// appropriate initialization is done on the CM object.
- concurrent_mark()->checkpointRootsInitialPost();
+ concurrent_mark()->checkpoint_roots_initial_post();
collector_state()->set_mark_in_progress(true);
// Note that we don't actually trigger the CM thread at
// this point. We do that later when we're sure that
@@ -3458,10 +3460,10 @@ private:
// Variables used to claim nmethods.
CompiledMethod* _first_nmethod;
- volatile CompiledMethod* _claimed_nmethod;
+ CompiledMethod* volatile _claimed_nmethod;
// The list of nmethods that need to be processed by the second pass.
- volatile CompiledMethod* _postponed_list;
+ CompiledMethod* volatile _postponed_list;
volatile uint _num_entered_barrier;
public:
@@ -3480,7 +3482,7 @@ private:
if(iter.next_alive()) {
_first_nmethod = iter.method();
}
- _claimed_nmethod = (volatile CompiledMethod*)_first_nmethod;
+ _claimed_nmethod = _first_nmethod;
}
~G1CodeCacheUnloadingTask() {
@@ -3496,9 +3498,9 @@ private:
void add_to_postponed_list(CompiledMethod* nm) {
CompiledMethod* old;
do {
- old = (CompiledMethod*)_postponed_list;
+ old = _postponed_list;
nm->set_unloading_next(old);
- } while ((CompiledMethod*)Atomic::cmpxchg_ptr(nm, &_postponed_list, old) != old);
+ } while (Atomic::cmpxchg(nm, &_postponed_list, old) != old);
}
void clean_nmethod(CompiledMethod* nm) {
@@ -3527,7 +3529,7 @@ private:
do {
*num_claimed_nmethods = 0;
- first = (CompiledMethod*)_claimed_nmethod;
+ first = _claimed_nmethod;
last = CompiledMethodIterator(first);
if (first != NULL) {
@@ -3541,7 +3543,7 @@ private:
}
}
- } while ((CompiledMethod*)Atomic::cmpxchg_ptr(last.method(), &_claimed_nmethod, first) != first);
+ } while (Atomic::cmpxchg(last.method(), &_claimed_nmethod, first) != first);
}
CompiledMethod* claim_postponed_nmethod() {
@@ -3549,14 +3551,14 @@ private:
CompiledMethod* next;
do {
- claim = (CompiledMethod*)_postponed_list;
+ claim = _postponed_list;
if (claim == NULL) {
return NULL;
}
next = claim->unloading_next();
- } while ((CompiledMethod*)Atomic::cmpxchg_ptr(next, &_postponed_list, claim) != claim);
+ } while (Atomic::cmpxchg(next, &_postponed_list, claim) != claim);
return claim;
}
@@ -4127,17 +4129,6 @@ public:
}
};
-void G1CollectedHeap::process_weak_jni_handles() {
- double ref_proc_start = os::elapsedTime();
-
- G1STWIsAliveClosure is_alive(this);
- G1KeepAliveClosure keep_alive(this);
- JNIHandles::weak_oops_do(&is_alive, &keep_alive);
-
- double ref_proc_time = os::elapsedTime() - ref_proc_start;
- g1_policy()->phase_times()->record_ref_proc_time(ref_proc_time * 1000.0);
-}
-
void G1CollectedHeap::preserve_cm_referents(G1ParScanThreadStateSet* per_thread_states) {
// Any reference objects, in the collection set, that were 'discovered'
// by the CM ref processor should have already been copied (either by
@@ -4164,7 +4155,7 @@ void G1CollectedHeap::preserve_cm_referents(G1ParScanThreadStateSet* per_thread_
// To avoid spawning task when there is no work to do, check that
// a concurrent cycle is active and that some references have been
// discovered.
- if (concurrent_mark()->cmThread()->during_cycle() &&
+ if (concurrent_mark()->cm_thread()->during_cycle() &&
ref_processor_cm()->has_discovered_references()) {
double preserve_cm_referents_start = os::elapsedTime();
uint no_of_gc_workers = workers()->active_workers();
@@ -4368,14 +4359,23 @@ void G1CollectedHeap::post_evacuate_collection_set(EvacuationInfo& evacuation_in
process_discovered_references(per_thread_states);
} else {
ref_processor_stw()->verify_no_references_recorded();
- process_weak_jni_handles();
+ }
+
+ G1STWIsAliveClosure is_alive(this);
+ G1KeepAliveClosure keep_alive(this);
+
+ {
+ double start = os::elapsedTime();
+
+ WeakProcessor::weak_oops_do(&is_alive, &keep_alive);
+
+ double time_ms = (os::elapsedTime() - start) * 1000.0;
+ g1_policy()->phase_times()->record_ref_proc_time(time_ms);
}
if (G1StringDedup::is_enabled()) {
double fixup_start = os::elapsedTime();
- G1STWIsAliveClosure is_alive(this);
- G1KeepAliveClosure keep_alive(this);
G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive, true, g1_policy()->phase_times());
double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
@@ -4448,7 +4448,7 @@ void G1CollectedHeap::free_region(HeapRegion* hr,
if (G1VerifyBitmaps) {
MemRegion mr(hr->bottom(), hr->end());
- concurrent_mark()->clearRangePrevBitmap(mr);
+ concurrent_mark()->clear_range_in_prev_bitmap(mr);
}
// Clear the card counts for this region.
@@ -4814,7 +4814,7 @@ class G1FreeHumongousRegionClosure : public HeapRegionClosure {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
oop obj = (oop)r->bottom();
- G1CMBitMap* next_bitmap = g1h->concurrent_mark()->nextMarkBitMap();
+ G1CMBitMap* next_bitmap = g1h->concurrent_mark()->next_mark_bitmap();
// The following checks whether the humongous object is live are sufficient.
// The main additional check (in addition to having a reference from the roots
@@ -5323,17 +5323,20 @@ public:
void do_oop(narrowOop* p) { do_oop_work(p); }
};
-void G1CollectedHeap::register_nmethod(nmethod* nm) {
- CollectedHeap::register_nmethod(nm);
+// Returns true if the reference points to an object that
+// can move in an incremental collection.
+bool G1CollectedHeap::is_scavengable(oop obj) {
+ HeapRegion* hr = heap_region_containing(obj);
+ return !hr->is_pinned();
+}
+void G1CollectedHeap::register_nmethod(nmethod* nm) {
guarantee(nm != NULL, "sanity");
RegisterNMethodOopClosure reg_cl(this, nm);
nm->oops_do(&reg_cl);
}
void G1CollectedHeap::unregister_nmethod(nmethod* nm) {
- CollectedHeap::unregister_nmethod(nm);
-
guarantee(nm != NULL, "sanity");
UnregisterNMethodOopClosure reg_cl(this, nm);
nm->oops_do(&reg_cl, true);
diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp
index 5ed6ae89021..fd014af03e8 100644
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.hpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.hpp
@@ -63,7 +63,6 @@ class HRRSCleanupTask;
class GenerationSpec;
class G1ParScanThreadState;
class G1ParScanThreadStateSet;
-class G1KlassScanClosure;
class G1ParScanThreadState;
class ObjectClosure;
class SpaceClosure;
@@ -304,8 +303,6 @@ private:
void trace_heap(GCWhen::Type when, const GCTracer* tracer);
- void process_weak_jni_handles();
-
// These are macros so that, if the assert fires, we get the correct
// line number, file, etc.
@@ -969,6 +966,8 @@ public:
jint initialize();
virtual void stop();
+ virtual void safepoint_synchronize_begin();
+ virtual void safepoint_synchronize_end();
// Return the (conservative) maximum heap alignment for any G1 heap
static size_t conservative_max_heap_alignment();
@@ -1283,8 +1282,6 @@ public:
inline bool is_in_young(const oop obj);
- virtual bool is_scavengable(const void* addr);
-
// We don't need barriers for initializing stores to objects
// in the young gen: for the SATB pre-barrier, there is no
// pre-value that needs to be remembered; for the remembered-set
@@ -1364,7 +1361,7 @@ public:
// is not marked, and c) it is not in an archive region.
bool is_obj_dead(const oop obj, const HeapRegion* hr) const {
return
- hr->is_obj_dead(obj, _cm->prevMarkBitMap()) &&
+ hr->is_obj_dead(obj, _cm->prev_mark_bitmap()) &&
!hr->is_archive();
}
@@ -1396,6 +1393,9 @@ public:
// Optimized nmethod scanning support routines
+ // Is an oop scavengable
+ virtual bool is_scavengable(oop obj);
+
// Register the given nmethod with the G1 heap.
virtual void register_nmethod(nmethod* nm);
diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp
index 650bb2c032f..982bebfe24a 100644
--- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp
@@ -135,7 +135,7 @@ inline RefToScanQueue* G1CollectedHeap::task_queue(uint i) const {
}
inline bool G1CollectedHeap::isMarkedNext(oop obj) const {
- return _cm->nextMarkBitMap()->is_marked((HeapWord*)obj);
+ return _cm->next_mark_bitmap()->is_marked((HeapWord*)obj);
}
inline bool G1CollectedHeap::is_in_cset(oop obj) {
diff --git a/src/hotspot/share/gc/g1/g1CollectionSet.cpp b/src/hotspot/share/gc/g1/g1CollectionSet.cpp
index aa93d2805fc..d7bb19d0723 100644
--- a/src/hotspot/share/gc/g1/g1CollectionSet.cpp
+++ b/src/hotspot/share/gc/g1/g1CollectionSet.cpp
@@ -431,15 +431,15 @@ void G1CollectionSet::finalize_old_part(double time_remaining_ms) {
// Stop adding regions if the remaining reclaimable space is
// not above G1HeapWastePercent.
size_t reclaimable_bytes = cset_chooser()->remaining_reclaimable_bytes();
- double reclaimable_perc = _policy->reclaimable_bytes_perc(reclaimable_bytes);
+ double reclaimable_percent = _policy->reclaimable_bytes_percent(reclaimable_bytes);
double threshold = (double) G1HeapWastePercent;
- if (reclaimable_perc <= threshold) {
+ if (reclaimable_percent <= threshold) {
// We've added enough old regions that the amount of uncollected
// reclaimable space is at or below the waste threshold. Stop
// adding old regions to the CSet.
log_debug(gc, ergo, cset)("Finish adding old regions to CSet (reclaimable percentage not over threshold). "
"old %u regions, max %u regions, reclaimable: " SIZE_FORMAT "B (%1.2f%%) threshold: " UINTX_FORMAT "%%",
- old_region_length(), max_old_cset_length, reclaimable_bytes, reclaimable_perc, G1HeapWastePercent);
+ old_region_length(), max_old_cset_length, reclaimable_bytes, reclaimable_percent, G1HeapWastePercent);
break;
}
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
index 497d07c52b5..bc346a0fbf9 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp
@@ -38,7 +38,6 @@
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
#include "gc/g1/heapRegionSet.inline.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcId.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTrace.hpp"
@@ -46,8 +45,10 @@
#include "gc/shared/genOopClosures.inline.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/strongRootsScope.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "gc/shared/vmGCOperations.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "logging/log.hpp"
#include "memory/allocation.hpp"
#include "memory/resourceArea.hpp"
@@ -325,31 +326,44 @@ bool G1CMRootRegions::wait_until_scan_finished() {
return true;
}
-uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
- return MAX2((n_par_threads + 2) / 4, 1U);
+// Returns the maximum number of workers to be used in a concurrent
+// phase based on the number of GC workers being used in a STW
+// phase.
+static uint scale_concurrent_worker_threads(uint num_gc_workers) {
+ return MAX2((num_gc_workers + 2) / 4, 1U);
}
-G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
+G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
+ G1RegionToSpaceMapper* prev_bitmap_storage,
+ G1RegionToSpaceMapper* next_bitmap_storage) :
+ // _cm_thread set inside the constructor
_g1h(g1h),
- _markBitMap1(),
- _markBitMap2(),
- _parallel_marking_threads(0),
- _max_parallel_marking_threads(0),
- _sleep_factor(0.0),
- _marking_task_overhead(1.0),
- _cleanup_list("Cleanup List"),
+ _completed_initialization(false),
- _prevMarkBitMap(&_markBitMap1),
- _nextMarkBitMap(&_markBitMap2),
+ _cleanup_list("Concurrent Mark Cleanup List"),
+ _mark_bitmap_1(),
+ _mark_bitmap_2(),
+ _prev_mark_bitmap(&_mark_bitmap_1),
+ _next_mark_bitmap(&_mark_bitmap_2),
+
+ _heap_start(_g1h->reserved_region().start()),
+ _heap_end(_g1h->reserved_region().end()),
+
+ _root_regions(),
_global_mark_stack(),
+
// _finger set in set_non_marking_state
- _max_worker_id(ParallelGCThreads),
- // _active_tasks set in set_non_marking_state
+ _max_num_tasks(ParallelGCThreads),
+ // _num_active_tasks set in set_non_marking_state()
// _tasks set inside the constructor
- _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
- _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
+
+ _task_queues(new G1CMTaskQueueSet((int) _max_num_tasks)),
+ _terminator(ParallelTaskTerminator((int) _max_num_tasks, _task_queues)),
+
+ _first_overflow_barrier_sync(),
+ _second_overflow_barrier_sync(),
_has_overflown(false),
_concurrent(false),
@@ -362,87 +376,62 @@ G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper*
// _verbose_level set below
_init_times(),
- _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
+ _remark_times(),
+ _remark_mark_times(),
+ _remark_weak_ref_times(),
_cleanup_times(),
_total_counting_time(0.0),
_total_rs_scrub_time(0.0),
- _parallel_workers(NULL),
+ _accum_task_vtime(NULL),
- _completed_initialization(false) {
+ _concurrent_workers(NULL),
+ _num_concurrent_workers(0),
+ _max_concurrent_workers(0)
+{
+ _mark_bitmap_1.initialize(g1h->reserved_region(), prev_bitmap_storage);
+ _mark_bitmap_2.initialize(g1h->reserved_region(), next_bitmap_storage);
- _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
- _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
-
- // Create & start a ConcurrentMark thread.
- _cmThread = new ConcurrentMarkThread(this);
- assert(cmThread() != NULL, "CM Thread should have been created");
- assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
- if (_cmThread->osthread() == NULL) {
- vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
+ // Create & start ConcurrentMark thread.
+ _cm_thread = new ConcurrentMarkThread(this);
+ if (_cm_thread->osthread() == NULL) {
+ vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
}
- assert(CGC_lock != NULL, "Where's the CGC_lock?");
+ assert(CGC_lock != NULL, "CGC_lock must be initialized");
SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
satb_qs.set_buffer_size(G1SATBBufferSize);
_root_regions.init(_g1h->survivor(), this);
+ if (FLAG_IS_DEFAULT(ConcGCThreads) || ConcGCThreads == 0) {
+ // Calculate the number of concurrent worker threads by scaling
+ // the number of parallel GC threads.
+ uint marking_thread_num = scale_concurrent_worker_threads(ParallelGCThreads);
+ FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
+ }
+
+ assert(ConcGCThreads > 0, "ConcGCThreads have been set.");
if (ConcGCThreads > ParallelGCThreads) {
- log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
+ log_warning(gc)("More ConcGCThreads (%u) than ParallelGCThreads (%u).",
ConcGCThreads, ParallelGCThreads);
return;
}
- if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
- // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
- // if both are set
- _sleep_factor = 0.0;
- _marking_task_overhead = 1.0;
- } else if (G1MarkingOverheadPercent > 0) {
- // We will calculate the number of parallel marking threads based
- // on a target overhead with respect to the soft real-time goal
- double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
- double overall_cm_overhead =
- (double) MaxGCPauseMillis * marking_overhead /
- (double) GCPauseIntervalMillis;
- double cpu_ratio = 1.0 / os::initial_active_processor_count();
- double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
- double marking_task_overhead =
- overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
- double sleep_factor =
- (1.0 - marking_task_overhead) / marking_task_overhead;
- FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
- _sleep_factor = sleep_factor;
- _marking_task_overhead = marking_task_overhead;
- } else {
- // Calculate the number of parallel marking threads by scaling
- // the number of parallel GC threads.
- uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
- FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
- _sleep_factor = 0.0;
- _marking_task_overhead = 1.0;
- }
-
- assert(ConcGCThreads > 0, "Should have been set");
log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
- _parallel_marking_threads = ConcGCThreads;
- _max_parallel_marking_threads = _parallel_marking_threads;
- _parallel_workers = new WorkGang("G1 Marker",
- _max_parallel_marking_threads, false, true);
- if (_parallel_workers == NULL) {
- vm_exit_during_initialization("Failed necessary allocation.");
- } else {
- _parallel_workers->initialize_workers();
- }
+ _num_concurrent_workers = ConcGCThreads;
+ _max_concurrent_workers = _num_concurrent_workers;
+
+ _concurrent_workers = new WorkGang("G1 Conc", _max_concurrent_workers, false, true);
+ _concurrent_workers->initialize_workers();
if (FLAG_IS_DEFAULT(MarkStackSize)) {
size_t mark_stack_size =
MIN2(MarkStackSizeMax,
- MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
+ MAX2(MarkStackSize, (size_t) (_max_concurrent_workers * TASKQUEUE_SIZE)));
// Verify that the calculated value for MarkStackSize is in range.
// It would be nice to use the private utility routine from Arguments.
if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
@@ -477,24 +466,22 @@ G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper*
vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
}
- _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
- _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
+ _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
+ _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_num_tasks, mtGC);
// so that the assertion in MarkingTaskQueue::task_queue doesn't fail
- _active_tasks = _max_worker_id;
+ _num_active_tasks = _max_num_tasks;
- for (uint i = 0; i < _max_worker_id; ++i) {
+ for (uint i = 0; i < _max_num_tasks; ++i) {
G1CMTaskQueue* task_queue = new G1CMTaskQueue();
task_queue->initialize();
_task_queues->register_queue(i, task_queue);
- _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
+ _tasks[i] = new G1CMTask(i, this, task_queue);
_accum_task_vtime[i] = 0.0;
}
- // so that the call below can read a sensible value
- _heap_start = g1h->reserved_region().start();
set_non_marking_state();
_completed_initialization = true;
}
@@ -514,11 +501,11 @@ void G1ConcurrentMark::reset() {
// Reset all the marking data structures and any necessary flags
reset_marking_state();
- // We do reset all of them, since different phases will use
+ // We reset all of them, since different phases will use
// different number of active threads. So, it's easiest to have all
// of them ready.
- for (uint i = 0; i < _max_worker_id; ++i) {
- _tasks[i]->reset(_nextMarkBitMap);
+ for (uint i = 0; i < _max_num_tasks; ++i) {
+ _tasks[i]->reset(_next_mark_bitmap);
}
// we need this to make sure that the flag is on during the evac
@@ -538,16 +525,16 @@ void G1ConcurrentMark::reset_marking_state() {
clear_has_overflown();
_finger = _heap_start;
- for (uint i = 0; i < _max_worker_id; ++i) {
+ for (uint i = 0; i < _max_num_tasks; ++i) {
G1CMTaskQueue* queue = _task_queues->queue(i);
queue->set_empty();
}
}
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
- assert(active_tasks <= _max_worker_id, "we should not have more");
+ assert(active_tasks <= _max_num_tasks, "we should not have more");
- _active_tasks = active_tasks;
+ _num_active_tasks = active_tasks;
// Need to update the three data structures below according to the
// number of active threads for this phase.
_terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
@@ -560,8 +547,9 @@ void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurr
_concurrent = concurrent;
// We propagate this to all tasks, not just the active ones.
- for (uint i = 0; i < _max_worker_id; ++i)
+ for (uint i = 0; i < _max_num_tasks; ++i) {
_tasks[i]->set_concurrent(concurrent);
+ }
if (concurrent) {
set_concurrent_marking_in_progress();
@@ -580,7 +568,7 @@ void G1ConcurrentMark::set_non_marking_state() {
// We set the global marking state to some default values when we're
// not doing marking.
reset_marking_state();
- _active_tasks = 0;
+ _num_active_tasks = 0;
clear_concurrent_marking_in_progress();
}
@@ -623,7 +611,7 @@ private:
// as asserts here to minimize their overhead on the product. However, we
// will have them as guarantees at the beginning / end of the bitmap
// clearing to get some checking in the product.
- assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
+ assert(_cm == NULL || _cm->cm_thread()->during_cycle(), "invariant");
assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
}
assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
@@ -672,7 +660,7 @@ void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool
void G1ConcurrentMark::cleanup_for_next_mark() {
// Make sure that the concurrent mark thread looks to still be in
// the current cycle.
- guarantee(cmThread()->during_cycle(), "invariant");
+ guarantee(cm_thread()->during_cycle(), "invariant");
// We are finishing up the current cycle by clearing the next
// marking bitmap and getting it ready for the next cycle. During
@@ -680,23 +668,23 @@ void G1ConcurrentMark::cleanup_for_next_mark() {
// is the case.
guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
- clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
+ clear_bitmap(_next_mark_bitmap, _concurrent_workers, true);
// Clear the live count data. If the marking has been aborted, the abort()
// call already did that.
if (!has_aborted()) {
- clear_live_data(_parallel_workers);
+ clear_live_data(_concurrent_workers);
DEBUG_ONLY(verify_live_data_clear());
}
// Repeat the asserts from above.
- guarantee(cmThread()->during_cycle(), "invariant");
+ guarantee(cm_thread()->during_cycle(), "invariant");
guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
}
void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
- clear_bitmap(_prevMarkBitMap, workers, false);
+ clear_bitmap(_prev_mark_bitmap, workers, false);
}
class CheckBitmapClearHRClosure : public HeapRegionClosure {
@@ -716,8 +704,8 @@ class CheckBitmapClearHRClosure : public HeapRegionClosure {
}
};
-bool G1ConcurrentMark::nextMarkBitmapIsClear() {
- CheckBitmapClearHRClosure cl(_nextMarkBitMap);
+bool G1ConcurrentMark::next_mark_bitmap_is_clear() {
+ CheckBitmapClearHRClosure cl(_next_mark_bitmap);
_g1h->heap_region_iterate(&cl);
return cl.complete();
}
@@ -730,7 +718,7 @@ public:
}
};
-void G1ConcurrentMark::checkpointRootsInitialPre() {
+void G1ConcurrentMark::checkpoint_roots_initial_pre() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
_has_aborted = false;
@@ -744,7 +732,7 @@ void G1ConcurrentMark::checkpointRootsInitialPre() {
}
-void G1ConcurrentMark::checkpointRootsInitialPost() {
+void G1ConcurrentMark::checkpoint_roots_initial_post() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
// Start Concurrent Marking weak-reference discovery.
@@ -842,8 +830,7 @@ private:
public:
void work(uint worker_id) {
- assert(Thread::current()->is_ConcurrentGC_thread(),
- "this should only be done by a conc GC thread");
+ assert(Thread::current()->is_ConcurrentGC_thread(), "Not a concurrent GC thread");
ResourceMark rm;
double start_vtime = os::elapsedVTime();
@@ -852,34 +839,20 @@ public:
SuspendibleThreadSetJoiner sts_join;
assert(worker_id < _cm->active_tasks(), "invariant");
- G1CMTask* the_task = _cm->task(worker_id);
- the_task->record_start_time();
+
+ G1CMTask* task = _cm->task(worker_id);
+ task->record_start_time();
if (!_cm->has_aborted()) {
do {
- double start_vtime_sec = os::elapsedVTime();
- double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
+ task->do_marking_step(G1ConcMarkStepDurationMillis,
+ true /* do_termination */,
+ false /* is_serial*/);
- the_task->do_marking_step(mark_step_duration_ms,
- true /* do_termination */,
- false /* is_serial*/);
-
- double end_vtime_sec = os::elapsedVTime();
- double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
_cm->do_yield_check();
-
- jlong sleep_time_ms;
- if (!_cm->has_aborted() && the_task->has_aborted()) {
- sleep_time_ms =
- (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
- {
- SuspendibleThreadSetLeaver sts_leave;
- os::sleep(Thread::current(), sleep_time_ms, false);
- }
- }
- } while (!_cm->has_aborted() && the_task->has_aborted());
+ } while (!_cm->has_aborted() && task->has_aborted());
}
- the_task->record_end_time();
- guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
+ task->record_end_time();
+ guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
}
double end_vtime = os::elapsedVTime();
@@ -893,30 +866,28 @@ public:
~G1CMConcurrentMarkingTask() { }
};
-// Calculates the number of active workers for a concurrent
-// phase.
-uint G1ConcurrentMark::calc_parallel_marking_threads() {
- uint n_conc_workers = 0;
+uint G1ConcurrentMark::calc_active_marking_workers() {
+ uint result = 0;
if (!UseDynamicNumberOfGCThreads ||
(!FLAG_IS_DEFAULT(ConcGCThreads) &&
!ForceDynamicNumberOfGCThreads)) {
- n_conc_workers = max_parallel_marking_threads();
+ result = _max_concurrent_workers;
} else {
- n_conc_workers =
- AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
+ result =
+ AdaptiveSizePolicy::calc_default_active_workers(_max_concurrent_workers,
1, /* Minimum workers */
- parallel_marking_threads(),
+ _num_concurrent_workers,
Threads::number_of_non_daemon_threads());
- // Don't scale down "n_conc_workers" by scale_parallel_threads() because
- // that scaling has already gone into "_max_parallel_marking_threads".
+ // Don't scale the result down by scale_concurrent_worker_threads() because
+ // that scaling has already gone into "_max_concurrent_workers".
}
- assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
- "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
- max_parallel_marking_threads(), n_conc_workers);
- return n_conc_workers;
+ assert(result > 0 && result <= _max_concurrent_workers,
+ "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
+ _max_concurrent_workers, result);
+ return result;
}
-void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
+void G1ConcurrentMark::scan_root_region(HeapRegion* hr) {
// Currently, only survivors can be root regions.
assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
G1RootRegionScanClosure cl(_g1h, this);
@@ -948,7 +919,7 @@ public:
G1CMRootRegions* root_regions = _cm->root_regions();
HeapRegion* hr = root_regions->claim_next();
while (hr != NULL) {
- _cm->scanRootRegion(hr);
+ _cm->scan_root_region(hr);
hr = root_regions->claim_next();
}
}
@@ -961,17 +932,17 @@ void G1ConcurrentMark::scan_root_regions() {
if (root_regions()->scan_in_progress()) {
assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
- _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
- // We distribute work on a per-region basis, so starting
- // more threads than that is useless.
- root_regions()->num_root_regions());
- assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+ _num_concurrent_workers = MIN2(calc_active_marking_workers(),
+ // We distribute work on a per-region basis, so starting
+ // more threads than that is useless.
+ root_regions()->num_root_regions());
+ assert(_num_concurrent_workers <= _max_concurrent_workers,
"Maximum number of marking threads exceeded");
G1CMRootRegionScanTask task(this);
log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
- task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
- _parallel_workers->run_task(&task, _parallel_marking_threads);
+ task.name(), _num_concurrent_workers, root_regions()->num_root_regions());
+ _concurrent_workers->run_task(&task, _num_concurrent_workers);
// It's possible that has_aborted() is true here without actually
// aborting the survivor scan earlier. This is OK as it's
@@ -1010,29 +981,25 @@ void G1ConcurrentMark::mark_from_roots() {
_restart_for_overflow = false;
- // _g1h has _n_par_threads
- _parallel_marking_threads = calc_parallel_marking_threads();
- assert(parallel_marking_threads() <= max_parallel_marking_threads(),
- "Maximum number of marking threads exceeded");
+ _num_concurrent_workers = calc_active_marking_workers();
- uint active_workers = MAX2(1U, parallel_marking_threads());
- assert(active_workers > 0, "Should have been set");
+ uint active_workers = MAX2(1U, _num_concurrent_workers);
// Setting active workers is not guaranteed since fewer
// worker threads may currently exist and more may not be
// available.
- active_workers = _parallel_workers->update_active_workers(active_workers);
- log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());
+ active_workers = _concurrent_workers->update_active_workers(active_workers);
+ log_info(gc, task)("Using %u workers of %u for marking", active_workers, _concurrent_workers->total_workers());
// Parallel task terminator is set in "set_concurrency_and_phase()"
set_concurrency_and_phase(active_workers, true /* concurrent */);
- G1CMConcurrentMarkingTask markingTask(this, cmThread());
- _parallel_workers->run_task(&markingTask);
+ G1CMConcurrentMarkingTask marking_task(this, cm_thread());
+ _concurrent_workers->run_task(&marking_task);
print_stats();
}
-void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
+void G1ConcurrentMark::checkpoint_roots_final(bool clear_all_soft_refs) {
// world is stopped at this checkpoint
assert(SafepointSynchronize::is_at_safepoint(),
"world should be stopped");
@@ -1059,11 +1026,11 @@ void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
double start = os::elapsedTime();
- checkpointRootsFinalWork();
+ checkpoint_roots_final_work();
double mark_work_end = os::elapsedTime();
- weakRefsWork(clear_all_soft_refs);
+ weak_refs_work(clear_all_soft_refs);
if (has_overflown()) {
// We overflowed. Restart concurrent marking.
@@ -1257,7 +1224,7 @@ void G1ConcurrentMark::cleanup() {
}
// Install newly created mark bitMap as "prev".
- swapMarkBitMaps();
+ swap_mark_bitmaps();
g1h->reset_gc_time_stamp();
@@ -1584,7 +1551,7 @@ void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
_workers->run_task(&enq_task_proxy);
}
-void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
+void G1ConcurrentMark::weak_refs_work(bool clear_all_soft_refs) {
if (has_overflown()) {
// Skip processing the discovered references if we have
// overflown the global marking stack. Reference objects
@@ -1640,7 +1607,7 @@ void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
// we utilize all the worker threads we can.
bool processing_is_mt = rp->processing_is_mt();
uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
- active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
+ active_workers = MAX2(MIN2(active_workers, _max_num_tasks), 1U);
// Parallel processing task executor.
G1CMRefProcTaskExecutor par_task_executor(g1h, this,
@@ -1687,6 +1654,14 @@ void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
assert(!rp->discovery_enabled(), "Post condition");
}
+ assert(has_overflown() || _global_mark_stack.is_empty(),
+ "Mark stack should be empty (unless it has overflown)");
+
+ {
+ GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
+ WeakProcessor::weak_oops_do(&g1_is_alive, &do_nothing_cl);
+ }
+
if (has_overflown()) {
// We can not trust g1_is_alive if the marking stack overflowed
return;
@@ -1708,10 +1683,10 @@ void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
}
}
-void G1ConcurrentMark::swapMarkBitMaps() {
- G1CMBitMap* temp = _prevMarkBitMap;
- _prevMarkBitMap = _nextMarkBitMap;
- _nextMarkBitMap = temp;
+void G1ConcurrentMark::swap_mark_bitmaps() {
+ G1CMBitMap* temp = _prev_mark_bitmap;
+ _prev_mark_bitmap = _next_mark_bitmap;
+ _next_mark_bitmap = temp;
}
// Closure for marking entries in SATB buffers.
@@ -1811,7 +1786,7 @@ public:
}
};
-void G1ConcurrentMark::checkpointRootsFinalWork() {
+void G1ConcurrentMark::checkpoint_roots_final_work() {
ResourceMark rm;
HandleMark hm;
G1CollectedHeap* g1h = G1CollectedHeap::heap();
@@ -1848,8 +1823,8 @@ void G1ConcurrentMark::checkpointRootsFinalWork() {
print_stats();
}
-void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
- _prevMarkBitMap->clear_range(mr);
+void G1ConcurrentMark::clear_range_in_prev_bitmap(MemRegion mr) {
+ _prev_mark_bitmap->clear_range(mr);
}
HeapRegion*
@@ -1870,7 +1845,7 @@ G1ConcurrentMark::claim_region(uint worker_id) {
HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
// Is the gap between reading the finger and doing the CAS too long?
- HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
+ HeapWord* res = Atomic::cmpxchg(end, &_finger, finger);
if (res == finger && curr_region != NULL) {
// we succeeded
HeapWord* bottom = curr_region->bottom();
@@ -1937,7 +1912,7 @@ void G1ConcurrentMark::verify_no_cset_oops() {
_global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
// Verify entries on the task queues
- for (uint i = 0; i < _max_worker_id; ++i) {
+ for (uint i = 0; i < _max_num_tasks; ++i) {
G1CMTaskQueue* queue = _task_queues->queue(i);
queue->iterate(VerifyNoCSetOops("Queue", i));
}
@@ -1954,8 +1929,8 @@ void G1ConcurrentMark::verify_no_cset_oops() {
}
// Verify the task fingers
- assert(parallel_marking_threads() <= _max_worker_id, "sanity");
- for (uint i = 0; i < parallel_marking_threads(); ++i) {
+ assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
+ for (uint i = 0; i < _num_concurrent_workers; ++i) {
G1CMTask* task = _tasks[i];
HeapWord* task_finger = task->finger();
if (task_finger != NULL && task_finger < _heap_end) {
@@ -1970,15 +1945,15 @@ void G1ConcurrentMark::verify_no_cset_oops() {
}
#endif // PRODUCT
void G1ConcurrentMark::create_live_data() {
- _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
+ _g1h->g1_rem_set()->create_card_live_data(_concurrent_workers, _next_mark_bitmap);
}
void G1ConcurrentMark::finalize_live_data() {
- _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
+ _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _next_mark_bitmap);
}
void G1ConcurrentMark::verify_live_data() {
- _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
+ _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _next_mark_bitmap);
}
void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
@@ -1996,14 +1971,14 @@ void G1ConcurrentMark::print_stats() {
return;
}
log_debug(gc, stats)("---------------------------------------------------------------------");
- for (size_t i = 0; i < _active_tasks; ++i) {
+ for (size_t i = 0; i < _num_active_tasks; ++i) {
_tasks[i]->print_stats();
log_debug(gc, stats)("---------------------------------------------------------------------");
}
}
void G1ConcurrentMark::abort() {
- if (!cmThread()->during_cycle() || _has_aborted) {
+ if (!cm_thread()->during_cycle() || _has_aborted) {
// We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
return;
}
@@ -2012,7 +1987,7 @@ void G1ConcurrentMark::abort() {
// concurrent bitmap clearing.
{
GCTraceTime(Debug, gc)("Clear Next Bitmap");
- clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
+ clear_bitmap(_next_mark_bitmap, _g1h->workers(), false);
}
// Note we cannot clear the previous marking bitmap here
// since VerifyDuringGC verifies the objects marked during
@@ -2028,7 +2003,7 @@ void G1ConcurrentMark::abort() {
})
// Empty mark stack
reset_marking_state();
- for (uint i = 0; i < _max_worker_id; ++i) {
+ for (uint i = 0; i < _max_num_tasks; ++i) {
_tasks[i]->clear_region_fields();
}
_first_overflow_barrier_sync.abort();
@@ -2078,22 +2053,22 @@ void G1ConcurrentMark::print_summary_info() {
log.trace(" Total stop_world time = %8.2f s.",
(_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
- cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
+ cm_thread()->vtime_accum(), cm_thread()->vtime_mark_accum());
}
void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
- _parallel_workers->print_worker_threads_on(st);
+ _concurrent_workers->print_worker_threads_on(st);
}
void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
- _parallel_workers->threads_do(tc);
+ _concurrent_workers->threads_do(tc);
}
void G1ConcurrentMark::print_on_error(outputStream* st) const {
st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
- p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
- _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
- _nextMarkBitMap->print_on_error(st, " Next Bits: ");
+ p2i(_prev_mark_bitmap), p2i(_next_mark_bitmap));
+ _prev_mark_bitmap->print_on_error(st, " Prev Bits: ");
+ _next_mark_bitmap->print_on_error(st, " Next Bits: ");
}
static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
@@ -2171,9 +2146,9 @@ void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
_cm_oop_closure = cm_oop_closure;
}
-void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
- guarantee(nextMarkBitMap != NULL, "invariant");
- _nextMarkBitMap = nextMarkBitMap;
+void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) {
+ guarantee(next_mark_bitmap != NULL, "invariant");
+ _next_mark_bitmap = next_mark_bitmap;
clear_region_fields();
_calls = 0;
@@ -2215,7 +2190,9 @@ void G1CMTask::regular_clock_call() {
// If we are not concurrent (i.e. we're doing remark) we don't need
// to check anything else. The other steps are only needed during
// the concurrent marking phase.
- if (!concurrent()) return;
+ if (!_concurrent) {
+ return;
+ }
// (2) If marking has been aborted for Full GC, then we also abort.
if (_cm->has_aborted()) {
@@ -2267,10 +2244,8 @@ void G1CMTask::decrease_limits() {
// entries to/from the global stack). It basically tries to decrease the
// scanning limit so that the clock is called earlier.
- _words_scanned_limit = _real_words_scanned_limit -
- 3 * words_scanned_period / 4;
- _refs_reached_limit = _real_refs_reached_limit -
- 3 * refs_reached_period / 4;
+ _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
+ _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
void G1CMTask::move_entries_to_global_stack() {
@@ -2409,7 +2384,7 @@ void G1CMTask::drain_satb_buffers() {
_draining_satb_buffers = false;
assert(has_aborted() ||
- concurrent() ||
+ _concurrent ||
satb_mq_set.completed_buffers_num() == 0, "invariant");
// again, this was a potentially expensive operation, decrease the
@@ -2418,7 +2393,7 @@ void G1CMTask::drain_satb_buffers() {
}
void G1CMTask::print_stats() {
- log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
+ log_debug(gc, stats)("Marking Stats, task = %u, calls = %u",
_worker_id, _calls);
log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
_elapsed_time_ms, _termination_time_ms);
@@ -2552,21 +2527,7 @@ void G1CMTask::do_marking_step(double time_target_ms,
bool do_termination,
bool is_serial) {
assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
- assert(concurrent() == _cm->concurrent(), "they should be the same");
-
- G1Policy* g1_policy = _g1h->g1_policy();
- assert(_task_queues != NULL, "invariant");
- assert(_task_queue != NULL, "invariant");
- assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
-
- assert(!_claimed,
- "only one thread should claim this task at any one time");
-
- // OK, this doesn't safeguard again all possible scenarios, as it is
- // possible for two threads to set the _claimed flag at the same
- // time. But it is only for debugging purposes anyway and it will
- // catch most problems.
- _claimed = true;
+ assert(_concurrent == _cm->concurrent(), "they should be the same");
_start_time_ms = os::elapsedVTime() * 1000.0;
@@ -2651,7 +2612,7 @@ void G1CMTask::do_marking_step(double time_target_ms,
giveup_current_region();
regular_clock_call();
} else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
- if (_nextMarkBitMap->is_marked(mr.start())) {
+ if (_next_mark_bitmap->is_marked(mr.start())) {
// The object is marked - apply the closure
bitmap_closure.do_addr(mr.start());
}
@@ -2659,7 +2620,7 @@ void G1CMTask::do_marking_step(double time_target_ms,
// we can (and should) give up the current region.
giveup_current_region();
regular_clock_call();
- } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
+ } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) {
giveup_current_region();
regular_clock_call();
} else {
@@ -2787,10 +2748,10 @@ void G1CMTask::do_marking_step(double time_target_ms,
// We're all done.
if (_worker_id == 0) {
- // let's allow task 0 to do this
- if (concurrent()) {
+ // Let's allow task 0 to do this
+ if (_concurrent) {
assert(_cm->concurrent_marking_in_progress(), "invariant");
- // we need to set this to false before the next
+ // We need to set this to false before the next
// safepoint. This way we ensure that the marking phase
// doesn't observe any more heap expansions.
_cm->clear_concurrent_marking_in_progress();
@@ -2862,24 +2823,40 @@ void G1CMTask::do_marking_step(double time_target_ms,
// ready to restart.
}
}
-
- _claimed = false;
}
-G1CMTask::G1CMTask(uint worker_id,
- G1ConcurrentMark* cm,
- G1CMTaskQueue* task_queue,
- G1CMTaskQueueSet* task_queues)
- : _g1h(G1CollectedHeap::heap()),
- _worker_id(worker_id), _cm(cm),
- _objArray_processor(this),
- _claimed(false),
- _nextMarkBitMap(NULL), _hash_seed(17),
- _task_queue(task_queue),
- _task_queues(task_queues),
- _cm_oop_closure(NULL) {
+G1CMTask::G1CMTask(uint worker_id, G1ConcurrentMark* cm, G1CMTaskQueue* task_queue) :
+ _objArray_processor(this),
+ _worker_id(worker_id),
+ _g1h(G1CollectedHeap::heap()),
+ _cm(cm),
+ _next_mark_bitmap(NULL),
+ _task_queue(task_queue),
+ _calls(0),
+ _time_target_ms(0.0),
+ _start_time_ms(0.0),
+ _cm_oop_closure(NULL),
+ _curr_region(NULL),
+ _finger(NULL),
+ _region_limit(NULL),
+ _words_scanned(0),
+ _words_scanned_limit(0),
+ _real_words_scanned_limit(0),
+ _refs_reached(0),
+ _refs_reached_limit(0),
+ _real_refs_reached_limit(0),
+ _hash_seed(17),
+ _has_aborted(false),
+ _has_timed_out(false),
+ _draining_satb_buffers(false),
+ _step_times_ms(),
+ _elapsed_time_ms(0.0),
+ _termination_time_ms(0.0),
+ _termination_start_time_ms(0.0),
+ _concurrent(false),
+ _marking_step_diffs_ms()
+{
guarantee(task_queue != NULL, "invariant");
- guarantee(task_queues != NULL, "invariant");
_marking_step_diffs_ms.add(0.5);
}
@@ -2916,11 +2893,11 @@ G1CMTask::G1CMTask(uint worker_id,
#define G1PPRL_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
-G1PrintRegionLivenessInfoClosure::
-G1PrintRegionLivenessInfoClosure(const char* phase_name)
- : _total_used_bytes(0), _total_capacity_bytes(0),
- _total_prev_live_bytes(0), _total_next_live_bytes(0),
- _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
+G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
+ _total_used_bytes(0), _total_capacity_bytes(0),
+ _total_prev_live_bytes(0), _total_next_live_bytes(0),
+ _total_remset_bytes(0), _total_strong_code_roots_bytes(0)
+{
G1CollectedHeap* g1h = G1CollectedHeap::heap();
MemRegion g1_reserved = g1h->g1_reserved();
double now = os::elapsedTime();
@@ -3010,11 +2987,11 @@ G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
G1PPRL_SUM_MB_FORMAT("code-roots"),
bytes_to_mb(_total_capacity_bytes),
bytes_to_mb(_total_used_bytes),
- perc(_total_used_bytes, _total_capacity_bytes),
+ percent_of(_total_used_bytes, _total_capacity_bytes),
bytes_to_mb(_total_prev_live_bytes),
- perc(_total_prev_live_bytes, _total_capacity_bytes),
+ percent_of(_total_prev_live_bytes, _total_capacity_bytes),
bytes_to_mb(_total_next_live_bytes),
- perc(_total_next_live_bytes, _total_capacity_bytes),
+ percent_of(_total_next_live_bytes, _total_capacity_bytes),
bytes_to_mb(_total_remset_bytes),
bytes_to_mb(_total_strong_code_roots_bytes));
}
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
index d2e2f67f23f..57685cc0cb7 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp
@@ -25,18 +25,18 @@
#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP
#define SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP
-#include "classfile/javaClasses.hpp"
#include "gc/g1/g1ConcurrentMarkBitMap.hpp"
#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp"
-#include "gc/g1/g1RegionToSpaceMapper.hpp"
#include "gc/g1/heapRegionSet.hpp"
#include "gc/shared/taskqueue.hpp"
+class ConcurrentGCTimer;
+class ConcurrentMarkThread;
class G1CollectedHeap;
class G1CMTask;
class G1ConcurrentMark;
-class ConcurrentGCTimer;
class G1OldTracer;
+class G1RegionToSpaceMapper;
class G1SurvivorRegions;
#ifdef _MSC_VER
@@ -272,12 +272,10 @@ public:
bool wait_until_scan_finished();
};
-class ConcurrentMarkThread;
-
+// This class manages data structures and methods for doing liveness analysis in
+// G1's concurrent cycle.
class G1ConcurrentMark: public CHeapObj {
friend class ConcurrentMarkThread;
- friend class G1ParNoteEndTask;
- friend class G1VerifyLiveDataClosure;
friend class G1CMRefProcTaskProxy;
friend class G1CMRefProcTaskExecutor;
friend class G1CMKeepAliveAndDrainClosure;
@@ -287,46 +285,37 @@ class G1ConcurrentMark: public CHeapObj {
friend class G1CMRemarkTask;
friend class G1CMTask;
-protected:
- ConcurrentMarkThread* _cmThread; // The thread doing the work
- G1CollectedHeap* _g1h; // The heap
- uint _parallel_marking_threads; // The number of marking
- // threads we're using
- uint _max_parallel_marking_threads; // Max number of marking
- // threads we'll ever use
- double _sleep_factor; // How much we have to sleep, with
- // respect to the work we just did, to
- // meet the marking overhead goal
- double _marking_task_overhead; // Marking target overhead for
- // a single task
+ ConcurrentMarkThread* _cm_thread; // The thread doing the work
+ G1CollectedHeap* _g1h; // The heap
+ bool _completed_initialization; // Set to true when initialization is complete
- FreeRegionList _cleanup_list;
+ FreeRegionList _cleanup_list;
// Concurrent marking support structures
- G1CMBitMap _markBitMap1;
- G1CMBitMap _markBitMap2;
- G1CMBitMap* _prevMarkBitMap; // Completed mark bitmap
- G1CMBitMap* _nextMarkBitMap; // Under-construction mark bitmap
+ G1CMBitMap _mark_bitmap_1;
+ G1CMBitMap _mark_bitmap_2;
+ G1CMBitMap* _prev_mark_bitmap; // Completed mark bitmap
+ G1CMBitMap* _next_mark_bitmap; // Under-construction mark bitmap
// Heap bounds
- HeapWord* _heap_start;
- HeapWord* _heap_end;
+ HeapWord* _heap_start;
+ HeapWord* _heap_end;
// Root region tracking and claiming
- G1CMRootRegions _root_regions;
+ G1CMRootRegions _root_regions;
- // For gray objects
- G1CMMarkStack _global_mark_stack; // Grey objects behind global finger
- HeapWord* volatile _finger; // The global finger, region aligned,
- // always points to the end of the
- // last claimed region
+ // For grey objects
+ G1CMMarkStack _global_mark_stack; // Grey objects behind global finger
+ HeapWord* volatile _finger; // The global finger, region aligned,
+ // always pointing to the end of the
+ // last claimed region
- // Marking tasks
- uint _max_worker_id;// Maximum worker id
- uint _active_tasks; // Task num currently active
- G1CMTask** _tasks; // Task queue array (max_worker_id len)
- G1CMTaskQueueSet* _task_queues; // Task queue set
- ParallelTaskTerminator _terminator; // For termination
+ uint _max_num_tasks; // Maximum number of marking tasks
+ uint _num_active_tasks; // Number of tasks currently active
+ G1CMTask** _tasks; // Task array (_max_num_tasks length)
+
+ G1CMTaskQueueSet* _task_queues; // Task queue set
+ ParallelTaskTerminator _terminator; // For termination
// Two sync barriers that are used to synchronize tasks when an
// overflow occurs. The algorithm is the following. All tasks enter
@@ -337,32 +326,32 @@ protected:
// ensure, that no task starts doing work before all data
// structures (local and global) have been re-initialized. When they
// exit it, they are free to start working again.
- WorkGangBarrierSync _first_overflow_barrier_sync;
- WorkGangBarrierSync _second_overflow_barrier_sync;
+ WorkGangBarrierSync _first_overflow_barrier_sync;
+ WorkGangBarrierSync _second_overflow_barrier_sync;
// This is set by any task, when an overflow on the global data
// structures is detected
- volatile bool _has_overflown;
+ volatile bool _has_overflown;
// True: marking is concurrent, false: we're in remark
- volatile bool _concurrent;
+ volatile bool _concurrent;
// Set at the end of a Full GC so that marking aborts
- volatile bool _has_aborted;
+ volatile bool _has_aborted;
// Used when remark aborts due to an overflow to indicate that
// another concurrent marking phase should start
- volatile bool _restart_for_overflow;
+ volatile bool _restart_for_overflow;
// This is true from the very start of concurrent marking until the
// point when all the tasks complete their work. It is really used
// to determine the points between the end of concurrent marking and
// time of remark.
- volatile bool _concurrent_marking_in_progress;
+ volatile bool _concurrent_marking_in_progress;
- ConcurrentGCTimer* _gc_timer_cm;
+ ConcurrentGCTimer* _gc_timer_cm;
- G1OldTracer* _gc_tracer_cm;
+ G1OldTracer* _gc_tracer_cm;
- // All of these times are in ms
+ // Timing statistics. All of them are in ms
NumberSeq _init_times;
NumberSeq _remark_times;
NumberSeq _remark_mark_times;
@@ -373,14 +362,16 @@ protected:
double* _accum_task_vtime; // Accumulated task vtime
- WorkGang* _parallel_workers;
+ WorkGang* _concurrent_workers;
+ uint _num_concurrent_workers; // The number of marking worker threads we're using
+ uint _max_concurrent_workers; // Maximum number of marking worker threads
- void weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes);
- void weakRefsWork(bool clear_all_soft_refs);
+ void weak_refs_work_parallel_part(BoolObjectClosure* is_alive, bool purged_classes);
+ void weak_refs_work(bool clear_all_soft_refs);
- void swapMarkBitMaps();
+ void swap_mark_bitmaps();
- // It resets the global marking data structures, as well as the
+ // Resets the global marking data structures, as well as the
// task local ones; should be called during initial mark.
void reset();
@@ -395,7 +386,7 @@ protected:
// Called to indicate how many threads are currently active.
void set_concurrency(uint active_tasks);
- // It should be called to indicate which phase we're in (concurrent
+ // Should be called to indicate which phase we're in (concurrent
// mark or remark) and how many threads are currently active.
void set_concurrency_and_phase(uint active_tasks, bool concurrent);
@@ -406,18 +397,12 @@ protected:
return _cleanup_list.is_empty();
}
- // Accessor methods
- uint parallel_marking_threads() const { return _parallel_marking_threads; }
- uint max_parallel_marking_threads() const { return _max_parallel_marking_threads;}
- double sleep_factor() { return _sleep_factor; }
- double marking_task_overhead() { return _marking_task_overhead;}
-
HeapWord* finger() { return _finger; }
bool concurrent() { return _concurrent; }
- uint active_tasks() { return _active_tasks; }
+ uint active_tasks() { return _num_active_tasks; }
ParallelTaskTerminator* terminator() { return &_terminator; }
- // It claims the next available region to be scanned by a marking
+ // Claims the next available region to be scanned by a marking
// task/thread. It might return NULL if the next region is empty or
// we have run out of regions. In the latter case, out_of_regions()
// determines whether we've really run out of regions or the task
@@ -433,30 +418,19 @@ protected:
// frequently.
HeapRegion* claim_region(uint worker_id);
- // It determines whether we've run out of regions to scan. Note that
+ // Determines whether we've run out of regions to scan. Note that
// the finger can point past the heap end in case the heap was expanded
// to satisfy an allocation without doing a GC. This is fine, because all
// objects in those regions will be considered live anyway because of
// SATB guarantees (i.e. their TAMS will be equal to bottom).
- bool out_of_regions() { return _finger >= _heap_end; }
+ bool out_of_regions() { return _finger >= _heap_end; }
// Returns the task with the given id
- G1CMTask* task(int id) {
- assert(0 <= id && id < (int) _active_tasks,
- "task id not within active bounds");
+ G1CMTask* task(uint id) {
+ assert(id < _num_active_tasks, "Task id %u not within active bounds up to %u", id, _num_active_tasks);
return _tasks[id];
}
- // Returns the task queue with the given id
- G1CMTaskQueue* task_queue(int id) {
- assert(0 <= id && id < (int) _active_tasks,
- "task queue id not within active bounds");
- return (G1CMTaskQueue*) _task_queues->queue(id);
- }
-
- // Returns the task queue set
- G1CMTaskQueueSet* task_queues() { return _task_queues; }
-
// Access / manipulation of the overflow flag which is set to
// indicate that the global stack has overflown
bool has_overflown() { return _has_overflown; }
@@ -468,16 +442,6 @@ protected:
void enter_first_sync_barrier(uint worker_id);
void enter_second_sync_barrier(uint worker_id);
- // Card index of the bottom of the G1 heap. Used for biasing indices into
- // the card bitmaps.
- intptr_t _heap_bottom_card_num;
-
- // Set to true when initialization is complete
- bool _completed_initialization;
-
- // end_timer, true to end gc timer after ending concurrent phase.
- void register_concurrent_phase_end_common(bool end_timer);
-
// Clear the given bitmap in parallel using the given WorkGang. If may_yield is
// true, periodically insert checks to see if this method should exit prematurely.
void clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield);
@@ -495,13 +459,13 @@ public:
bool mark_stack_pop(G1TaskQueueEntry* arr) {
return _global_mark_stack.par_pop_chunk(arr);
}
- size_t mark_stack_size() { return _global_mark_stack.size(); }
- size_t partial_mark_stack_size_target() { return _global_mark_stack.capacity()/3; }
- bool mark_stack_empty() { return _global_mark_stack.is_empty(); }
+ size_t mark_stack_size() const { return _global_mark_stack.size(); }
+ size_t partial_mark_stack_size_target() const { return _global_mark_stack.capacity() / 3; }
+ bool mark_stack_empty() const { return _global_mark_stack.is_empty(); }
G1CMRootRegions* root_regions() { return &_root_regions; }
- bool concurrent_marking_in_progress() {
+ bool concurrent_marking_in_progress() const {
return _concurrent_marking_in_progress;
}
void set_concurrent_marking_in_progress() {
@@ -520,7 +484,7 @@ public:
double all_task_accum_vtime() {
double ret = 0.0;
- for (uint i = 0; i < _max_worker_id; ++i)
+ for (uint i = 0; i < _max_num_tasks; ++i)
ret += _accum_task_vtime[i];
return ret;
}
@@ -533,18 +497,13 @@ public:
G1RegionToSpaceMapper* next_bitmap_storage);
~G1ConcurrentMark();
- ConcurrentMarkThread* cmThread() { return _cmThread; }
+ ConcurrentMarkThread* cm_thread() { return _cm_thread; }
- const G1CMBitMap* const prevMarkBitMap() const { return _prevMarkBitMap; }
- G1CMBitMap* nextMarkBitMap() const { return _nextMarkBitMap; }
+ const G1CMBitMap* const prev_mark_bitmap() const { return _prev_mark_bitmap; }
+ G1CMBitMap* next_mark_bitmap() const { return _next_mark_bitmap; }
- // Returns the number of GC threads to be used in a concurrent
- // phase based on the number of GC threads being used in a STW
- // phase.
- uint scale_parallel_threads(uint n_par_threads);
-
- // Calculates the number of GC threads to be used in a concurrent phase.
- uint calc_parallel_marking_threads();
+ // Calculates the number of concurrent GC threads to be used in the marking phase.
+ uint calc_active_marking_workers();
// Prepare internal data structures for the next mark cycle. This includes clearing
// the next mark bitmap and some internal data structures. This method is intended
@@ -556,48 +515,49 @@ public:
// Return whether the next mark bitmap has no marks set. To be used for assertions
// only. Will not yield to pause requests.
- bool nextMarkBitmapIsClear();
+ bool next_mark_bitmap_is_clear();
// These two do the work that needs to be done before and after the
// initial root checkpoint. Since this checkpoint can be done at two
// different points (i.e. an explicit pause or piggy-backed on a
// young collection), then it's nice to be able to easily share the
// pre/post code. It might be the case that we can put everything in
- // the post method. TP
- void checkpointRootsInitialPre();
- void checkpointRootsInitialPost();
+ // the post method.
+ void checkpoint_roots_initial_pre();
+ void checkpoint_roots_initial_post();
// Scan all the root regions and mark everything reachable from
// them.
void scan_root_regions();
// Scan a single root region and mark everything reachable from it.
- void scanRootRegion(HeapRegion* hr);
+ void scan_root_region(HeapRegion* hr);
// Do concurrent phase of marking, to a tentative transitive closure.
void mark_from_roots();
- void checkpointRootsFinal(bool clear_all_soft_refs);
- void checkpointRootsFinalWork();
+ void checkpoint_roots_final(bool clear_all_soft_refs);
+ void checkpoint_roots_final_work();
+
void cleanup();
void complete_cleanup();
- // Mark in the previous bitmap. NB: this is usually read-only, so use
- // this carefully!
- inline void markPrev(oop p);
+ // Mark in the previous bitmap. Caution: the prev bitmap is usually read-only, so use
+ // this carefully.
+ inline void mark_in_prev_bitmap(oop p);
// Clears marks for all objects in the given range, for the prev or
- // next bitmaps. NB: the previous bitmap is usually
+ // next bitmaps. Caution: the previous bitmap is usually
// read-only, so use this carefully!
- void clearRangePrevBitmap(MemRegion mr);
+ void clear_range_in_prev_bitmap(MemRegion mr);
+
+ inline bool is_marked_in_prev_bitmap(oop p) const;
// Verify that there are no CSet oops on the stacks (taskqueues /
// global mark stack) and fingers (global / per-task).
// If marking is not in progress, it's a no-op.
void verify_no_cset_oops() PRODUCT_RETURN;
- inline bool isPrevMarked(oop p) const;
-
inline bool do_yield_check();
// Abandon current marking iteration due to a Full GC.
@@ -661,78 +621,71 @@ private:
uint _worker_id;
G1CollectedHeap* _g1h;
G1ConcurrentMark* _cm;
- G1CMBitMap* _nextMarkBitMap;
+ G1CMBitMap* _next_mark_bitmap;
// the task queue of this task
G1CMTaskQueue* _task_queue;
-private:
- // the task queue set---needed for stealing
- G1CMTaskQueueSet* _task_queues;
- // indicates whether the task has been claimed---this is only for
- // debugging purposes
- bool _claimed;
- // number of calls to this task
- int _calls;
+ // Number of calls to this task
+ uint _calls;
- // when the virtual timer reaches this time, the marking step should
- // exit
+ // When the virtual timer reaches this time, the marking step should exit
double _time_target_ms;
- // the start time of the current marking step
+ // Start time of the current marking step
double _start_time_ms;
- // the oop closure used for iterations over oops
+ // Oop closure used for iterations over oops
G1CMOopClosure* _cm_oop_closure;
- // the region this task is scanning, NULL if we're not scanning any
+ // Region this task is scanning, NULL if we're not scanning any
HeapRegion* _curr_region;
- // the local finger of this task, NULL if we're not scanning a region
+ // Local finger of this task, NULL if we're not scanning a region
HeapWord* _finger;
- // limit of the region this task is scanning, NULL if we're not scanning one
+ // Limit of the region this task is scanning, NULL if we're not scanning one
HeapWord* _region_limit;
- // the number of words this task has scanned
+ // Number of words this task has scanned
size_t _words_scanned;
// When _words_scanned reaches this limit, the regular clock is
// called. Notice that this might be decreased under certain
// circumstances (i.e. when we believe that we did an expensive
// operation).
size_t _words_scanned_limit;
- // the initial value of _words_scanned_limit (i.e. what it was
+ // Initial value of _words_scanned_limit (i.e. what it was
// before it was decreased).
size_t _real_words_scanned_limit;
- // the number of references this task has visited
+ // Number of references this task has visited
size_t _refs_reached;
// When _refs_reached reaches this limit, the regular clock is
// called. Notice this this might be decreased under certain
// circumstances (i.e. when we believe that we did an expensive
// operation).
size_t _refs_reached_limit;
- // the initial value of _refs_reached_limit (i.e. what it was before
+ // Initial value of _refs_reached_limit (i.e. what it was before
// it was decreased).
size_t _real_refs_reached_limit;
- // used by the work stealing stuff
+ // Used by work stealing
int _hash_seed;
- // if this is true, then the task has aborted for some reason
+ // If true, then the task has aborted for some reason
bool _has_aborted;
- // set when the task aborts because it has met its time quota
+ // Set when the task aborts because it has met its time quota
bool _has_timed_out;
- // true when we're draining SATB buffers; this avoids the task
+ // True when we're draining SATB buffers; this avoids the task
// aborting due to SATB buffers being available (as we're already
// dealing with them)
bool _draining_satb_buffers;
- // number sequence of past step times
+ // Number sequence of past step times
NumberSeq _step_times_ms;
- // elapsed time of this task
+ // Elapsed time of this task
double _elapsed_time_ms;
- // termination time of this task
+ // Termination time of this task
double _termination_time_ms;
- // when this task got into the termination protocol
+ // When this task got into the termination protocol
double _termination_start_time_ms;
- // true when the task is during a concurrent phase, false when it is
+ // True when the task is during a concurrent phase, false when it is
// in the remark phase (so, in the latter case, we do not have to
// check all the things that we have to check during the concurrent
// phase, i.e. SATB buffer availability...)
@@ -740,21 +693,21 @@ private:
TruncatedSeq _marking_step_diffs_ms;
- // it updates the local fields after this task has claimed
+ // Updates the local fields after this task has claimed
// a new region to scan
void setup_for_region(HeapRegion* hr);
- // it brings up-to-date the limit of the region
+ // Brings the limit of the region up to date
void update_region_limit();
- // called when either the words scanned or the refs visited limit
+ // Called when either the words scanned or the refs visited limit
// has been reached
void reached_limit();
- // recalculates the words scanned and refs visited limits
+ // Recalculates the words scanned and refs visited limits
void recalculate_limits();
- // decreases the words scanned and refs visited limits when we reach
+ // Decreases the words scanned and refs visited limits when we reach
// an expensive operation
void decrease_limits();
- // it checks whether the words scanned or refs visited reached their
+ // Checks whether the words scanned or refs visited reached their
// respective limit and calls reached_limit() if they have
void check_limits() {
if (_words_scanned >= _words_scanned_limit ||
@@ -762,11 +715,10 @@ private:
reached_limit();
}
}
- // this is supposed to be called regularly during a marking step as
+ // Supposed to be called regularly during a marking step as
// it checks a bunch of conditions that might cause the marking step
// to abort
void regular_clock_call();
- bool concurrent() { return _concurrent; }
// Test whether obj might have already been passed over by the
// mark bitmap scan, and so needs to be pushed onto the mark stack.
@@ -777,10 +729,9 @@ public:
// Apply the closure on the given area of the objArray. Return the number of words
// scanned.
inline size_t scan_objArray(objArrayOop obj, MemRegion mr);
- // It resets the task; it should be called right at the beginning of
- // a marking phase.
- void reset(G1CMBitMap* _nextMarkBitMap);
- // it clears all the fields that correspond to a claimed region.
+ // Resets the task; should be called right at the beginning of a marking phase.
+ void reset(G1CMBitMap* next_mark_bitmap);
+ // Clears all the fields that correspond to a claimed region.
void clear_region_fields();
void set_concurrent(bool concurrent) { _concurrent = concurrent; }
@@ -801,7 +752,7 @@ public:
_elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
}
- // returns the worker ID associated with this task.
+ // Returns the worker ID associated with this task.
uint worker_id() { return _worker_id; }
// From TerminatorTerminator. It determines whether this task should
@@ -818,8 +769,6 @@ public:
bool has_aborted() { return _has_aborted; }
void set_has_aborted() { _has_aborted = true; }
void clear_has_aborted() { _has_aborted = false; }
- bool has_timed_out() { return _has_timed_out; }
- bool claimed() { return _claimed; }
void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
@@ -836,10 +785,10 @@ public:
// Precondition: obj is a valid heap object.
inline void deal_with_reference(oop obj);
- // It scans an object and visits its children.
+ // Scans an object and visits its children.
inline void scan_task_entry(G1TaskQueueEntry task_entry);
- // It pushes an object on the local queue.
+ // Pushes an object on the local queue.
inline void push(G1TaskQueueEntry task_entry);
// Move entries to the global stack.
@@ -847,20 +796,20 @@ public:
// Move entries from the global stack, return true if we were successful to do so.
bool get_entries_from_global_stack();
- // It pops and scans objects from the local queue. If partially is
+ // Pops and scans objects from the local queue. If partially is
// true, then it stops when the queue size is of a given limit. If
// partially is false, then it stops when the queue is empty.
void drain_local_queue(bool partially);
- // It moves entries from the global stack to the local queue and
+ // Moves entries from the global stack to the local queue and
// drains the local queue. If partially is true, then it stops when
// both the global stack and the local queue reach a given size. If
// partially if false, it tries to empty them totally.
void drain_global_stack(bool partially);
- // It keeps picking SATB buffers and processing them until no SATB
+ // Keeps picking SATB buffers and processing them until no SATB
// buffers are available.
void drain_satb_buffers();
- // moves the local finger to a new location
+ // Moves the local finger to a new location
inline void move_finger_to(HeapWord* new_finger) {
assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
_finger = new_finger;
@@ -868,10 +817,9 @@ public:
G1CMTask(uint worker_id,
G1ConcurrentMark *cm,
- G1CMTaskQueue* task_queue,
- G1CMTaskQueueSet* task_queues);
+ G1CMTaskQueue* task_queue);
- // it prints statistics associated with this task
+ // Prints statistics associated with this task
void print_stats();
};
@@ -892,14 +840,6 @@ private:
// Accumulator for strong code roots memory size
size_t _total_strong_code_roots_bytes;
- static double perc(size_t val, size_t total) {
- if (total == 0) {
- return 0.0;
- } else {
- return 100.0 * ((double) val / (double) total);
- }
- }
-
static double bytes_to_mb(size_t val) {
return (double) val / (double) M;
}
diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.inline.hpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.inline.hpp
index 86cbb25207f..8f11873950c 100644
--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.inline.hpp
@@ -29,7 +29,7 @@
#include "gc/g1/g1ConcurrentMark.hpp"
#include "gc/g1/g1ConcurrentMarkBitMap.inline.hpp"
#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "utilities/bitMap.inline.hpp"
@@ -51,12 +51,8 @@ inline bool G1ConcurrentMark::mark_in_next_bitmap(HeapRegion* const hr, oop cons
assert(!hr->is_continues_humongous(), "Should not try to mark object " PTR_FORMAT " in Humongous continues region %u above nTAMS " PTR_FORMAT, p2i(obj), hr->hrm_index(), p2i(hr->next_top_at_mark_start()));
HeapWord* const obj_addr = (HeapWord*)obj;
- // Dirty read to avoid CAS.
- if (_nextMarkBitMap->is_marked(obj_addr)) {
- return false;
- }
- return _nextMarkBitMap->par_mark(obj_addr);
+ return _next_mark_bitmap->par_mark(obj_addr);
}
#ifndef PRODUCT
@@ -90,7 +86,7 @@ inline void G1CMTask::push(G1TaskQueueEntry task_entry) {
assert(task_entry.is_array_slice() || !_g1h->is_on_master_free_list(
_g1h->heap_region_containing(task_entry.obj())), "invariant");
assert(task_entry.is_array_slice() || !_g1h->is_obj_ill(task_entry.obj()), "invariant"); // FIXME!!!
- assert(task_entry.is_array_slice() || _nextMarkBitMap->is_marked((HeapWord*)task_entry.obj()), "invariant");
+ assert(task_entry.is_array_slice() || _next_mark_bitmap->is_marked((HeapWord*)task_entry.obj()), "invariant");
if (!_task_queue->push(task_entry)) {
// The local task queue looks full. We need to push some entries
@@ -138,7 +134,7 @@ inline bool G1CMTask::is_below_finger(oop obj, HeapWord* global_finger) const {
template<bool scan>
inline void G1CMTask::process_grey_task_entry(G1TaskQueueEntry task_entry) {
assert(scan || (task_entry.is_oop() && task_entry.obj()->is_typeArray()), "Skipping scan of grey non-typeArray");
- assert(task_entry.is_array_slice() || _nextMarkBitMap->is_marked((HeapWord*)task_entry.obj()),
+ assert(task_entry.is_array_slice() || _next_mark_bitmap->is_marked((HeapWord*)task_entry.obj()),
"Any stolen object should be a slice or marked");
if (scan) {
@@ -211,14 +207,14 @@ inline void G1CMTask::deal_with_reference(oop obj) {
make_reference_grey(obj);
}
-inline void G1ConcurrentMark::markPrev(oop p) {
- assert(!_prevMarkBitMap->is_marked((HeapWord*) p), "sanity");
- _prevMarkBitMap->mark((HeapWord*) p);
+inline void G1ConcurrentMark::mark_in_prev_bitmap(oop p) {
+ assert(!_prev_mark_bitmap->is_marked((HeapWord*) p), "sanity");
+ _prev_mark_bitmap->mark((HeapWord*) p);
}
-bool G1ConcurrentMark::isPrevMarked(oop p) const {
+bool G1ConcurrentMark::is_marked_in_prev_bitmap(oop p) const {
assert(p != NULL && oopDesc::is_oop(p), "expected an oop");
- return _prevMarkBitMap->is_marked((HeapWord*)p);
+ return _prev_mark_bitmap->is_marked((HeapWord*)p);
}
inline bool G1ConcurrentMark::do_yield_check() {
diff --git a/src/hotspot/share/gc/g1/g1DefaultPolicy.cpp b/src/hotspot/share/gc/g1/g1DefaultPolicy.cpp
index 05253457cda..945db6e0491 100644
--- a/src/hotspot/share/gc/g1/g1DefaultPolicy.cpp
+++ b/src/hotspot/share/gc/g1/g1DefaultPolicy.cpp
@@ -538,7 +538,7 @@ CollectionSetChooser* G1DefaultPolicy::cset_chooser() const {
}
bool G1DefaultPolicy::about_to_start_mixed_phase() const {
- return _g1->concurrent_mark()->cmThread()->during_cycle() || collector_state()->last_young_gc();
+ return _g1->concurrent_mark()->cm_thread()->during_cycle() || collector_state()->last_young_gc();
}
bool G1DefaultPolicy::need_to_start_conc_mark(const char* source, size_t alloc_word_size) {
@@ -931,7 +931,7 @@ bool G1DefaultPolicy::force_initial_mark_if_outside_cycle(GCCause::Cause gc_caus
// We actually check whether we are marking here and not if we are in a
// reclamation phase. This means that we will schedule a concurrent mark
// even while we are still in the process of reclaiming memory.
- bool during_cycle = _g1->concurrent_mark()->cmThread()->during_cycle();
+ bool during_cycle = _g1->concurrent_mark()->cm_thread()->during_cycle();
if (!during_cycle) {
log_debug(gc, ergo)("Request concurrent cycle initiation (requested by GC cause). GC cause: %s", GCCause::to_string(gc_cause));
collector_state()->set_initiate_conc_mark_if_possible(true);
@@ -1004,12 +1004,8 @@ void G1DefaultPolicy::record_concurrent_mark_cleanup_end() {
record_pause(Cleanup, _mark_cleanup_start_sec, end_sec);
}
-double G1DefaultPolicy::reclaimable_bytes_perc(size_t reclaimable_bytes) const {
- // Returns the given amount of reclaimable bytes (that represents
- // the amount of reclaimable space still to be collected) as a
- // percentage of the current heap capacity.
- size_t capacity_bytes = _g1->capacity();
- return (double) reclaimable_bytes * 100.0 / (double) capacity_bytes;
+double G1DefaultPolicy::reclaimable_bytes_percent(size_t reclaimable_bytes) const {
+ return percent_of(reclaimable_bytes, _g1->capacity());
}
void G1DefaultPolicy::maybe_start_marking() {
@@ -1083,15 +1079,15 @@ bool G1DefaultPolicy::next_gc_should_be_mixed(const char* true_action_str,
// Is the amount of uncollected reclaimable space above G1HeapWastePercent?
size_t reclaimable_bytes = cset_chooser()->remaining_reclaimable_bytes();
- double reclaimable_perc = reclaimable_bytes_perc(reclaimable_bytes);
+ double reclaimable_percent = reclaimable_bytes_percent(reclaimable_bytes);
double threshold = (double) G1HeapWastePercent;
- if (reclaimable_perc <= threshold) {
+ if (reclaimable_percent <= threshold) {
log_debug(gc, ergo)("%s (reclaimable percentage not over threshold). candidate old regions: %u reclaimable: " SIZE_FORMAT " (%1.2f) threshold: " UINTX_FORMAT,
- false_action_str, cset_chooser()->remaining_regions(), reclaimable_bytes, reclaimable_perc, G1HeapWastePercent);
+ false_action_str, cset_chooser()->remaining_regions(), reclaimable_bytes, reclaimable_percent, G1HeapWastePercent);
return false;
}
log_debug(gc, ergo)("%s (candidate old regions available). candidate old regions: %u reclaimable: " SIZE_FORMAT " (%1.2f) threshold: " UINTX_FORMAT,
- true_action_str, cset_chooser()->remaining_regions(), reclaimable_bytes, reclaimable_perc, G1HeapWastePercent);
+ true_action_str, cset_chooser()->remaining_regions(), reclaimable_bytes, reclaimable_percent, G1HeapWastePercent);
return true;
}
diff --git a/src/hotspot/share/gc/g1/g1DefaultPolicy.hpp b/src/hotspot/share/gc/g1/g1DefaultPolicy.hpp
index 81f35971d02..72db8153411 100644
--- a/src/hotspot/share/gc/g1/g1DefaultPolicy.hpp
+++ b/src/hotspot/share/gc/g1/g1DefaultPolicy.hpp
@@ -238,7 +238,10 @@ public:
uint calc_min_old_cset_length() const;
uint calc_max_old_cset_length() const;
- double reclaimable_bytes_perc(size_t reclaimable_bytes) const;
+ // Returns the given amount of reclaimable bytes (that represents
+ // the amount of reclaimable space still to be collected) as a
+ // percentage of the current heap capacity.
+ double reclaimable_bytes_percent(size_t reclaimable_bytes) const;
jlong collection_pause_end_millis() { return _collection_pause_end_millis; }
diff --git a/src/hotspot/share/gc/g1/g1EvacFailure.cpp b/src/hotspot/share/gc/g1/g1EvacFailure.cpp
index 4d698ec6218..d663f6be1c1 100644
--- a/src/hotspot/share/gc/g1/g1EvacFailure.cpp
+++ b/src/hotspot/share/gc/g1/g1EvacFailure.cpp
@@ -110,8 +110,8 @@ public:
// We consider all objects that we find self-forwarded to be
// live. What we'll do is that we'll update the prev marking
// info so that they are all under PTAMS and explicitly marked.
- if (!_cm->isPrevMarked(obj)) {
- _cm->markPrev(obj);
+ if (!_cm->is_marked_in_prev_bitmap(obj)) {
+ _cm->mark_in_prev_bitmap(obj);
}
if (_during_initial_mark) {
// For the next marking info we'll only mark the
@@ -181,7 +181,7 @@ public:
#endif
}
}
- _cm->clearRangePrevBitmap(mr);
+ _cm->clear_range_in_prev_bitmap(mr);
}
void zap_remainder() {
diff --git a/src/hotspot/share/gc/g1/g1EvacStats.inline.hpp b/src/hotspot/share/gc/g1/g1EvacStats.inline.hpp
index 007f62d3af4..5acd4c146f6 100644
--- a/src/hotspot/share/gc/g1/g1EvacStats.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1EvacStats.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,17 +29,17 @@
#include "runtime/atomic.hpp"
inline void G1EvacStats::add_direct_allocated(size_t value) {
- Atomic::add_ptr(value, &_direct_allocated);
+ Atomic::add(value, &_direct_allocated);
}
inline void G1EvacStats::add_region_end_waste(size_t value) {
- Atomic::add_ptr(value, &_region_end_waste);
- Atomic::add_ptr(1, &_regions_filled);
+ Atomic::add(value, &_region_end_waste);
+ Atomic::inc(&_regions_filled);
}
inline void G1EvacStats::add_failure_used_and_waste(size_t used, size_t waste) {
- Atomic::add_ptr(used, &_failure_used);
- Atomic::add_ptr(waste, &_failure_waste);
+ Atomic::add(used, &_failure_used);
+ Atomic::add(waste, &_failure_waste);
}
#endif // SHARE_VM_GC_G1_G1EVACSTATS_INLINE_HPP
diff --git a/src/hotspot/share/gc/g1/g1HeapSizingPolicy.cpp b/src/hotspot/share/gc/g1/g1HeapSizingPolicy.cpp
index 5fa79c87dfc..68e2a8eac03 100644
--- a/src/hotspot/share/gc/g1/g1HeapSizingPolicy.cpp
+++ b/src/hotspot/share/gc/g1/g1HeapSizingPolicy.cpp
@@ -51,9 +51,9 @@ size_t G1HeapSizingPolicy::expansion_amount() {
assert(GCTimeRatio > 0,
"we should have set it to a default value set_g1_gc_flags() "
"if a user set it to 0");
- const double gc_overhead_perc = 100.0 * (1.0 / (1.0 + GCTimeRatio));
+ const double gc_overhead_percent = 100.0 * (1.0 / (1.0 + GCTimeRatio));
- double threshold = gc_overhead_perc;
+ double threshold = gc_overhead_percent;
size_t expand_bytes = 0;
// If the heap is at less than half its maximum size, scale the threshold down,
@@ -107,9 +107,9 @@ size_t G1HeapSizingPolicy::expansion_amount() {
} else {
double const MinScaleDownFactor = 0.2;
double const MaxScaleUpFactor = 2;
- double const StartScaleDownAt = gc_overhead_perc;
- double const StartScaleUpAt = gc_overhead_perc * 1.5;
- double const ScaleUpRange = gc_overhead_perc * 2.0;
+ double const StartScaleDownAt = gc_overhead_percent;
+ double const StartScaleUpAt = gc_overhead_percent * 1.5;
+ double const ScaleUpRange = gc_overhead_percent * 2.0;
double ratio_delta;
if (filled_history_buffer) {
diff --git a/src/hotspot/share/gc/g1/g1HeapVerifier.cpp b/src/hotspot/share/gc/g1/g1HeapVerifier.cpp
index 8b2e0334333..b820d101469 100644
--- a/src/hotspot/share/gc/g1/g1HeapVerifier.cpp
+++ b/src/hotspot/share/gc/g1/g1HeapVerifier.cpp
@@ -161,18 +161,18 @@ class YoungRefCounterClosure : public OopClosure {
void reset_count() { _count = 0; };
};
-class VerifyKlassClosure: public KlassClosure {
+class VerifyCLDClosure: public CLDClosure {
YoungRefCounterClosure _young_ref_counter_closure;
OopClosure *_oop_closure;
public:
- VerifyKlassClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {}
- void do_klass(Klass* k) {
- k->oops_do(_oop_closure);
+ VerifyCLDClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {}
+ void do_cld(ClassLoaderData* cld) {
+ cld->oops_do(_oop_closure, false);
_young_ref_counter_closure.reset_count();
- k->oops_do(&_young_ref_counter_closure);
+ cld->oops_do(&_young_ref_counter_closure, false);
if (_young_ref_counter_closure.count() > 0) {
- guarantee(k->has_modified_oops(), "Klass " PTR_FORMAT ", has young refs but is not dirty.", p2i(k));
+ guarantee(cld->has_modified_oops(), "CLD " PTR_FORMAT ", has young %d refs but is not dirty.", p2i(cld), _young_ref_counter_closure.count());
}
}
};
@@ -390,8 +390,7 @@ void G1HeapVerifier::verify(VerifyOption vo) {
log_debug(gc, verify)("Roots");
VerifyRootsClosure rootsCl(vo);
- VerifyKlassClosure klassCl(_g1h, &rootsCl);
- CLDToKlassAndOopClosure cldCl(&klassCl, &rootsCl, false);
+ VerifyCLDClosure cldCl(_g1h, &rootsCl);
// We apply the relevant closures to all the oops in the
// system dictionary, class loader data graph, the string table
@@ -648,8 +647,8 @@ bool G1HeapVerifier::verify_no_bits_over_tams(const char* bitmap_name, const G1C
}
bool G1HeapVerifier::verify_bitmaps(const char* caller, HeapRegion* hr) {
- const G1CMBitMap* const prev_bitmap = _g1h->concurrent_mark()->prevMarkBitMap();
- const G1CMBitMap* const next_bitmap = _g1h->concurrent_mark()->nextMarkBitMap();
+ const G1CMBitMap* const prev_bitmap = _g1h->concurrent_mark()->prev_mark_bitmap();
+ const G1CMBitMap* const next_bitmap = _g1h->concurrent_mark()->next_mark_bitmap();
HeapWord* ptams = hr->prev_top_at_mark_start();
HeapWord* ntams = hr->next_top_at_mark_start();
diff --git a/src/hotspot/share/gc/g1/g1HotCardCache.cpp b/src/hotspot/share/gc/g1/g1HotCardCache.cpp
index 346b4d5128a..1895364dd5e 100644
--- a/src/hotspot/share/gc/g1/g1HotCardCache.cpp
+++ b/src/hotspot/share/gc/g1/g1HotCardCache.cpp
@@ -74,9 +74,9 @@ jbyte* G1HotCardCache::insert(jbyte* card_ptr) {
// card_ptr in favor of the other option, which would be starting over. This
// should be OK since card_ptr will likely be the older card already when/if
// this ever happens.
- jbyte* previous_ptr = (jbyte*)Atomic::cmpxchg_ptr(card_ptr,
- &_hot_cache[masked_index],
- current_ptr);
+ jbyte* previous_ptr = Atomic::cmpxchg(card_ptr,
+ &_hot_cache[masked_index],
+ current_ptr);
return (previous_ptr == current_ptr) ? previous_ptr : card_ptr;
}
diff --git a/src/hotspot/share/gc/g1/g1IHOPControl.cpp b/src/hotspot/share/gc/g1/g1IHOPControl.cpp
index 9b74d50a085..25396801ab4 100644
--- a/src/hotspot/share/gc/g1/g1IHOPControl.cpp
+++ b/src/hotspot/share/gc/g1/g1IHOPControl.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -57,7 +57,7 @@ void G1IHOPControl::print() {
log_debug(gc, ihop)("Basic information (value update), threshold: " SIZE_FORMAT "B (%1.2f), target occupancy: " SIZE_FORMAT "B, current occupancy: " SIZE_FORMAT "B, "
"recent allocation size: " SIZE_FORMAT "B, recent allocation duration: %1.2fms, recent old gen allocation rate: %1.2fB/s, recent marking phase length: %1.2fms",
cur_conc_mark_start_threshold,
- cur_conc_mark_start_threshold * 100.0 / _target_occupancy,
+ percent_of(cur_conc_mark_start_threshold, _target_occupancy),
_target_occupancy,
G1CollectedHeap::heap()->used(),
_last_allocated_bytes,
diff --git a/src/hotspot/share/gc/g1/g1MMUTracker.cpp b/src/hotspot/share/gc/g1/g1MMUTracker.cpp
index fdb99afe7b7..e3e522179e9 100644
--- a/src/hotspot/share/gc/g1/g1MMUTracker.cpp
+++ b/src/hotspot/share/gc/g1/g1MMUTracker.cpp
@@ -29,8 +29,6 @@
#include "runtime/mutexLocker.hpp"
#include "utilities/ostream.hpp"
-#define _DISABLE_MMU 0
-
// can't rely on comparing doubles with tolerating a small margin for error
#define SMALL_MARGIN 0.0000001
#define is_double_leq_0(_value) ( (_value) < SMALL_MARGIN )
@@ -119,9 +117,6 @@ void G1MMUTrackerQueue::add_pause(double start, double end) {
// of other places (debugging)
double G1MMUTrackerQueue::when_sec(double current_time, double pause_time) {
- if (_DISABLE_MMU)
- return 0.0;
-
MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
remove_expired_entries(current_time);
diff --git a/src/hotspot/share/gc/g1/g1MarkSweep.cpp b/src/hotspot/share/gc/g1/g1MarkSweep.cpp
index aad593b8c20..9fd8fa62790 100644
--- a/src/hotspot/share/gc/g1/g1MarkSweep.cpp
+++ b/src/hotspot/share/gc/g1/g1MarkSweep.cpp
@@ -43,6 +43,7 @@
#include "gc/shared/modRefBarrierSet.hpp"
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/space.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "oops/instanceRefKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
@@ -184,6 +185,11 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
// This is the point where the entire marking should have completed.
assert(GenMarkSweep::_marking_stack.is_empty(), "Marking should have completed");
+ {
+ GCTraceTime(Debug, gc, phases) trace("Weak Processing", gc_timer());
+ WeakProcessor::weak_oops_do(&GenMarkSweep::is_alive, &do_nothing_cl);
+ }
+
if (ClassUnloading) {
GCTraceTime(Debug, gc, phases) trace("Class Unloading", gc_timer());
@@ -272,7 +278,7 @@ void G1MarkSweep::mark_sweep_phase3() {
// Now adjust pointers in remaining weak roots. (All of which should
// have been cleared if they pointed to non-surviving objects.)
- JNIHandles::weak_oops_do(&GenMarkSweep::adjust_pointer_closure);
+ WeakProcessor::oops_do(&GenMarkSweep::adjust_pointer_closure);
if (G1StringDedup::is_enabled()) {
G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure);
diff --git a/src/hotspot/share/gc/g1/g1OopClosures.cpp b/src/hotspot/share/gc/g1/g1OopClosures.cpp
index 9b04478f2d7..94a7da8b722 100644
--- a/src/hotspot/share/gc/g1/g1OopClosures.cpp
+++ b/src/hotspot/share/gc/g1/g1OopClosures.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@ G1ParCopyHelper::G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par
_g1(g1),
_par_scan_state(par_scan_state),
_worker_id(par_scan_state->worker_id()),
- _scanned_klass(NULL),
+ _scanned_cld(NULL),
_cm(_g1->concurrent_mark())
{ }
@@ -42,20 +42,20 @@ G1ScanClosureBase::G1ScanClosureBase(G1CollectedHeap* g1, G1ParScanThreadState*
_g1(g1), _par_scan_state(par_scan_state), _from(NULL)
{ }
-void G1KlassScanClosure::do_klass(Klass* klass) {
- // If the klass has not been dirtied we know that there's
+void G1CLDScanClosure::do_cld(ClassLoaderData* cld) {
+ // If the class loader data has not been dirtied we know that there's
// no references into the young gen and we can skip it.
- if (!_process_only_dirty || klass->has_modified_oops()) {
- // Clean the klass since we're going to scavenge all the metadata.
- klass->clear_modified_oops();
+ if (!_process_only_dirty || cld->has_modified_oops()) {
- // Tell the closure that this klass is the Klass to scavenge
+ // Tell the closure that this class loader data is the CLD to scavenge
// and is the one to dirty if oops are left pointing into the young gen.
- _closure->set_scanned_klass(klass);
+ _closure->set_scanned_cld(cld);
- klass->oops_do(_closure);
+ // Clean the cld since we're going to scavenge all the metadata.
+ // Clear modified oops only if this cld is claimed.
+ cld->oops_do(_closure, _must_claim, /*clear_modified_oops*/true);
- _closure->set_scanned_klass(NULL);
+ _closure->set_scanned_cld(NULL);
}
_count++;
}
diff --git a/src/hotspot/share/gc/g1/g1OopClosures.hpp b/src/hotspot/share/gc/g1/g1OopClosures.hpp
index 20a2bd5252b..4d961ac946e 100644
--- a/src/hotspot/share/gc/g1/g1OopClosures.hpp
+++ b/src/hotspot/share/gc/g1/g1OopClosures.hpp
@@ -107,7 +107,7 @@ protected:
G1CollectedHeap* _g1;
G1ParScanThreadState* _par_scan_state;
uint _worker_id; // Cache value from par_scan_state.
- Klass* _scanned_klass;
+ ClassLoaderData* _scanned_cld;
G1ConcurrentMark* _cm;
// Mark the object if it's not already marked. This is used to mark
@@ -124,13 +124,13 @@ protected:
~G1ParCopyHelper() { }
public:
- void set_scanned_klass(Klass* k) { _scanned_klass = k; }
- template inline void do_klass_barrier(T* p, oop new_obj);
+ void set_scanned_cld(ClassLoaderData* cld) { _scanned_cld = cld; }
+ inline void do_cld_barrier(oop new_obj);
};
enum G1Barrier {
G1BarrierNone,
- G1BarrierKlass
+ G1BarrierCLD
};
enum G1Mark {
@@ -150,14 +150,16 @@ public:
virtual void do_oop(narrowOop* p) { do_oop_work(p); }
};
-class G1KlassScanClosure : public KlassClosure {
+class G1CLDScanClosure : public CLDClosure {
G1ParCopyHelper* _closure;
bool _process_only_dirty;
+ bool _must_claim;
int _count;
public:
- G1KlassScanClosure(G1ParCopyHelper* closure, bool process_only_dirty)
- : _process_only_dirty(process_only_dirty), _closure(closure), _count(0) {}
- void do_klass(Klass* klass);
+ G1CLDScanClosure(G1ParCopyHelper* closure,
+ bool process_only_dirty, bool must_claim)
+ : _process_only_dirty(process_only_dirty), _must_claim(must_claim), _closure(closure), _count(0) {}
+ void do_cld(ClassLoaderData* cld);
};
// Closure for iterating over object fields during concurrent marking
diff --git a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp
index 017b2df5c13..e3b6887880c 100644
--- a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp
@@ -195,10 +195,9 @@ inline void G1ScanObjsDuringScanRSClosure::do_oop_nv(T* p) {
}
}
-template
-void G1ParCopyHelper::do_klass_barrier(T* p, oop new_obj) {
+void G1ParCopyHelper::do_cld_barrier(oop new_obj) {
if (_g1->heap_region_containing(new_obj)->is_young()) {
- _scanned_klass->record_modified_oops();
+ _scanned_cld->record_modified_oops();
}
}
@@ -249,8 +248,8 @@ void G1ParCopyClosure::do_oop_work(T* p) {
mark_forwarded_object(obj, forwardee);
}
- if (barrier == G1BarrierKlass) {
- do_klass_barrier(p, forwardee);
+ if (barrier == G1BarrierCLD) {
+ do_cld_barrier(forwardee);
}
} else {
if (state.is_humongous()) {
@@ -267,5 +266,4 @@ void G1ParCopyClosure::do_oop_work(T* p) {
}
}
}
-
#endif // SHARE_VM_GC_G1_G1OOPCLOSURES_INLINE_HPP
diff --git a/src/hotspot/share/gc/g1/g1PageBasedVirtualSpace.cpp b/src/hotspot/share/gc/g1/g1PageBasedVirtualSpace.cpp
index 82931884517..bdf01d36940 100644
--- a/src/hotspot/share/gc/g1/g1PageBasedVirtualSpace.cpp
+++ b/src/hotspot/share/gc/g1/g1PageBasedVirtualSpace.cpp
@@ -251,7 +251,7 @@ public:
virtual void work(uint worker_id) {
size_t const actual_chunk_size = MAX2(chunk_size(), _page_size);
while (true) {
- char* touch_addr = (char*)Atomic::add_ptr((intptr_t)actual_chunk_size, (volatile void*) &_cur_addr) - actual_chunk_size;
+ char* touch_addr = Atomic::add(actual_chunk_size, &_cur_addr) - actual_chunk_size;
if (touch_addr < _start_addr || touch_addr >= _end_addr) {
break;
}
diff --git a/src/hotspot/share/gc/g1/g1Policy.hpp b/src/hotspot/share/gc/g1/g1Policy.hpp
index ec1ea47abb9..756191d84a7 100644
--- a/src/hotspot/share/gc/g1/g1Policy.hpp
+++ b/src/hotspot/share/gc/g1/g1Policy.hpp
@@ -89,7 +89,7 @@ public:
// Returns the given amount of uncollected reclaimable space
// as a percentage of the current heap capacity.
- virtual double reclaimable_bytes_perc(size_t reclaimable_bytes) const = 0;
+ virtual double reclaimable_bytes_percent(size_t reclaimable_bytes) const = 0;
virtual ~G1Policy() {}
diff --git a/src/hotspot/share/gc/g1/g1RemSet.cpp b/src/hotspot/share/gc/g1/g1RemSet.cpp
index 0c504db5998..76f54d85004 100644
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp
@@ -36,8 +36,8 @@
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionManager.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
#include "memory/iterator.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
diff --git a/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.hpp b/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.hpp
index 0cd8970a89b..65315484aeb 100644
--- a/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.hpp
+++ b/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -54,8 +54,6 @@ public:
// pre-marking object graph.
static void enqueue(oop pre_val);
- virtual bool has_write_ref_pre_barrier() { return true; }
-
// We export this to make it available in cases where the static
// type of the barrier set is known. Note that it is non-virtual.
template inline void inline_write_ref_field_pre(T* field, oop newVal);
@@ -63,9 +61,6 @@ public:
// These are the more general virtual versions.
inline virtual void write_ref_field_pre_work(oop* field, oop new_val);
inline virtual void write_ref_field_pre_work(narrowOop* field, oop new_val);
- virtual void write_ref_field_pre_work(void* field, oop new_val) {
- guarantee(false, "Not needed");
- }
template void write_ref_array_pre_work(T* dst, int count);
virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized);
diff --git a/src/hotspot/share/gc/g1/g1SharedClosures.hpp b/src/hotspot/share/gc/g1/g1SharedClosures.hpp
index 2c9352394ae..38c54f5b7d4 100644
--- a/src/hotspot/share/gc/g1/g1SharedClosures.hpp
+++ b/src/hotspot/share/gc/g1/g1SharedClosures.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -34,18 +34,17 @@ class G1ParScanThreadState;
template
class G1SharedClosures VALUE_OBJ_CLASS_SPEC {
public:
- G1ParCopyClosure _oops;
- G1ParCopyClosure _oop_in_klass;
- G1KlassScanClosure _klass_in_cld_closure;
- CLDToKlassAndOopClosure _clds;
- G1CodeBlobClosure _codeblobs;
- BufferingOopClosure _buffered_oops;
+ G1ParCopyClosure _oops;
+ G1ParCopyClosure _oops_in_cld;
- G1SharedClosures(G1CollectedHeap* g1h, G1ParScanThreadState* pss, bool process_only_dirty_klasses, bool must_claim_cld) :
+ G1CLDScanClosure _clds;
+ G1CodeBlobClosure _codeblobs;
+ BufferingOopClosure _buffered_oops;
+
+ G1SharedClosures(G1CollectedHeap* g1h, G1ParScanThreadState* pss, bool process_only_dirty, bool must_claim_cld) :
_oops(g1h, pss),
- _oop_in_klass(g1h, pss),
- _klass_in_cld_closure(&_oop_in_klass, process_only_dirty_klasses),
- _clds(&_klass_in_cld_closure, &_oops, must_claim_cld),
+ _oops_in_cld(g1h, pss),
+ _clds(&_oops_in_cld, process_only_dirty, must_claim_cld),
_codeblobs(&_oops),
_buffered_oops(&_oops) {}
};
diff --git a/src/hotspot/share/gc/g1/g1StringDedup.cpp b/src/hotspot/share/gc/g1/g1StringDedup.cpp
index 9819d3eb26a..5d303eb8189 100644
--- a/src/hotspot/share/gc/g1/g1StringDedup.cpp
+++ b/src/hotspot/share/gc/g1/g1StringDedup.cpp
@@ -203,12 +203,12 @@ G1StringDedupUnlinkOrOopsDoClosure::~G1StringDedupUnlinkOrOopsDoClosure() {
// Atomically claims the next available queue for exclusive access by
// the current thread. Returns the queue number of the claimed queue.
size_t G1StringDedupUnlinkOrOopsDoClosure::claim_queue() {
- return (size_t)Atomic::add_ptr(1, &_next_queue) - 1;
+ return Atomic::add((size_t)1, &_next_queue) - 1;
}
// Atomically claims the next available table partition for exclusive
// access by the current thread. Returns the table bucket number where
// the claimed partition starts.
size_t G1StringDedupUnlinkOrOopsDoClosure::claim_table_partition(size_t partition_size) {
- return (size_t)Atomic::add_ptr(partition_size, &_next_bucket) - partition_size;
+ return Atomic::add(partition_size, &_next_bucket) - partition_size;
}
diff --git a/src/hotspot/share/gc/g1/g1StringDedupQueue.cpp b/src/hotspot/share/gc/g1/g1StringDedupQueue.cpp
index b029c3f2b40..546f33d9120 100644
--- a/src/hotspot/share/gc/g1/g1StringDedupQueue.cpp
+++ b/src/hotspot/share/gc/g1/g1StringDedupQueue.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -90,7 +90,7 @@ void G1StringDedupQueue::push(uint worker_id, oop java_string) {
}
} else {
// Queue is full, drop the string and update the statistics
- Atomic::inc_ptr(&_queue->_dropped);
+ Atomic::inc(&_queue->_dropped);
}
}
diff --git a/src/hotspot/share/gc/g1/g1StringDedupStat.cpp b/src/hotspot/share/gc/g1/g1StringDedupStat.cpp
index 6443f6e8a92..7c1a60ad5e1 100644
--- a/src/hotspot/share/gc/g1/g1StringDedupStat.cpp
+++ b/src/hotspot/share/gc/g1/g1StringDedupStat.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -82,7 +82,7 @@ void G1StringDedupStat::print_end(const G1StringDedupStat& last_stat, const G1St
if (total_stat._new_bytes > 0) {
// Avoid division by zero
- total_deduped_bytes_percent = (double)total_stat._deduped_bytes / (double)total_stat._new_bytes * 100.0;
+ total_deduped_bytes_percent = percent_of(total_stat._deduped_bytes, total_stat._new_bytes);
}
log_info(gc, stringdedup)(
@@ -100,48 +100,16 @@ void G1StringDedupStat::print_end(const G1StringDedupStat& last_stat, const G1St
}
void G1StringDedupStat::print_statistics(const G1StringDedupStat& stat, bool total) {
- double young_percent = 0.0;
- double old_percent = 0.0;
- double skipped_percent = 0.0;
- double hashed_percent = 0.0;
- double known_percent = 0.0;
- double new_percent = 0.0;
- double deduped_percent = 0.0;
- double deduped_bytes_percent = 0.0;
- double deduped_young_percent = 0.0;
- double deduped_young_bytes_percent = 0.0;
- double deduped_old_percent = 0.0;
- double deduped_old_bytes_percent = 0.0;
-
- if (stat._inspected > 0) {
- // Avoid division by zero
- skipped_percent = (double)stat._skipped / (double)stat._inspected * 100.0;
- hashed_percent = (double)stat._hashed / (double)stat._inspected * 100.0;
- known_percent = (double)stat._known / (double)stat._inspected * 100.0;
- new_percent = (double)stat._new / (double)stat._inspected * 100.0;
- }
-
- if (stat._new > 0) {
- // Avoid division by zero
- deduped_percent = (double)stat._deduped / (double)stat._new * 100.0;
- }
-
- if (stat._deduped > 0) {
- // Avoid division by zero
- deduped_young_percent = (double)stat._deduped_young / (double)stat._deduped * 100.0;
- deduped_old_percent = (double)stat._deduped_old / (double)stat._deduped * 100.0;
- }
-
- if (stat._new_bytes > 0) {
- // Avoid division by zero
- deduped_bytes_percent = (double)stat._deduped_bytes / (double)stat._new_bytes * 100.0;
- }
-
- if (stat._deduped_bytes > 0) {
- // Avoid division by zero
- deduped_young_bytes_percent = (double)stat._deduped_young_bytes / (double)stat._deduped_bytes * 100.0;
- deduped_old_bytes_percent = (double)stat._deduped_old_bytes / (double)stat._deduped_bytes * 100.0;
- }
+ double skipped_percent = percent_of(stat._skipped, stat._inspected);
+ double hashed_percent = percent_of(stat._hashed, stat._inspected);
+ double known_percent = percent_of(stat._known, stat._inspected);
+ double new_percent = percent_of(stat._new, stat._inspected);
+ double deduped_percent = percent_of(stat._deduped, stat._new);
+ double deduped_bytes_percent = percent_of(stat._deduped_bytes, stat._new_bytes);
+ double deduped_young_percent = percent_of(stat._deduped_young, stat._deduped);
+ double deduped_young_bytes_percent = percent_of(stat._deduped_young_bytes, stat._deduped_bytes);
+ double deduped_old_percent = percent_of(stat._deduped_old, stat._deduped);
+ double deduped_old_bytes_percent = percent_of(stat._deduped_old_bytes, stat._deduped_bytes);
if (total) {
log_debug(gc, stringdedup)(
diff --git a/src/hotspot/share/gc/g1/g1StringDedupTable.cpp b/src/hotspot/share/gc/g1/g1StringDedupTable.cpp
index 1554ef458ba..4b236f9cae0 100644
--- a/src/hotspot/share/gc/g1/g1StringDedupTable.cpp
+++ b/src/hotspot/share/gc/g1/g1StringDedupTable.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -616,7 +616,7 @@ void G1StringDedupTable::print_statistics() {
G1_STRDEDUP_BYTES_PARAM(_table->_size * sizeof(G1StringDedupEntry*) + (_table->_entries + _entry_cache->size()) * sizeof(G1StringDedupEntry)));
log.debug(" Size: " SIZE_FORMAT ", Min: " SIZE_FORMAT ", Max: " SIZE_FORMAT, _table->_size, _min_size, _max_size);
log.debug(" Entries: " UINTX_FORMAT ", Load: " G1_STRDEDUP_PERCENT_FORMAT_NS ", Cached: " UINTX_FORMAT ", Added: " UINTX_FORMAT ", Removed: " UINTX_FORMAT,
- _table->_entries, (double)_table->_entries / (double)_table->_size * 100.0, _entry_cache->size(), _entries_added, _entries_removed);
+ _table->_entries, percent_of(_table->_entries, _table->_size), _entry_cache->size(), _entries_added, _entries_removed);
log.debug(" Resize Count: " UINTX_FORMAT ", Shrink Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS "), Grow Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS ")",
_resize_count, _table->_shrink_threshold, _shrink_load_factor * 100.0, _table->_grow_threshold, _grow_load_factor * 100.0);
log.debug(" Rehash Count: " UINTX_FORMAT ", Rehash Threshold: " UINTX_FORMAT ", Hash Seed: 0x%x", _rehash_count, _rehash_threshold, _table->_hash_seed);
diff --git a/src/hotspot/share/gc/g1/g1StringDedupThread.cpp b/src/hotspot/share/gc/g1/g1StringDedupThread.cpp
index f0b25d3c6a9..60ec044b3ed 100644
--- a/src/hotspot/share/gc/g1/g1StringDedupThread.cpp
+++ b/src/hotspot/share/gc/g1/g1StringDedupThread.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,7 +28,7 @@
#include "gc/g1/g1StringDedupQueue.hpp"
#include "gc/g1/g1StringDedupTable.hpp"
#include "gc/g1/g1StringDedupThread.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
#include "logging/log.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
diff --git a/src/hotspot/share/gc/g1/g1YoungRemSetSamplingThread.cpp b/src/hotspot/share/gc/g1/g1YoungRemSetSamplingThread.cpp
index d94dbd868e3..4473564b38b 100644
--- a/src/hotspot/share/gc/g1/g1YoungRemSetSamplingThread.cpp
+++ b/src/hotspot/share/gc/g1/g1YoungRemSetSamplingThread.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -29,7 +29,7 @@
#include "gc/g1/g1YoungRemSetSamplingThread.hpp"
#include "gc/g1/heapRegion.inline.hpp"
#include "gc/g1/heapRegionRemSet.hpp"
-#include "gc/g1/suspendibleThreadSet.hpp"
+#include "gc/shared/suspendibleThreadSet.hpp"
#include "runtime/mutexLocker.hpp"
G1YoungRemSetSamplingThread::G1YoungRemSetSamplingThread() :
diff --git a/src/hotspot/share/gc/g1/g1_globals.hpp b/src/hotspot/share/gc/g1/g1_globals.hpp
index 35b5b4580de..d69c1712433 100644
--- a/src/hotspot/share/gc/g1/g1_globals.hpp
+++ b/src/hotspot/share/gc/g1/g1_globals.hpp
@@ -61,10 +61,6 @@
"Confidence level for MMU/pause predictions") \
range(0, 100) \
\
- develop(intx, G1MarkingOverheadPercent, 0, \
- "Overhead of concurrent marking") \
- range(0, 100) \
- \
diagnostic(intx, G1SummarizeRSetStatsPeriod, 0, \
"The period (in number of GCs) at which we will generate " \
"update buffer processing info " \
diff --git a/src/hotspot/share/gc/g1/heapRegion.inline.hpp b/src/hotspot/share/gc/g1/heapRegion.inline.hpp
index 58641f381f1..bad8d41fc09 100644
--- a/src/hotspot/share/gc/g1/heapRegion.inline.hpp
+++ b/src/hotspot/share/gc/g1/heapRegion.inline.hpp
@@ -59,7 +59,7 @@ inline HeapWord* G1ContiguousSpace::par_allocate_impl(size_t min_word_size,
size_t want_to_allocate = MIN2(available, desired_word_size);
if (want_to_allocate >= min_word_size) {
HeapWord* new_top = obj + want_to_allocate;
- HeapWord* result = (HeapWord*)Atomic::cmpxchg_ptr(new_top, top_addr(), obj);
+ HeapWord* result = Atomic::cmpxchg(new_top, top_addr(), obj);
// result can be one of two:
// the old top value: the exchange succeeded
// otherwise: the new value of the top is returned.
@@ -177,7 +177,7 @@ inline size_t HeapRegion::block_size(const HeapWord *addr) const {
return oop(addr)->size();
}
- return block_size_using_bitmap(addr, G1CollectedHeap::heap()->concurrent_mark()->prevMarkBitMap());
+ return block_size_using_bitmap(addr, G1CollectedHeap::heap()->concurrent_mark()->prev_mark_bitmap());
}
inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t min_word_size,
@@ -334,7 +334,7 @@ bool HeapRegion::oops_on_card_seq_iterate_careful(MemRegion mr,
}
#endif
- const G1CMBitMap* const bitmap = g1h->concurrent_mark()->prevMarkBitMap();
+ const G1CMBitMap* const bitmap = g1h->concurrent_mark()->prev_mark_bitmap();
do {
oop obj = oop(cur);
assert(oopDesc::is_oop(obj, true), "Not an oop at " PTR_FORMAT, p2i(cur));
diff --git a/src/hotspot/share/gc/g1/heapRegionRemSet.cpp b/src/hotspot/share/gc/g1/heapRegionRemSet.cpp
index a456dbd8ad7..6ea136ad40f 100644
--- a/src/hotspot/share/gc/g1/heapRegionRemSet.cpp
+++ b/src/hotspot/share/gc/g1/heapRegionRemSet.cpp
@@ -113,9 +113,7 @@ protected:
public:
- HeapRegion* hr() const {
- return (HeapRegion*) OrderAccess::load_ptr_acquire(&_hr);
- }
+ HeapRegion* hr() const { return OrderAccess::load_acquire(&_hr); }
jint occupied() const {
// Overkill, but if we ever need it...
@@ -133,7 +131,7 @@ public:
_bm.clear();
// Make sure that the bitmap clearing above has been finished before publishing
// this PRT to concurrent threads.
- OrderAccess::release_store_ptr(&_hr, hr);
+ OrderAccess::release_store(&_hr, hr);
}
void add_reference(OopOrNarrowOopStar from) {
@@ -182,7 +180,7 @@ public:
while (true) {
PerRegionTable* fl = _free_list;
last->set_next(fl);
- PerRegionTable* res = (PerRegionTable*) Atomic::cmpxchg_ptr(prt, &_free_list, fl);
+ PerRegionTable* res = Atomic::cmpxchg(prt, &_free_list, fl);
if (res == fl) {
return;
}
@@ -199,9 +197,7 @@ public:
PerRegionTable* fl = _free_list;
while (fl != NULL) {
PerRegionTable* nxt = fl->next();
- PerRegionTable* res =
- (PerRegionTable*)
- Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
+ PerRegionTable* res = Atomic::cmpxchg(nxt, &_free_list, fl);
if (res == fl) {
fl->init(hr, true);
return fl;
@@ -416,7 +412,7 @@ void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, uint tid) {
// some mark bits may not yet seem cleared or a 'later' update
// performed by a concurrent thread could be undone when the
// zeroing becomes visible). This requires store ordering.
- OrderAccess::release_store_ptr((volatile PerRegionTable*)&_fine_grain_regions[ind], prt);
+ OrderAccess::release_store(&_fine_grain_regions[ind], prt);
_n_fine_entries++;
if (G1HRRSUseSparseTable) {
diff --git a/src/hotspot/share/gc/g1/heapRegionType.hpp b/src/hotspot/share/gc/g1/heapRegionType.hpp
index e0829c00c8c..f6900d2070f 100644
--- a/src/hotspot/share/gc/g1/heapRegionType.hpp
+++ b/src/hotspot/share/gc/g1/heapRegionType.hpp
@@ -32,6 +32,8 @@
assert(is_valid((tag)), "invalid HR type: %u", (uint) (tag))
class HeapRegionType VALUE_OBJ_CLASS_SPEC {
+friend class VMStructs;
+
private:
// We encode the value of the heap region type so the generation can be
// determined quickly. The tag is split into two parts:
diff --git a/src/hotspot/share/gc/g1/sparsePRT.cpp b/src/hotspot/share/gc/g1/sparsePRT.cpp
index 1ef606e749b..6131f3cd692 100644
--- a/src/hotspot/share/gc/g1/sparsePRT.cpp
+++ b/src/hotspot/share/gc/g1/sparsePRT.cpp
@@ -292,9 +292,7 @@ void SparsePRT::add_to_expanded_list(SparsePRT* sprt) {
SparsePRT* hd = _head_expanded_list;
while (true) {
sprt->_next_expanded = hd;
- SparsePRT* res =
- (SparsePRT*)
- Atomic::cmpxchg_ptr(sprt, &_head_expanded_list, hd);
+ SparsePRT* res = Atomic::cmpxchg(sprt, &_head_expanded_list, hd);
if (res == hd) return;
else hd = res;
}
@@ -305,9 +303,7 @@ SparsePRT* SparsePRT::get_from_expanded_list() {
SparsePRT* hd = _head_expanded_list;
while (hd != NULL) {
SparsePRT* next = hd->next_expanded();
- SparsePRT* res =
- (SparsePRT*)
- Atomic::cmpxchg_ptr(next, &_head_expanded_list, hd);
+ SparsePRT* res = Atomic::cmpxchg(next, &_head_expanded_list, hd);
if (res == hd) {
hd->set_next_expanded(NULL);
return hd;
diff --git a/src/hotspot/share/gc/g1/vmStructs_g1.hpp b/src/hotspot/share/gc/g1/vmStructs_g1.hpp
index 5026d6ee7ac..19f338dc20c 100644
--- a/src/hotspot/share/gc/g1/vmStructs_g1.hpp
+++ b/src/hotspot/share/gc/g1/vmStructs_g1.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -35,6 +35,10 @@
static_field(HeapRegion, GrainBytes, size_t) \
static_field(HeapRegion, LogOfHRGrainBytes, int) \
\
+ nonstatic_field(HeapRegion, _type, HeapRegionType) \
+ \
+ nonstatic_field(HeapRegionType, _tag, HeapRegionType::Tag volatile) \
+ \
nonstatic_field(G1ContiguousSpace, _top, HeapWord* volatile) \
\
nonstatic_field(G1HeapRegionTable, _base, address) \
@@ -67,9 +71,16 @@
#define VM_INT_CONSTANTS_G1(declare_constant, declare_constant_with_value) \
+ declare_constant(HeapRegionType::FreeTag) \
+ declare_constant(HeapRegionType::YoungMask) \
+ declare_constant(HeapRegionType::HumongousMask) \
+ declare_constant(HeapRegionType::PinnedMask) \
+ declare_constant(HeapRegionType::OldMask)
-#define VM_TYPES_G1(declare_type, declare_toplevel_type) \
+#define VM_TYPES_G1(declare_type, \
+ declare_toplevel_type, \
+ declare_integer_type) \
\
declare_toplevel_type(G1HeapRegionTable) \
\
@@ -81,9 +92,12 @@
declare_toplevel_type(HeapRegionSetBase) \
declare_toplevel_type(G1MonitoringSupport) \
declare_toplevel_type(PtrQueue) \
+ declare_toplevel_type(HeapRegionType) \
\
declare_toplevel_type(G1CollectedHeap*) \
declare_toplevel_type(HeapRegion*) \
declare_toplevel_type(G1MonitoringSupport*) \
+ \
+ declare_integer_type(HeapRegionType::Tag volatile)
#endif // SHARE_VM_GC_G1_VMSTRUCTS_G1_HPP
diff --git a/src/hotspot/share/gc/parallel/gcTaskThread.cpp b/src/hotspot/share/gc/parallel/gcTaskThread.cpp
index e300f8c2309..440f5124539 100644
--- a/src/hotspot/share/gc/parallel/gcTaskThread.cpp
+++ b/src/hotspot/share/gc/parallel/gcTaskThread.cpp
@@ -77,8 +77,7 @@ GCTaskTimeStamp* GCTaskThread::time_stamp_at(uint index) {
if (_time_stamps == NULL) {
// We allocate the _time_stamps array lazily since logging can be enabled dynamically
GCTaskTimeStamp* time_stamps = NEW_C_HEAP_ARRAY(GCTaskTimeStamp, GCTaskTimeStampEntries, mtGC);
- void* old = Atomic::cmpxchg_ptr(time_stamps, &_time_stamps, NULL);
- if (old != NULL) {
+ if (Atomic::cmpxchg(time_stamps, &_time_stamps, (GCTaskTimeStamp*)NULL) != NULL) {
// Someone already setup the time stamps
FREE_C_HEAP_ARRAY(GCTaskTimeStamp, time_stamps);
}
diff --git a/src/hotspot/share/gc/parallel/mutableNUMASpace.cpp b/src/hotspot/share/gc/parallel/mutableNUMASpace.cpp
index e765d0f2d90..120ff95bae5 100644
--- a/src/hotspot/share/gc/parallel/mutableNUMASpace.cpp
+++ b/src/hotspot/share/gc/parallel/mutableNUMASpace.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -862,7 +862,7 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
if (p != NULL) {
HeapWord* cur_top, *cur_chunk_top = p + size;
while ((cur_top = top()) < cur_chunk_top) { // Keep _top updated.
- if (Atomic::cmpxchg_ptr(cur_chunk_top, top_addr(), cur_top) == cur_top) {
+ if (Atomic::cmpxchg(cur_chunk_top, top_addr(), cur_top) == cur_top) {
break;
}
}
diff --git a/src/hotspot/share/gc/parallel/mutableSpace.cpp b/src/hotspot/share/gc/parallel/mutableSpace.cpp
index c0e95e3505e..05b7fd19aec 100644
--- a/src/hotspot/share/gc/parallel/mutableSpace.cpp
+++ b/src/hotspot/share/gc/parallel/mutableSpace.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -192,7 +192,7 @@ HeapWord* MutableSpace::cas_allocate(size_t size) {
HeapWord* obj = top();
if (pointer_delta(end(), obj) >= size) {
HeapWord* new_top = obj + size;
- HeapWord* result = (HeapWord*)Atomic::cmpxchg_ptr(new_top, top_addr(), obj);
+ HeapWord* result = Atomic::cmpxchg(new_top, top_addr(), obj);
// result can be one of two:
// the old top value: the exchange succeeded
// otherwise: the new value of the top is returned.
@@ -211,7 +211,7 @@ HeapWord* MutableSpace::cas_allocate(size_t size) {
// Try to deallocate previous allocation. Returns true upon success.
bool MutableSpace::cas_deallocate(HeapWord *obj, size_t size) {
HeapWord* expected_top = obj + size;
- return (HeapWord*)Atomic::cmpxchg_ptr(obj, top_addr(), expected_top) == expected_top;
+ return Atomic::cmpxchg(obj, top_addr(), expected_top) == expected_top;
}
void MutableSpace::oop_iterate_no_header(OopClosure* cl) {
diff --git a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
index 8696160c880..6b9888d6a2f 100644
--- a/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
+++ b/src/hotspot/share/gc/parallel/parMarkBitMap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -89,8 +89,8 @@ ParMarkBitMap::mark_obj(HeapWord* addr, size_t size)
const idx_t end_bit = addr_to_bit(addr + size - 1);
bool end_bit_ok = _end_bits.par_set_bit(end_bit);
assert(end_bit_ok, "concurrency problem");
- DEBUG_ONLY(Atomic::inc_ptr(&mark_bitmap_count));
- DEBUG_ONLY(Atomic::add_ptr(size, &mark_bitmap_size));
+ DEBUG_ONLY(Atomic::inc(&mark_bitmap_count));
+ DEBUG_ONLY(Atomic::add(size, &mark_bitmap_size));
return true;
}
return false;
diff --git a/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp b/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
index dba08b5a92a..27fcf5c1adc 100644
--- a/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
+++ b/src/hotspot/share/gc/parallel/parallelScavengeHeap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -23,6 +23,7 @@
*/
#include "precompiled.hpp"
+#include "code/codeCache.hpp"
#include "gc/parallel/adjoiningGenerations.hpp"
#include "gc/parallel/adjoiningVirtualSpaces.hpp"
#include "gc/parallel/cardTableExtension.hpp"
@@ -169,10 +170,6 @@ bool ParallelScavengeHeap::is_in_reserved(const void* p) const {
return young_gen()->is_in_reserved(p) || old_gen()->is_in_reserved(p);
}
-bool ParallelScavengeHeap::is_scavengable(const void* addr) {
- return is_in_young((oop)addr);
-}
-
// There are two levels of allocation policy here.
//
// When an allocation request fails, the requesting thread must invoke a VM
@@ -574,16 +571,10 @@ void ParallelScavengeHeap::print_gc_threads_on(outputStream* st) const {
}
void ParallelScavengeHeap::print_tracing_info() const {
- if (TraceYoungGenTime) {
- double time = PSScavenge::accumulated_time()->seconds();
- tty->print_cr("[Accumulated GC generation 0 time %3.7f secs]", time);
- }
- if (TraceOldGenTime) {
- double time = UseParallelOldGC ? PSParallelCompact::accumulated_time()->seconds() : PSMarkSweep::accumulated_time()->seconds();
- tty->print_cr("[Accumulated GC generation 1 time %3.7f secs]", time);
- }
-
AdaptiveSizePolicyOutput::print();
+ log_debug(gc, heap, exit)("Accumulated young generation GC time %3.7f secs", PSScavenge::accumulated_time()->seconds());
+ log_debug(gc, heap, exit)("Accumulated old generation GC time %3.7f secs",
+ UseParallelOldGC ? PSParallelCompact::accumulated_time()->seconds() : PSMarkSweep::accumulated_time()->seconds());
}
@@ -671,3 +662,15 @@ void ParallelScavengeHeap::gen_mangle_unused_area() {
}
}
#endif
+
+bool ParallelScavengeHeap::is_scavengable(oop obj) {
+ return is_in_young(obj);
+}
+
+void ParallelScavengeHeap::register_nmethod(nmethod* nm) {
+ CodeCache::register_scavenge_root_nmethod(nm);
+}
+
+void ParallelScavengeHeap::verify_nmethod(nmethod* nm) {
+ CodeCache::verify_scavenge_root_nmethod(nm);
+}
diff --git a/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp b/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp
index e83cb3e5f2f..bfdc55f07f7 100644
--- a/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp
+++ b/src/hotspot/share/gc/parallel/parallelScavengeHeap.hpp
@@ -134,7 +134,9 @@ class ParallelScavengeHeap : public CollectedHeap {
// can be moved in a partial collection. For currently implemented
// generational collectors that means during a collection of
// the young gen.
- virtual bool is_scavengable(const void* addr);
+ virtual bool is_scavengable(oop obj);
+ virtual void register_nmethod(nmethod* nm);
+ virtual void verify_nmethod(nmethod* nmethod);
size_t max_capacity() const;
diff --git a/src/hotspot/share/gc/parallel/pcTasks.cpp b/src/hotspot/share/gc/parallel/pcTasks.cpp
index 8cae47c23ef..f5db42d6d09 100644
--- a/src/hotspot/share/gc/parallel/pcTasks.cpp
+++ b/src/hotspot/share/gc/parallel/pcTasks.cpp
@@ -81,7 +81,6 @@ void MarkFromRootsTask::do_it(GCTaskManager* manager, uint which) {
ParCompactionManager* cm =
ParCompactionManager::gc_thread_compaction_manager(which);
ParCompactionManager::MarkAndPushClosure mark_and_push_closure(cm);
- ParCompactionManager::FollowKlassClosure follow_klass_closure(&mark_and_push_closure);
switch (_root_type) {
case universe:
@@ -117,7 +116,7 @@ void MarkFromRootsTask::do_it(GCTaskManager* manager, uint which) {
break;
case class_loader_data:
- ClassLoaderDataGraph::always_strong_oops_do(&mark_and_push_closure, &follow_klass_closure, true);
+ ClassLoaderDataGraph::always_strong_oops_do(&mark_and_push_closure, true);
break;
case code_cache:
diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.hpp
index 9eb8bceff57..4690b1d9e3e 100644
--- a/src/hotspot/share/gc/parallel/psCompactionManager.hpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -196,17 +196,6 @@ private:
FollowStackClosure(ParCompactionManager* cm) : _compaction_manager(cm) { }
virtual void do_void();
};
-
- // The one and only place to start following the classes.
- // Should only be applied to the ClassLoaderData klasses list.
- class FollowKlassClosure : public KlassClosure {
- private:
- MarkAndPushClosure* _mark_and_push_closure;
- public:
- FollowKlassClosure(MarkAndPushClosure* mark_and_push_closure) :
- _mark_and_push_closure(mark_and_push_closure) { }
- void do_klass(Klass* klass);
- };
};
inline ParCompactionManager* ParCompactionManager::manager_array(uint index) {
diff --git a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
index 7d844c08db7..1376fa65c8e 100644
--- a/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
+++ b/src/hotspot/share/gc/parallel/psCompactionManager.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -98,15 +98,10 @@ inline void ParCompactionManager::FollowStackClosure::do_void() {
_compaction_manager->follow_marking_stacks();
}
-inline void ParCompactionManager::FollowKlassClosure::do_klass(Klass* klass) {
- klass->oops_do(_mark_and_push_closure);
-}
-
inline void ParCompactionManager::follow_class_loader(ClassLoaderData* cld) {
MarkAndPushClosure mark_and_push_closure(this);
- FollowKlassClosure follow_klass_closure(&mark_and_push_closure);
- cld->oops_do(&mark_and_push_closure, &follow_klass_closure, true);
+ cld->oops_do(&mark_and_push_closure, true);
}
inline void ParCompactionManager::follow_contents(oop obj) {
diff --git a/src/hotspot/share/gc/parallel/psMarkSweep.cpp b/src/hotspot/share/gc/parallel/psMarkSweep.cpp
index 76f0d444f6f..db566bf0ad0 100644
--- a/src/hotspot/share/gc/parallel/psMarkSweep.cpp
+++ b/src/hotspot/share/gc/parallel/psMarkSweep.cpp
@@ -47,6 +47,7 @@
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/spaceDecorator.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "logging/log.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/biasedLocking.hpp"
@@ -173,7 +174,9 @@ bool PSMarkSweep::invoke_no_policy(bool clear_all_softrefs) {
TraceCollectorStats tcs(counters());
TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);
- if (TraceOldGenTime) accumulated_time()->start();
+ if (log_is_enabled(Debug, gc, heap, exit)) {
+ accumulated_time()->start();
+ }
// Let the size policy know we're starting
size_policy->major_collection_begin();
@@ -342,7 +345,9 @@ bool PSMarkSweep::invoke_no_policy(bool clear_all_softrefs) {
// We collected the heap, recalculate the metaspace capacity
MetaspaceGC::compute_new_size();
- if (TraceOldGenTime) accumulated_time()->stop();
+ if (log_is_enabled(Debug, gc, heap, exit)) {
+ accumulated_time()->stop();
+ }
young_gen->print_used_change(young_gen_prev_used);
old_gen->print_used_change(old_gen_prev_used);
@@ -541,6 +546,11 @@ void PSMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {
// This is the point where the entire marking should have completed.
assert(_marking_stack.is_empty(), "Marking should have completed");
+ {
+ GCTraceTime(Debug, gc, phases) t("Weak Processing", _gc_timer);
+ WeakProcessor::weak_oops_do(is_alive_closure(), &do_nothing_cl);
+ }
+
{
GCTraceTime(Debug, gc, phases) t("Class Unloading", _gc_timer);
@@ -613,7 +623,7 @@ void PSMarkSweep::mark_sweep_phase3() {
// Now adjust pointers in remaining weak roots. (All of which should
// have been cleared if they pointed to non-surviving objects.)
// Global (weak) JNI handles
- JNIHandles::weak_oops_do(adjust_pointer_closure());
+ WeakProcessor::oops_do(adjust_pointer_closure());
CodeBlobToOopClosure adjust_from_blobs(adjust_pointer_closure(), CodeBlobToOopClosure::FixRelocations);
CodeCache::blobs_do(&adjust_from_blobs);
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.cpp b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
index ae04316cb3e..9fea2221bfc 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.cpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.cpp
@@ -52,6 +52,7 @@
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/spaceDecorator.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
#include "oops/instanceKlass.inline.hpp"
@@ -520,8 +521,8 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len)
const size_t beg_region = obj_ofs >> Log2RegionSize;
const size_t end_region = (obj_ofs + len - 1) >> Log2RegionSize;
- DEBUG_ONLY(Atomic::inc_ptr(&add_obj_count);)
- DEBUG_ONLY(Atomic::add_ptr(len, &add_obj_size);)
+ DEBUG_ONLY(Atomic::inc(&add_obj_count);)
+ DEBUG_ONLY(Atomic::add(len, &add_obj_size);)
if (beg_region == end_region) {
// All in one region.
@@ -838,11 +839,6 @@ PSParallelCompact::IsAliveClosure PSParallelCompact::_is_alive_closure;
bool PSParallelCompact::IsAliveClosure::do_object_b(oop p) { return mark_bitmap()->is_marked(p); }
-void PSParallelCompact::AdjustKlassClosure::do_klass(Klass* klass) {
- PSParallelCompact::AdjustPointerClosure closure(_cm);
- klass->oops_do(&closure);
-}
-
void PSParallelCompact::post_initialize() {
ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
MemRegion mr = heap->reserved_region();
@@ -1778,7 +1774,9 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
TraceCollectorStats tcs(counters());
TraceMemoryManagerStats tms(true /* Full GC */,gc_cause);
- if (TraceOldGenTime) accumulated_time()->start();
+ if (log_is_enabled(Debug, gc, heap, exit)) {
+ accumulated_time()->start();
+ }
// Let the size policy know we're starting
size_policy->major_collection_begin();
@@ -1897,7 +1895,7 @@ bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
// Resize the metaspace capacity after a collection
MetaspaceGC::compute_new_size();
- if (TraceOldGenTime) {
+ if (log_is_enabled(Debug, gc, heap, exit)) {
accumulated_time()->stop();
}
@@ -2124,6 +2122,11 @@ void PSParallelCompact::marking_phase(ParCompactionManager* cm,
// This is the point where the entire marking should have completed.
assert(cm->marking_stacks_empty(), "Marking should have completed");
+ {
+ GCTraceTime(Debug, gc, phases) tm("Weak Processing", &_gc_timer);
+ WeakProcessor::weak_oops_do(is_alive_closure(), &do_nothing_cl);
+ }
+
{
GCTraceTime(Debug, gc, phases) tm_m("Class Unloading", &_gc_timer);
@@ -2160,7 +2163,6 @@ void PSParallelCompact::adjust_roots(ParCompactionManager* cm) {
ClassLoaderDataGraph::clear_claimed_marks();
PSParallelCompact::AdjustPointerClosure oop_closure(cm);
- PSParallelCompact::AdjustKlassClosure klass_closure(cm);
// General strong roots.
Universe::oops_do(&oop_closure);
@@ -2170,12 +2172,11 @@ void PSParallelCompact::adjust_roots(ParCompactionManager* cm) {
Management::oops_do(&oop_closure);
JvmtiExport::oops_do(&oop_closure);
SystemDictionary::oops_do(&oop_closure);
- ClassLoaderDataGraph::oops_do(&oop_closure, &klass_closure, true);
+ ClassLoaderDataGraph::oops_do(&oop_closure, true);
// Now adjust pointers in remaining weak roots. (All of which should
// have been cleared if they pointed to non-surviving objects.)
- // Global (weak) JNI handles
- JNIHandles::weak_oops_do(&oop_closure);
+ WeakProcessor::oops_do(&oop_closure);
CodeBlobToOopClosure adjust_from_blobs(&oop_closure, CodeBlobToOopClosure::FixRelocations);
CodeCache::blobs_do(&adjust_from_blobs);
diff --git a/src/hotspot/share/gc/parallel/psParallelCompact.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.hpp
index 6bf8270d7fd..0060886dc28 100644
--- a/src/hotspot/share/gc/parallel/psParallelCompact.hpp
+++ b/src/hotspot/share/gc/parallel/psParallelCompact.hpp
@@ -517,7 +517,7 @@ ParallelCompactData::RegionData::set_blocks_filled()
OrderAccess::release();
_blocks_filled = true;
// Debug builds count the number of times the table was filled.
- DEBUG_ONLY(Atomic::inc_ptr(&_blocks_filled_count));
+ DEBUG_ONLY(Atomic::inc(&_blocks_filled_count));
}
inline void
@@ -586,7 +586,7 @@ inline void ParallelCompactData::RegionData::set_highest_ref(HeapWord* addr)
#ifdef ASSERT
HeapWord* tmp = _highest_ref;
while (addr > tmp) {
- tmp = (HeapWord*)Atomic::cmpxchg_ptr(addr, &_highest_ref, tmp);
+ tmp = Atomic::cmpxchg(addr, &_highest_ref, tmp);
}
#endif // #ifdef ASSERT
}
diff --git a/src/hotspot/share/gc/parallel/psScavenge.cpp b/src/hotspot/share/gc/parallel/psScavenge.cpp
index db50e7bcf7b..3cf3e2302a0 100644
--- a/src/hotspot/share/gc/parallel/psScavenge.cpp
+++ b/src/hotspot/share/gc/parallel/psScavenge.cpp
@@ -45,6 +45,7 @@
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/referenceProcessor.hpp"
#include "gc/shared/spaceDecorator.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "memory/resourceArea.hpp"
#include "logging/log.hpp"
#include "oops/oop.inline.hpp"
@@ -306,7 +307,9 @@ bool PSScavenge::invoke_no_policy() {
TraceCollectorStats tcs(counters());
TraceMemoryManagerStats tms(false /* not full GC */,gc_cause);
- if (TraceYoungGenTime) accumulated_time()->start();
+ if (log_is_enabled(Debug, gc, heap, exit)) {
+ accumulated_time()->start();
+ }
// Let the size policy know we're starting
size_policy->minor_collection_begin();
@@ -438,13 +441,24 @@ bool PSScavenge::invoke_no_policy() {
pt.print_enqueue_phase();
}
+ assert(promotion_manager->stacks_empty(),"stacks should be empty at this point");
+
+ PSScavengeRootsClosure root_closure(promotion_manager);
+
+ {
+ GCTraceTime(Debug, gc, phases) tm("Weak Processing", &_gc_timer);
+ WeakProcessor::weak_oops_do(&_is_alive_closure, &root_closure);
+ }
+
{
GCTraceTime(Debug, gc, phases) tm("Scrub String Table", &_gc_timer);
// Unlink any dead interned Strings and process the remaining live ones.
- PSScavengeRootsClosure root_closure(promotion_manager);
StringTable::unlink_or_oops_do(&_is_alive_closure, &root_closure);
}
+ // Verify that usage of root_closure didn't copy any objects.
+ assert(promotion_manager->stacks_empty(),"stacks should be empty at this point");
+
// Finally, flush the promotion_manager's labs, and deallocate its stacks.
promotion_failure_occurred = PSPromotionManager::post_scavenge(_gc_tracer);
if (promotion_failure_occurred) {
@@ -607,7 +621,9 @@ bool PSScavenge::invoke_no_policy() {
CardTableExtension::verify_all_young_refs_imprecise();
}
- if (TraceYoungGenTime) accumulated_time()->stop();
+ if (log_is_enabled(Debug, gc, heap, exit)) {
+ accumulated_time()->stop();
+ }
young_gen->print_used_change(pre_gc_values.young_gen_used());
old_gen->print_used_change(pre_gc_values.old_gen_used());
diff --git a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp
index a944277d00f..70bb4ba4afc 100644
--- a/src/hotspot/share/gc/parallel/psScavenge.inline.hpp
+++ b/src/hotspot/share/gc/parallel/psScavenge.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -85,15 +85,15 @@ class PSRootsClosure: public OopClosure {
typedef PSRootsClosure</*promote_immediately=*/false> PSScavengeRootsClosure;
typedef PSRootsClosure</*promote_immediately=*/true> PSPromoteRootsClosure;
-// Scavenges a single oop in a Klass.
-class PSScavengeFromKlassClosure: public OopClosure {
+// Scavenges a single oop in a ClassLoaderData.
+class PSScavengeFromCLDClosure: public OopClosure {
private:
PSPromotionManager* _pm;
- // Used to redirty a scanned klass if it has oops
+ // Used to redirty a scanned cld if it has oops
// pointing to the young generation after being scanned.
- Klass* _scanned_klass;
+ ClassLoaderData* _scanned_cld;
public:
- PSScavengeFromKlassClosure(PSPromotionManager* pm) : _pm(pm), _scanned_klass(NULL) { }
+ PSScavengeFromCLDClosure(PSPromotionManager* pm) : _pm(pm), _scanned_cld(NULL) { }
void do_oop(narrowOop* p) { ShouldNotReachHere(); }
void do_oop(oop* p) {
ParallelScavengeHeap* psh = ParallelScavengeHeap::heap();
@@ -111,48 +111,46 @@ class PSScavengeFromKlassClosure: public OopClosure {
oopDesc::encode_store_heap_oop_not_null(p, new_obj);
if (PSScavenge::is_obj_in_young(new_obj)) {
- do_klass_barrier();
+ do_cld_barrier();
}
}
}
- void set_scanned_klass(Klass* klass) {
- assert(_scanned_klass == NULL || klass == NULL, "Should always only handling one klass at a time");
- _scanned_klass = klass;
+ void set_scanned_cld(ClassLoaderData* cld) {
+ assert(_scanned_cld == NULL || cld == NULL, "Should always only handling one cld at a time");
+ _scanned_cld = cld;
}
private:
- void do_klass_barrier() {
- assert(_scanned_klass != NULL, "Should not be called without having a scanned klass");
- _scanned_klass->record_modified_oops();
+ void do_cld_barrier() {
+ assert(_scanned_cld != NULL, "Should not be called without having a scanned cld");
+ _scanned_cld->record_modified_oops();
}
-
};
-// Scavenges the oop in a Klass.
-class PSScavengeKlassClosure: public KlassClosure {
+// Scavenges the oop in a ClassLoaderData.
+class PSScavengeCLDClosure: public CLDClosure {
private:
- PSScavengeFromKlassClosure _oop_closure;
+ PSScavengeFromCLDClosure _oop_closure;
protected:
public:
- PSScavengeKlassClosure(PSPromotionManager* pm) : _oop_closure(pm) { }
- void do_klass(Klass* klass) {
- // If the klass has not been dirtied we know that there's
+ PSScavengeCLDClosure(PSPromotionManager* pm) : _oop_closure(pm) { }
+ void do_cld(ClassLoaderData* cld) {
+ // If the cld has not been dirtied we know that there's
// no references into the young gen and we can skip it.
- if (klass->has_modified_oops()) {
- // Clean the klass since we're going to scavenge all the metadata.
- klass->clear_modified_oops();
-
- // Setup the promotion manager to redirty this klass
+ if (cld->has_modified_oops()) {
+ // Setup the promotion manager to redirty this cld
// if references are left in the young gen.
- _oop_closure.set_scanned_klass(klass);
+ _oop_closure.set_scanned_cld(cld);
- klass->oops_do(&_oop_closure);
+ // Clean the cld since we're going to scavenge all the metadata.
+ cld->oops_do(&_oop_closure, false, /*clear_modified_oops*/true);
- _oop_closure.set_scanned_klass(NULL);
+ _oop_closure.set_scanned_cld(NULL);
}
}
};
+
#endif // SHARE_VM_GC_PARALLEL_PSSCAVENGE_INLINE_HPP
diff --git a/src/hotspot/share/gc/parallel/psTasks.cpp b/src/hotspot/share/gc/parallel/psTasks.cpp
index 35e63dc52c3..3effcc6d1f6 100644
--- a/src/hotspot/share/gc/parallel/psTasks.cpp
+++ b/src/hotspot/share/gc/parallel/psTasks.cpp
@@ -79,8 +79,8 @@ void ScavengeRootsTask::do_it(GCTaskManager* manager, uint which) {
case class_loader_data:
{
- PSScavengeKlassClosure klass_closure(pm);
- ClassLoaderDataGraph::oops_do(&roots_closure, &klass_closure, false);
+ PSScavengeCLDClosure cld_closure(pm);
+ ClassLoaderDataGraph::cld_do(&cld_closure);
}
break;
diff --git a/src/hotspot/share/gc/serial/defNewGeneration.cpp b/src/hotspot/share/gc/serial/defNewGeneration.cpp
index db6977aa362..f6bf3437f3e 100644
--- a/src/hotspot/share/gc/serial/defNewGeneration.cpp
+++ b/src/hotspot/share/gc/serial/defNewGeneration.cpp
@@ -41,6 +41,7 @@
#include "gc/shared/space.inline.hpp"
#include "gc/shared/spaceDecorator.hpp"
#include "gc/shared/strongRootsScope.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "logging/log.hpp"
#include "memory/iterator.hpp"
#include "memory/resourceArea.hpp"
@@ -121,7 +122,7 @@ void DefNewGeneration::FastEvacuateFollowersClosure::do_void() {
}
ScanClosure::ScanClosure(DefNewGeneration* g, bool gc_barrier) :
- OopsInKlassOrGenClosure(g), _g(g), _gc_barrier(gc_barrier)
+ OopsInClassLoaderDataOrGenClosure(g), _g(g), _gc_barrier(gc_barrier)
{
_boundary = _g->reserved().end();
}
@@ -130,7 +131,7 @@ void ScanClosure::do_oop(oop* p) { ScanClosure::do_oop_work(p); }
void ScanClosure::do_oop(narrowOop* p) { ScanClosure::do_oop_work(p); }
FastScanClosure::FastScanClosure(DefNewGeneration* g, bool gc_barrier) :
- OopsInKlassOrGenClosure(g), _g(g), _gc_barrier(gc_barrier)
+ OopsInClassLoaderDataOrGenClosure(g), _g(g), _gc_barrier(gc_barrier)
{
_boundary = _g->reserved().end();
}
@@ -138,30 +139,28 @@ FastScanClosure::FastScanClosure(DefNewGeneration* g, bool gc_barrier) :
void FastScanClosure::do_oop(oop* p) { FastScanClosure::do_oop_work(p); }
void FastScanClosure::do_oop(narrowOop* p) { FastScanClosure::do_oop_work(p); }
-void KlassScanClosure::do_klass(Klass* klass) {
+void CLDScanClosure::do_cld(ClassLoaderData* cld) {
NOT_PRODUCT(ResourceMark rm);
- log_develop_trace(gc, scavenge)("KlassScanClosure::do_klass " PTR_FORMAT ", %s, dirty: %s",
- p2i(klass),
- klass->external_name(),
- klass->has_modified_oops() ? "true" : "false");
+ log_develop_trace(gc, scavenge)("CLDScanClosure::do_cld " PTR_FORMAT ", %s, dirty: %s",
+ p2i(cld),
+ cld->loader_name(),
+ cld->has_modified_oops() ? "true" : "false");
- // If the klass has not been dirtied we know that there's
+ // If the cld has not been dirtied we know that there's
// no references into the young gen and we can skip it.
- if (klass->has_modified_oops()) {
+ if (cld->has_modified_oops()) {
if (_accumulate_modified_oops) {
- klass->accumulate_modified_oops();
+ cld->accumulate_modified_oops();
}
- // Clear this state since we're going to scavenge all the metadata.
- klass->clear_modified_oops();
-
- // Tell the closure which Klass is being scanned so that it can be dirtied
+ // Tell the closure which CLD is being scanned so that it can be dirtied
// if oops are left pointing into the young gen.
- _scavenge_closure->set_scanned_klass(klass);
+ _scavenge_closure->set_scanned_cld(cld);
- klass->oops_do(_scavenge_closure);
+ // Clean the cld since we're going to scavenge all the metadata.
+ cld->oops_do(_scavenge_closure, false, /*clear_modified_oops*/true);
- _scavenge_closure->set_scanned_klass(NULL);
+ _scavenge_closure->set_scanned_cld(NULL);
}
}
@@ -177,12 +176,6 @@ void ScanWeakRefClosure::do_oop(narrowOop* p) { ScanWeakRefClosure::do_oop_work(
void FilteringClosure::do_oop(oop* p) { FilteringClosure::do_oop_work(p); }
void FilteringClosure::do_oop(narrowOop* p) { FilteringClosure::do_oop_work(p); }
-KlassScanClosure::KlassScanClosure(OopsInKlassOrGenClosure* scavenge_closure,
- KlassRemSet* klass_rem_set)
- : _scavenge_closure(scavenge_closure),
- _accumulate_modified_oops(klass_rem_set->accumulate_modified_oops()) {}
-
-
DefNewGeneration::DefNewGeneration(ReservedSpace rs,
size_t initial_size,
const char* policy)
@@ -629,11 +622,8 @@ void DefNewGeneration::collect(bool full,
FastScanClosure fsc_with_no_gc_barrier(this, false);
FastScanClosure fsc_with_gc_barrier(this, true);
- KlassScanClosure klass_scan_closure(&fsc_with_no_gc_barrier,
- gch->rem_set()->klass_rem_set());
- CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
- &fsc_with_no_gc_barrier,
- false);
+ CLDScanClosure cld_scan_closure(&fsc_with_no_gc_barrier,
+ gch->rem_set()->cld_rem_set()->accumulate_modified_oops());
set_promo_failure_scan_stack_closure(&fsc_with_no_gc_barrier);
FastEvacuateFollowersClosure evacuate_followers(gch,
@@ -669,6 +659,13 @@ void DefNewGeneration::collect(bool full,
gc_tracer.report_tenuring_threshold(tenuring_threshold());
pt.print_all_references();
+ assert(gch->no_allocs_since_save_marks(), "save marks have not been newly set.");
+
+ WeakProcessor::weak_oops_do(&is_alive, &keep_alive);
+
+ // Verify that the usage of keep_alive didn't copy any objects.
+ assert(gch->no_allocs_since_save_marks(), "save marks have not been newly set.");
+
if (!_promotion_failed) {
// Swap the survivor spaces.
eden()->clear(SpaceDecorator::Mangle);
@@ -745,8 +742,11 @@ void DefNewGeneration::remove_forwarding_pointers() {
RemoveForwardedPointerClosure rspc;
eden()->object_iterate(&rspc);
from()->object_iterate(&rspc);
+ restore_preserved_marks();
+}
- SharedRestorePreservedMarksTaskExecutor task_executor(GenCollectedHeap::heap()->workers());
+void DefNewGeneration::restore_preserved_marks() {
+ SharedRestorePreservedMarksTaskExecutor task_executor(NULL);
_preserved_marks_set.restore(&task_executor);
}
diff --git a/src/hotspot/share/gc/serial/defNewGeneration.hpp b/src/hotspot/share/gc/serial/defNewGeneration.hpp
index e2ff971ddda..19ca2ac4707 100644
--- a/src/hotspot/share/gc/serial/defNewGeneration.hpp
+++ b/src/hotspot/share/gc/serial/defNewGeneration.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -89,6 +89,8 @@ protected:
// therefore we must remove their forwarding pointers.
void remove_forwarding_pointers();
+ virtual void restore_preserved_marks();
+
// Preserved marks
PreservedMarksSet _preserved_marks_set;
diff --git a/src/hotspot/share/gc/serial/genMarkSweep.cpp b/src/hotspot/share/gc/serial/genMarkSweep.cpp
index e7c58edf722..8f22aea7a19 100644
--- a/src/hotspot/share/gc/serial/genMarkSweep.cpp
+++ b/src/hotspot/share/gc/serial/genMarkSweep.cpp
@@ -43,6 +43,7 @@
#include "gc/shared/referencePolicy.hpp"
#include "gc/shared/space.hpp"
#include "gc/shared/strongRootsScope.hpp"
+#include "gc/shared/weakProcessor.hpp"
#include "oops/instanceRefKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
@@ -220,6 +221,11 @@ void GenMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) {
// This is the point where the entire marking should have completed.
assert(_marking_stack.is_empty(), "Marking should have completed");
+ {
+ GCTraceTime(Debug, gc, phases) tm_m("Weak Processing", gc_timer());
+ WeakProcessor::weak_oops_do(&is_alive, &do_nothing_cl);
+ }
+
{
GCTraceTime(Debug, gc, phases) tm_m("Class Unloading", gc_timer());
diff --git a/src/hotspot/share/gc/shared/barrierSet.hpp b/src/hotspot/share/gc/shared/barrierSet.hpp
index 39ce1161127..eaec4af4fcc 100644
--- a/src/hotspot/share/gc/shared/barrierSet.hpp
+++ b/src/hotspot/share/gc/shared/barrierSet.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -80,50 +80,11 @@ public:
// End of fake RTTI support.
-public:
- enum Flags {
- None = 0,
- TargetUninitialized = 1
- };
-
protected:
- // Some barrier sets create tables whose elements correspond to parts of
- // the heap; the CardTableModRefBS is an example. Such barrier sets will
- // normally reserve space for such tables, and commit parts of the table
- // "covering" parts of the heap that are committed. At most one covered
- // region per generation is needed.
- static const int _max_covered_regions = 2;
-
BarrierSet(const FakeRtti& fake_rtti) : _fake_rtti(fake_rtti) { }
~BarrierSet() { }
public:
-
- // These operations indicate what kind of barriers the BarrierSet has.
- virtual bool has_read_ref_barrier() = 0;
- virtual bool has_read_prim_barrier() = 0;
- virtual bool has_write_ref_barrier() = 0;
- virtual bool has_write_ref_pre_barrier() = 0;
- virtual bool has_write_prim_barrier() = 0;
-
- // These functions indicate whether a particular access of the given
- // kinds requires a barrier.
- virtual bool read_ref_needs_barrier(void* field) = 0;
- virtual bool read_prim_needs_barrier(HeapWord* field, size_t bytes) = 0;
- virtual bool write_prim_needs_barrier(HeapWord* field, size_t bytes,
- juint val1, juint val2) = 0;
-
- // The first four operations provide a direct implementation of the
- // barrier set. An interpreter loop, for example, could call these
- // directly, as appropriate.
-
- // Invoke the barrier, if any, necessary when reading the given ref field.
- virtual void read_ref_field(void* field) = 0;
-
- // Invoke the barrier, if any, necessary when reading the given primitive
- // "field" of "bytes" bytes in "obj".
- virtual void read_prim_field(HeapWord* field, size_t bytes) = 0;
-
// Invoke the barrier, if any, necessary when writing "new_val" into the
// ref field at "offset" in "obj".
// (For efficiency reasons, this operation is specialized for certain
@@ -131,48 +92,19 @@ public:
// virtual "_work" function below, which must implement the barrier.)
// First the pre-write versions...
template <class T> inline void write_ref_field_pre(T* field, oop new_val);
-private:
- // Helper for write_ref_field_pre and friends, testing for specialized cases.
- bool devirtualize_reference_writes() const;
-
- // Keep this private so as to catch violations at build time.
- virtual void write_ref_field_pre_work( void* field, oop new_val) { guarantee(false, "Not needed"); };
-protected:
- virtual void write_ref_field_pre_work( oop* field, oop new_val) {};
- virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {};
-public:
// ...then the post-write version.
inline void write_ref_field(void* field, oop new_val, bool release = false);
+
protected:
+ virtual void write_ref_field_pre_work( oop* field, oop new_val) {};
+ virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {};
virtual void write_ref_field_work(void* field, oop new_val, bool release) = 0;
+
public:
-
- // Invoke the barrier, if any, necessary when writing the "bytes"-byte
- // value(s) "val1" (and "val2") into the primitive "field".
- virtual void write_prim_field(HeapWord* field, size_t bytes,
- juint val1, juint val2) = 0;
-
// Operations on arrays, or general regions (e.g., for "clone") may be
// optimized by some barriers.
- // The first six operations tell whether such an optimization exists for
- // the particular barrier.
- virtual bool has_read_ref_array_opt() = 0;
- virtual bool has_read_prim_array_opt() = 0;
- virtual bool has_write_ref_array_pre_opt() { return true; }
- virtual bool has_write_ref_array_opt() = 0;
- virtual bool has_write_prim_array_opt() = 0;
-
- virtual bool has_read_region_opt() = 0;
- virtual bool has_write_region_opt() = 0;
-
- // These operations should assert false unless the corresponding operation
- // above returns true. Otherwise, they should perform an appropriate
- // barrier for an array whose elements are all in the given memory region.
- virtual void read_ref_array(MemRegion mr) = 0;
- virtual void read_prim_array(MemRegion mr) = 0;
-
// Below length is the # array elements being written
virtual void write_ref_array_pre(oop* dst, int length,
bool dest_uninitialized = false) {}
@@ -193,17 +125,16 @@ public:
protected:
virtual void write_ref_array_work(MemRegion mr) = 0;
+
public:
- virtual void write_prim_array(MemRegion mr) = 0;
-
- virtual void read_region(MemRegion mr) = 0;
-
// (For efficiency reasons, this operation is specialized for certain
// barrier types. Semantically, it should be thought of as a call to the
// virtual "_work" function below, which must implement the barrier.)
void write_region(MemRegion mr);
+
protected:
virtual void write_region_work(MemRegion mr) = 0;
+
public:
// Inform the BarrierSet that the covered heap region that starts
// with "base" has been changed to have the given size (possibly from 0,
diff --git a/src/hotspot/share/gc/shared/barrierSet.inline.hpp b/src/hotspot/share/gc/shared/barrierSet.inline.hpp
index d8ffd4da8e5..56b567a87d5 100644
--- a/src/hotspot/share/gc/shared/barrierSet.inline.hpp
+++ b/src/hotspot/share/gc/shared/barrierSet.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,37 +26,15 @@
#define SHARE_VM_GC_SHARED_BARRIERSET_INLINE_HPP
#include "gc/shared/barrierSet.hpp"
-#include "gc/shared/cardTableModRefBS.inline.hpp"
#include "utilities/align.hpp"
-// Inline functions of BarrierSet, which de-virtualize certain
-// performance-critical calls when the barrier is the most common
-// card-table kind.
-
-inline bool BarrierSet::devirtualize_reference_writes() const {
- switch (kind()) {
- case CardTableForRS:
- case CardTableExtension:
- return true;
- default:
- return false;
- }
-}
template <class T> void BarrierSet::write_ref_field_pre(T* field, oop new_val) {
- if (devirtualize_reference_writes()) {
- barrier_set_cast<CardTableModRefBS>(this)->inline_write_ref_field_pre(field, new_val);
- } else {
- write_ref_field_pre_work(field, new_val);
- }
+ write_ref_field_pre_work(field, new_val);
}
void BarrierSet::write_ref_field(void* field, oop new_val, bool release) {
- if (devirtualize_reference_writes()) {
- barrier_set_cast<CardTableModRefBS>(this)->inline_write_ref_field(field, new_val, release);
- } else {
- write_ref_field_work(field, new_val, release);
- }
+ write_ref_field_work(field, new_val, release);
}
// count is number of array elements being written
@@ -84,11 +62,7 @@ void BarrierSet::write_ref_array(HeapWord* start, size_t count) {
inline void BarrierSet::write_region(MemRegion mr) {
- if (devirtualize_reference_writes()) {
- barrier_set_cast<CardTableModRefBS>(this)->inline_write_region(mr);
- } else {
- write_region_work(mr);
- }
+ write_region_work(mr);
}
#endif // SHARE_VM_GC_SHARED_BARRIERSET_INLINE_HPP
diff --git a/src/hotspot/share/gc/shared/cardTableModRefBS.hpp b/src/hotspot/share/gc/shared/cardTableModRefBS.hpp
index 80d9cb8b072..5254bfeb6f3 100644
--- a/src/hotspot/share/gc/shared/cardTableModRefBS.hpp
+++ b/src/hotspot/share/gc/shared/cardTableModRefBS.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -73,7 +73,15 @@ class CardTableModRefBS: public ModRefBarrierSet {
size_t _byte_map_size; // in bytes
jbyte* _byte_map; // the card marking array
+ // Some barrier sets create tables whose elements correspond to parts of
+ // the heap; the CardTableModRefBS is an example. Such barrier sets will
+ // normally reserve space for such tables, and commit parts of the table
+ // "covering" parts of the heap that are committed. At most one covered
+ // region per generation is needed.
+ static const int _max_covered_regions = 2;
+
int _cur_covered_regions;
+
// The covered regions should be in address order.
MemRegion* _covered;
// The committed regions correspond one-to-one to the covered regions.
@@ -89,7 +97,6 @@ class CardTableModRefBS: public ModRefBarrierSet {
// uncommit the MemRegion for that page.
MemRegion _guard_region;
- protected:
inline size_t compute_byte_map_size();
// Finds and return the index of the region, if any, to which the given
@@ -135,7 +142,6 @@ class CardTableModRefBS: public ModRefBarrierSet {
return byte_for(p) + 1;
}
- protected:
// Dirty the bytes corresponding to "mr" (not all of which must be
// covered.)
void dirty_MemRegion(MemRegion mr);
@@ -144,7 +150,7 @@ class CardTableModRefBS: public ModRefBarrierSet {
// all of which must be covered.)
void clear_MemRegion(MemRegion mr);
-public:
+ public:
// Constants
enum SomePublicConstants {
card_shift = 9,
@@ -163,8 +169,6 @@ public:
// *** Barrier set functions.
- bool has_write_ref_pre_barrier() { return false; }
-
// Initialization utilities; covered_words is the size of the covered region
// in, um, words.
inline size_t cards_required(size_t covered_words) {
@@ -173,8 +177,7 @@ public:
return words / card_size_in_words + 1;
}
-protected:
-
+ protected:
CardTableModRefBS(MemRegion whole_heap, const BarrierSet::FakeRtti& fake_rtti);
~CardTableModRefBS();
@@ -185,29 +188,18 @@ protected:
void write_ref_field_work(oop obj, size_t offset, oop newVal);
virtual void write_ref_field_work(void* field, oop newVal, bool release);
-public:
- bool has_write_ref_array_opt() { return true; }
- bool has_write_region_opt() { return true; }
-
- inline void inline_write_region(MemRegion mr) {
- dirty_MemRegion(mr);
- }
-protected:
+ protected:
void write_region_work(MemRegion mr) {
- inline_write_region(mr);
- }
-public:
-
- inline void inline_write_ref_array(MemRegion mr) {
dirty_MemRegion(mr);
}
-protected:
- void write_ref_array_work(MemRegion mr) {
- inline_write_ref_array(mr);
- }
-public:
+ protected:
+ void write_ref_array_work(MemRegion mr) {
+ dirty_MemRegion(mr);
+ }
+
+ public:
bool is_aligned(HeapWord* addr) {
return is_card_aligned(addr);
}
diff --git a/src/hotspot/share/gc/shared/cardTableModRefBS.inline.hpp b/src/hotspot/share/gc/shared/cardTableModRefBS.inline.hpp
index 7b01a379558..bce8661cf86 100644
--- a/src/hotspot/share/gc/shared/cardTableModRefBS.inline.hpp
+++ b/src/hotspot/share/gc/shared/cardTableModRefBS.inline.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,10 +30,10 @@
#include "runtime/orderAccess.inline.hpp"
template <class T> inline void CardTableModRefBS::inline_write_ref_field(T* field, oop newVal, bool release) {
- jbyte* byte = byte_for((void*)field);
+ volatile jbyte* byte = byte_for((void*)field);
if (release) {
// Perform a releasing store if requested.
- OrderAccess::release_store((volatile jbyte*) byte, dirty_card);
+ OrderAccess::release_store(byte, jbyte(dirty_card));
} else {
*byte = dirty_card;
}
diff --git a/src/hotspot/share/gc/shared/cardTableRS.cpp b/src/hotspot/share/gc/shared/cardTableRS.cpp
index 0c87676ce02..27d015110c4 100644
--- a/src/hotspot/share/gc/shared/cardTableRS.cpp
+++ b/src/hotspot/share/gc/shared/cardTableRS.cpp
@@ -34,16 +34,16 @@
#include "runtime/os.hpp"
#include "utilities/macros.hpp"
-class HasAccumulatedModifiedOopsClosure : public KlassClosure {
+class HasAccumulatedModifiedOopsClosure : public CLDClosure {
bool _found;
public:
HasAccumulatedModifiedOopsClosure() : _found(false) {}
- void do_klass(Klass* klass) {
+ void do_cld(ClassLoaderData* cld) {
if (_found) {
return;
}
- if (klass->has_accumulated_modified_oops()) {
+ if (cld->has_accumulated_modified_oops()) {
_found = true;
}
}
@@ -52,28 +52,29 @@ class HasAccumulatedModifiedOopsClosure : public KlassClosure {
}
};
-bool KlassRemSet::mod_union_is_clear() {
+bool CLDRemSet::mod_union_is_clear() {
HasAccumulatedModifiedOopsClosure closure;
- ClassLoaderDataGraph::classes_do(&closure);
+ ClassLoaderDataGraph::cld_do(&closure);
return !closure.found();
}
-class ClearKlassModUnionClosure : public KlassClosure {
+class ClearCLDModUnionClosure : public CLDClosure {
public:
- void do_klass(Klass* klass) {
- if (klass->has_accumulated_modified_oops()) {
- klass->clear_accumulated_modified_oops();
+ void do_cld(ClassLoaderData* cld) {
+ if (cld->has_accumulated_modified_oops()) {
+ cld->clear_accumulated_modified_oops();
}
}
};
-void KlassRemSet::clear_mod_union() {
- ClearKlassModUnionClosure closure;
- ClassLoaderDataGraph::classes_do(&closure);
+void CLDRemSet::clear_mod_union() {
+ ClearCLDModUnionClosure closure;
+ ClassLoaderDataGraph::cld_do(&closure);
}
+
CardTableRS::CardTableRS(MemRegion whole_heap) :
_bs(NULL),
_cur_youngergen_card_val(youngergenP1_card)
diff --git a/src/hotspot/share/gc/shared/cardTableRS.hpp b/src/hotspot/share/gc/shared/cardTableRS.hpp
index 5139580b61f..5713f04b5ce 100644
--- a/src/hotspot/share/gc/shared/cardTableRS.hpp
+++ b/src/hotspot/share/gc/shared/cardTableRS.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -31,11 +31,11 @@
class Space;
class OopsInGenClosure;
-// Helper to remember modified oops in all klasses.
-class KlassRemSet {
+// Helper to remember modified oops in all clds.
+class CLDRemSet {
bool _accumulate_modified_oops;
public:
- KlassRemSet() : _accumulate_modified_oops(false) {}
+ CLDRemSet() : _accumulate_modified_oops(false) {}
void set_accumulate_modified_oops(bool value) { _accumulate_modified_oops = value; }
bool accumulate_modified_oops() { return _accumulate_modified_oops; }
bool mod_union_is_clear();
@@ -64,7 +64,7 @@ class CardTableRS: public CHeapObj {
return CardTableModRefBSForCTRS::card_is_dirty_wrt_gen_iter(cv);
}
- KlassRemSet _klass_rem_set;
+ CLDRemSet _cld_rem_set;
BarrierSet* _bs;
CardTableModRefBSForCTRS* _ct_bs;
@@ -121,7 +121,7 @@ public:
// Set the barrier set.
void set_bs(BarrierSet* bs) { _bs = bs; }
- KlassRemSet* klass_rem_set() { return &_klass_rem_set; }
+ CLDRemSet* cld_rem_set() { return &_cld_rem_set; }
CardTableModRefBSForCTRS* ct_bs() { return _ct_bs; }
diff --git a/src/hotspot/share/gc/shared/collectedHeap.cpp b/src/hotspot/share/gc/shared/collectedHeap.cpp
index deb9da3cbd8..b97ae63ca19 100644
--- a/src/hotspot/share/gc/shared/collectedHeap.cpp
+++ b/src/hotspot/share/gc/shared/collectedHeap.cpp
@@ -135,14 +135,6 @@ void CollectedHeap::print_on_error(outputStream* st) const {
_barrier_set->print_on(st);
}
-void CollectedHeap::register_nmethod(nmethod* nm) {
- assert_locked_or_safepoint(CodeCache_lock);
-}
-
-void CollectedHeap::unregister_nmethod(nmethod* nm) {
- assert_locked_or_safepoint(CodeCache_lock);
-}
-
void CollectedHeap::trace_heap(GCWhen::Type when, const GCTracer* gc_tracer) {
const GCHeapSummary& heap_summary = create_heap_summary();
gc_tracer->report_gc_heap_summary(when, heap_summary);
@@ -355,7 +347,6 @@ void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
"Mismatch: multiple objects?");
}
BarrierSet* bs = barrier_set();
- assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
bs->write_region(deferred);
// "Clear" the deferred_card_mark field
thread->set_deferred_card_mark(MemRegion());
@@ -438,7 +429,6 @@ oop CollectedHeap::new_store_pre_barrier(JavaThread* thread, oop new_obj) {
} else {
// Do the card mark
BarrierSet* bs = barrier_set();
- assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
bs->write_region(mr);
}
}
diff --git a/src/hotspot/share/gc/shared/collectedHeap.hpp b/src/hotspot/share/gc/shared/collectedHeap.hpp
index 10b9fb302e2..740f50282b3 100644
--- a/src/hotspot/share/gc/shared/collectedHeap.hpp
+++ b/src/hotspot/share/gc/shared/collectedHeap.hpp
@@ -83,6 +83,7 @@ class GCHeapLog : public EventLogBase {
// GenCollectedHeap
// G1CollectedHeap
// ParallelScavengeHeap
+// CMSHeap
//
class CollectedHeap : public CHeapObj