This commit is contained in:
@ -2,3 +2,4 @@
@ -114,3 +114,5 @@ f75a1efb141210901aabe00a834e0fc32bb8b337 jdk7-b135
d1cf7d4ee16c341f5b8c7e7f1d68a8c412b6c693 jdk7-b137
62b8e328f8c8c66c14b0713222116f2add473f3f jdk7-b138
955488f34ca418f6cdab843d61c20d2c615637d9 jdk7-b139
f4298bc3f4b6baa315643be06966f09684290068 jdk7-b140
5d86d0c7692e8f4a58d430d68c03594e2d3403b3 jdk7-b141
@ -114,3 +114,5 @@ ddc2fcb3682ffd27f44354db666128827be7e3c3 jdk7-b134
7654afc6a29e43cb0a1343ce7f1287bf690d5e5f jdk7-b137
fc47c97bbbd91b1f774d855c48a7e285eb1a351a jdk7-b138
7ed6d0b9aaa12320832a7ddadb88d6d8d0dda4c1 jdk7-b139
dcfe74f1c6553c556e7d361c30b0b614eb5e40f6 jdk7-b140
c6569c5585851dfd39b8de8e021c3c312f51af12 jdk7-b141
@ -97,7 +97,7 @@ define StopTimer
# Generic build of basic repo series
generic_build_repo_series:: $(SOURCE_TIPS)
$(MKDIR) -p $(OUTPUTDIR)/j2sdk-image
@$(call StartTimer)
@ -243,6 +243,14 @@ product_build:: build_product_image
debug_build:: build_debug_image
fastdebug_build:: build_fastdebug_image
# The source tips are stored with the relative path to the repo.
# This file will be used when constructing the jdk image.
source_tips: $(SOURCE_TIPS)
$(CAT) $<
@$(call GetSourceTips)
$(RM) -r $(OUTPUTDIR)/*
@ -1,3 +1,4 @@
@ -114,3 +114,5 @@ e0b72ae5dc5e824b342801c8d1d336a55eb54e2c jdk7-b135
a66c01d8bf895261715955df0b95545c000ed6a8 jdk7-b137
78d8cf04697e9df54f7f11e195b7da29b8e345a2 jdk7-b138
60b074ec6fcf5cdf9efce22fdfb02326ed8fa2d3 jdk7-b139
cdf5d19ec142424489549025e9c42e51f32cf688 jdk7-b140
a58635cdd921bafef353f4864184a0481353197b jdk7-b141
@ -23,9 +23,9 @@
# questions.
orbd.usage=Uso: {0} <options> \n\nem que <options> inclui:\n -port porta de ativa\u00E7\u00E3o na qual o ORBD deve ser iniciado, default 1049 (opcional)\n -defaultdb diret\u00F3rio dos arquivos ORBD, default "./orb.db" (opcional)\n -serverid id do servidor para ORBD, default 1 (opcional)\n -ORBInitialPort porta inicial (necess\u00E1rio)\n -ORBInitialHost nome de host inicial (necess\u00E1rio)\n
orbd.usage=Uso: {0} <op\u00E7\u00F5es> \n\nem que <op\u00E7\u00F5es> inclui:\n -port porta de ativa\u00E7\u00E3o na qual o ORBD deve ser iniciado, default 1049 (opcional)\n -defaultdb diret\u00F3rio dos arquivos ORBD, default "./orb.db" (opcional)\n -serverid id do servidor para ORBD, default 1 (opcional)\n -ORBInitialPort porta inicial (obrigat\u00F3rio)\n -ORBInitialHost nome de host inicial (obrigat\u00F3rio)\n
servertool.usage=Uso: {0} <options> \n\nem que <options> inclui:\n -ORBInitialPort porta inicial (necess\u00E1rio)\n -ORBInitialHost nome de host inicial (necess\u00E1rio)\n
servertool.usage=Uso: {0} <op\u00E7\u00F5es> \n\nem que <op\u00E7\u00F5es> inclui:\n -ORBInitialPort porta inicial (obrigat\u00F3rio)\n -ORBInitialHost nome de host inicial (obrigat\u00F3rio)\n
servertool.banner=\n\nBem-vindo \u00E0 Ferramenta de Servidor IDL Java \ninsira os comandos no prompt \n
servertool.shorthelp=\n\n\tComandos Dispon\u00EDveis: \n\t------------------- \n
servertool.baddef=Defini\u00E7\u00E3o do servidor inv\u00E1lida: {0}
@ -40,23 +40,23 @@ servertool.args=\targs - {0}
servertool.vmargs=\tvmargs - {0}
servertool.serverid=\tserver id - {0}
servertool.servernotrunning=\to servidor n\u00E3o est\u00E1 em execu\u00E7\u00E3o.
servertool.register=\n\n\tregister -server <server class name> \n\t -applicationName <alternate server name> \n\t -classpath <classpath to server> \n\t -args <args to server> \n\t -vmargs <args to server Java VM>\n
servertool.register=\n\n\tregister -server <nome da classe do servidor> \n\t -applicationName <nome do servidor alternativo> \n\t -classpath <classpath para o servidor> \n\t -args <args para o servidor> \n\t -vmargs <args para a VM Java do servidor>\n
servertool.register1=registra um servidor ativ\u00E1vel
servertool.register2=\tservidor registrado (serverid = {0}).
servertool.register3=\tservidor registrado, mas em espera (serverid = {0}).
servertool.register4=\tservidor j\u00E1 registrado (serverid = {0}).
servertool.unregister=\n\tunregister [ -serverid <server id> | -applicationName <name> ] \n
servertool.unregister=\n\tunregister [ -serverid <id do servidor> | -applicationName <nome> ] \n
servertool.unregister1=cancela o registro de um servidor registrado
servertool.unregister2=\tservidor n\u00E3o registrado.
servertool.locate=\n\tlocate [ -serverid <server id> | -applicationName <name> ] [ <-endpointType <endpointType> ] \n
servertool.locate=\n\tlocate [ -serverid <id do servidor> | -applicationName <nome> ] [ <-endpointType <endpointType> ] \n
servertool.locate1=localiza portas de tipo espec\u00EDfico para um servidor registrado
servertool.locate2=\n\n\tNome do Host {0} \n\n\t\tPorta\t\tTipo de Porta\t\tId do ORB\n\t\t----\t\t---------\t\t------\n
servertool.locateorb=\n\tlocateperorb [ -serverid <server id> | -applicationName <name> ] [ -orbid <ORB name> ]\n
servertool.locateorb=\n\tlocateperorb [ -serverid <id do servidor> | -applicationName <nome> ] [ -orbid <nome ORB> ]\n
servertool.locateorb1=localiza portas para um orb espec\u00EDfico de servidor registrado
servertool.locateorb2=\n\n\tNome do Host {0} \n\n\t\tPorta\t\tTipo de Porta\t\tId do ORB\n\t\t----\t\t--------\t\t------\n
servertool.getserverid=\n\tgetserverid [ -applicationName <name> ] \n
servertool.getserverid=\n\tgetserverid [ -applicationName <nome> ] \n
servertool.getserverid1=retorna o id do servidor de um applicationName
servertool.getserverid2=\tID do Servidor de applicationName {0} \u00E9 {1}
@ -69,33 +69,33 @@ servertool.listappnames=\tlistappnames\n
servertool.listappnames1=lista os applicationNames atualmente definidos
servertool.listappnames2=applicationNames do servidor definidos atualmente:
servertool.shutdown=\n\tshutdown [ -serverid <server id> | -applicationName <name> ]\n
servertool.shutdown=\n\tshutdown [ -serverid <id do servidor> | -applicationName <nome> ]\n
servertool.shutdown1=faz shutdown de um servidor registrado
servertool.shutdown2=\tshutdown do servidor bem-sucedido.
servertool.startserver=\n\tstartup [ -serverid <server id> | -applicationName <name> ]\n
servertool.startserver=\n\tstartup [ -serverid <id do servidor> | -applicationName <nome> ]\n
servertool.startserver1=inicia um servidor registrado
servertool.startserver2=\tservidor iniciado com \u00EAxito.
servertool.quit1=sai desta ferramenta
servertool.help=\thelp\n\tOR\n\thelp <command name>\n
servertool.help=\thelp\n\tOR\n\thelp <nome do comando>\n
servertool.help1=obt\u00E9m ajuda
servertool.orbidmap=\tUso: orblist [ -serverid <server id> | -applicationName <name> ]\n
servertool.orbidmap=\tUso: orblist [ -serverid <id do servidor> | -applicationName <nome> ]\n
servertool.orbidmap1=lista de nomes de orb e seus mapeamentos
servertool.orbidmap2=\n\tId de ORB\t\tNome de ORB\n\t------\t\t--------\n
pnameserv.success=NameServer Persistente Iniciado com \u00CAxito
bootstrap.usage=Uso: {0} <options> \n\nem que <options> inclui:\n -ORBInitialPort porta inicial (necess\u00E1rio)\n -InitialServicesFile arquivo que cont\u00E9m a lista de servi\u00E7os iniciais (necess\u00E1rio)\n
bootstrap.usage=Uso: {0} <op\u00E7\u00F5es> \n\nem que <op\u00E7\u00F5es> inclui:\n -ORBInitialPort porta inicial (obrigat\u00F3rio)\n -InitialServicesFile arquivo que cont\u00E9m a lista de servi\u00E7os iniciais (obrigat\u00F3rio)\n
bootstrap.success=definindo porta para {0} e lendo servi\u00E7os de {1}
bootstrap.filenotreadable=o arquivo {0} n\u00E3o \u00E9 leg\u00EDvel
bootstrap.filenotfound=arquivo {0} n\u00E3o encontrado
bootstrap.exception=exce\u00E7\u00E3o capturada ao salvar as propriedades no Arquivo {0}: exce\u00E7\u00E3o {1}
tnameserv.exception=uma exce\u00E7\u00E3o capturada ao iniciar o servi\u00E7o de inicializa\u00E7\u00E3o na porta {0}
tnameserv.usage=tente usar outra porta com os argumentos de linha de comandos -ORBInitialPort <portno>
tnameserv.usage=tente usar outra porta com os argumentos de linha de comandos -ORBInitialPort <n\u00BA da porta>
tnameserv.invalidhostoption=ORBInitialHost n\u00E3o \u00E9 uma op\u00E7\u00E3o v\u00E1lida para NameService
tnameserv.orbinitialport0=ORBInitialPort 0 n\u00E3o \u00E9 uma op\u00E7\u00E3o v\u00E1lida para NameService
tnameserv.hs1=Contexto de Nomea\u00E7\u00E3o Inicial:\n{0}
@ -5,3 +5,4 @@
@ -166,3 +166,7 @@ bd586e392d93b7ed7a1636dcc8da2b6a4203a102 hs21-b06
0930dc920c185afbf40fed9a655290b8e5b16783 hs21-b08
611e19a16519d6fb5deea9ab565336e6e6ee475d jdk7-b139
611e19a16519d6fb5deea9ab565336e6e6ee475d hs21-b09
d283b82966712b353fa307845a1316da42a355f4 jdk7-b140
d283b82966712b353fa307845a1316da42a355f4 hs21-b10
5d07913abd59261c77f24cc04a759cb75d804099 jdk7-b141
3aea9e9feb073f5500e031be6186666bcae89aa2 hs21-b11
@ -257,7 +257,7 @@ RMIC = $(JDK_HOME)/bin/rmic
all: filelist
@mkdir -p $(OUTPUT_DIR)
$(JAVAC) -source 1.4 -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist
$(JAVAC) -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist
$(RMIC) -classpath $(OUTPUT_DIR) -d $(OUTPUT_DIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer
rm -f $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql/sa.js
cp $(SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql
@ -269,7 +269,7 @@ all: filelist
allprof: filelist
@mkdir -p $(OUTPUT_DIR)
$(JAVAC) -source 1.4 -J-Xprof -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist
$(JAVAC) -J-Xprof -classpath $(CLASSPATH) -deprecation -sourcepath $(SRC_DIR) -g -d $(OUTPUT_DIR) @filelist
$(RMIC) -classpath $(OUTPUT_DIR) -d $(OUTPUT_DIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer
rm -f $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql/sa.js
cp $(SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(OUTPUT_DIR)/sun/jvm/hotspot/utilities/soql
@ -1,5 +1,5 @@
* Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -420,7 +420,22 @@ extern uintptr_t Ppltdest(struct ps_prochandle *, uintptr_t, int *);
* Stack frame iteration interface.
#ifdef SOLARIS_11_B159_OR_LATER
/* building on Nevada-B159 or later so define the new callback */
typedef int proc_stack_f(
void *, /* the cookie given to Pstack_iter() */
const prgregset_t, /* the frame's registers */
uint_t, /* argc for the frame's function */
const long *, /* argv for the frame's function */
int, /* bitwise flags describing the frame (see below) */
int); /* a signal number */
#define PR_SIGNAL_FRAME 1 /* called by a signal handler */
#define PR_FOUND_SIGNAL 2 /* we found the corresponding signal number */
/* building on Nevada-B158 or earlier so define the old callback */
typedef int proc_stack_f(void *, const prgregset_t, uint_t, const long *);
extern int Pstack_iter(struct ps_prochandle *,
const prgregset_t, proc_stack_f *, void *);
@ -1,5 +1,5 @@
* Copyright (c) 2003, 2005, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -101,7 +101,23 @@ extern int Pstop(struct ps_prochandle *, uint_t);
* Stack frame iteration interface.
#ifdef SOLARIS_11_B159_OR_LATER
/* building on Nevada-B159 or later so define the new callback */
typedef int proc_stack_f(
void *, /* the cookie given to Pstack_iter() */
const prgregset_t, /* the frame's registers */
uint_t, /* argc for the frame's function */
const long *, /* argv for the frame's function */
int, /* bitwise flags describing the frame (see below) */
int); /* a signal number */
#define PR_SIGNAL_FRAME 1 /* called by a signal handler */
#define PR_FOUND_SIGNAL 2 /* we found the corresponding signal number */
/* building on Nevada-B158 or earlier so define the old callback */
typedef int proc_stack_f(void *, const prgregset_t, uint_t, const long *);
extern int Pstack_iter(struct ps_prochandle *,
const prgregset_t, proc_stack_f *, void *);
@ -1,5 +1,5 @@
* Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -24,6 +24,9 @@
#include "salibproc.h"
#include "sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal.h"
#ifndef SOLARIS_11_B159_OR_LATER
#include <sys/utsname.h>
#include <thread_db.h>
#include <strings.h>
#include <limits.h>
@ -40,8 +43,22 @@
#define SYMBOL_BUF_SIZE 256
#define ERR_MSG_SIZE (PATH_MAX + 256)
// debug mode
// debug modes
static int _libsaproc_debug = 0;
#ifndef SOLARIS_11_B159_OR_LATER
static bool _Pstack_iter_debug = false;
static void dprintf_2(const char* format,...) {
if (_Pstack_iter_debug) {
va_list alist;
va_start(alist, format);
fputs("Pstack_iter DEBUG: ", stderr);
vfprintf(stderr, format, alist);
#endif // !SOLARIS_11_B159_OR_LATER
static void print_debug(const char* format,...) {
if (_libsaproc_debug) {
@ -450,6 +467,7 @@ fill_load_object_list(void *cd, const prmap_t* pmp, const char* obj_name) {
return 0;
// Pstack_iter() proc_stack_f callback prior to Nevada-B159
static int
fill_cframe_list(void *cd, const prgregset_t regs, uint_t argc, const long *argv) {
DebuggerWith2Objects* dbgo2 = (DebuggerWith2Objects*) cd;
@ -472,6 +490,14 @@ fill_cframe_list(void *cd, const prgregset_t regs, uint_t argc, const long *argv
return 0;
// Pstack_iter() proc_stack_f callback in Nevada-B159 or later
static int
wrapper_fill_cframe_list(void *cd, const prgregset_t regs, uint_t argc,
const long *argv, int frame_flags, int sig) {
return(fill_cframe_list(cd, regs, argc, argv));
// part of the class sharing workaround
@ -970,6 +996,11 @@ JNIEXPORT void JNICALL Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_fill
#ifndef SOLARIS_11_B159_OR_LATER
// building on Nevada-B158 or earlier so more hoops to jump through
static bool has_newer_Pstack_iter = false; // older version by default
* Class: sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal
* Method: fillCFrameList0
@ -997,7 +1028,24 @@ JNIEXPORT jobject JNICALL Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_f
env->ReleaseLongArrayElements(regsArray, ptr, JNI_ABORT);
Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs, fill_cframe_list, &dbgo2);
#ifdef SOLARIS_11_B159_OR_LATER
// building on Nevada-B159 or later so use the new callback
Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs,
wrapper_fill_cframe_list, &dbgo2);
// building on Nevada-B158 or earlier so figure out which callback to use
if (has_newer_Pstack_iter) {
// Since we're building on Nevada-B158 or earlier, we have to
// cast wrapper_fill_cframe_list to make the compiler happy.
Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs,
(proc_stack_f *)wrapper_fill_cframe_list, &dbgo2);
} else {
Pstack_iter((struct ps_prochandle*) p_ps_prochandle, gregs,
fill_cframe_list, &dbgo2);
#endif // SOLARIS_11_B159_OR_LATER
return dbgo2.obj;
@ -1218,6 +1266,102 @@ JNIEXPORT jstring JNICALL Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_d
return res;
#ifndef SOLARIS_11_B159_OR_LATER
// Determine if the OS we're running on has the newer version
// of libproc's Pstack_iter.
// Set env var PSTACK_ITER_DEBUG=true to debug this logic.
// Set env var PSTACK_ITER_DEBUG_RELEASE to simulate a 'release' value.
// Set env var PSTACK_ITER_DEBUG_VERSION to simulate a 'version' value.
// frankenputer 'uname -r -v': 5.10 Generic_141445-09
// jurassic 'uname -r -v': 5.11 snv_164
// lonepeak 'uname -r -v': 5.11 snv_127
static void set_has_newer_Pstack_iter(JNIEnv *env) {
static bool done_set = false;
if (done_set) {
// already set has_newer_Pstack_iter
struct utsname name;
if (uname(&name) == -1) {
dprintf_2("release='%s' version='%s'\n", name.release, name.version);
if (_Pstack_iter_debug) {
char *override = getenv("PSTACK_ITER_DEBUG_RELEASE");
if (override != NULL) {
strncpy(name.release, override, SYS_NMLN - 1);
name.release[SYS_NMLN - 2] = '\0';
dprintf_2("overriding with release='%s'\n", name.release);
override = getenv("PSTACK_ITER_DEBUG_VERSION");
if (override != NULL) {
strncpy(name.version, override, SYS_NMLN - 1);
name.version[SYS_NMLN - 2] = '\0';
dprintf_2("overriding with version='%s'\n", name.version);
// the major number corresponds to the old SunOS major number
int major = atoi(name.release);
if (major >= 6) {
dprintf_2("release is SunOS 6 or later\n");
has_newer_Pstack_iter = true;
done_set = true;
if (major < 5) {
dprintf_2("release is SunOS 4 or earlier\n");
done_set = true;
// some SunOS 5.* build so now check for Solaris versions
char *dot = strchr(name.release, '.');
int minor = 0;
if (dot != NULL) {
// release is major.minor format
*dot = NULL;
minor = atoi(dot + 1);
if (minor <= 10) {
dprintf_2("release is Solaris 10 or earlier\n");
done_set = true;
} else if (minor >= 12) {
dprintf_2("release is Solaris 12 or later\n");
has_newer_Pstack_iter = true;
done_set = true;
// some Solaris 11 build so now check for internal build numbers
if (strncmp(name.version, "snv_", 4) != 0) {
dprintf_2("release is Solaris 11 post-GA or later\n");
has_newer_Pstack_iter = true;
done_set = true;
// version begins with "snv_" so a pre-GA build of Solaris 11
int build = atoi(&name.version[4]);
if (build >= 159) {
dprintf_2("release is Nevada-B159 or later\n");
has_newer_Pstack_iter = true;
} else {
dprintf_2("release is Nevada-B158 or earlier\n");
done_set = true;
#endif // !SOLARIS_11_B159_OR_LATER
* Class: sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal
* Method: initIDs
@ -1237,6 +1381,14 @@ JNIEXPORT void JNICALL Java_sun_jvm_hotspot_debugger_proc_ProcDebuggerLocal_init
if (libproc_handle == 0)
THROW_NEW_DEBUGGER_EXCEPTION("can't load, if you are using Solaris 5.7 or below, copy from 5.8!");
#ifndef SOLARIS_11_B159_OR_LATER
_Pstack_iter_debug = getenv("PSTACK_ITER_DEBUG") != NULL;
dprintf_2("has_newer_Pstack_iter=%d\n", has_newer_Pstack_iter);
p_ps_prochandle_ID = env->GetFieldID(clazz, "p_ps_prochandle", "J");
@ -24,7 +24,8 @@
# This file defines variables and macros which are used in the makefiles to
# allow distributions to augment or replace common hotspot code with
# distribution-specific source files.
# distribution-specific source files. This capability is disabled when
# an OPENJDK build is requested, unless HS_ALT_SRC_REL has been set externally.
# Requires: GAMMADIR
# Provides:
@ -33,14 +34,17 @@
# This needs to be changed to a more generic location, but we keep it as this
# for now for compatibility
ifneq ($(OPENJDK),true)
# This needs to be changed to a more generic location, but we keep it
# as this for now for compatibility
## altsrc-equiv
# Convert a common source path to an alternative source path
@ -22,29 +22,23 @@
# The cscope.out file is made in the current directory and spans the entire
# source tree.
# Things to note:
# 1. We use relative names for cscope.
# 2. We *don't* remove the old cscope.out file, because cscope is smart
# enough to only build what has changed. It can be confused, however,
# if files are renamed or removed, so it may be necessary to manually
# remove cscope.out if a lot of reorganization has occurred.
# The cscope.out file is generated in the current directory. The old cscope.out
# file is *not* removed because cscope is smart enough to only build what has
# changed. cscope can be confused if files are renamed or removed, so it may be
# necessary to remove cscope.out (gmake cscope.clean) if a lot of reorganization
# has occurred.
include $(GAMMADIR)/make/scm.make
NAWK = /usr/xpg4/bin/awk
RM = rm -f
HG = hg
CS_TOP = ../..
CSDIRS = $(CS_TOP)/src $(CS_TOP)/make
CSCOPE = cscope
CSCOPE_OUT = cscope.out
# Allow .java files to be added from the environment (CSCLASSES=yes).
@ -61,25 +55,22 @@ ifndef CSHEADERS
RMCCHEADERS= -o -name CClassHeaders
# Use CS_GENERATED=x to include auto-generated files in the make directories.
CS_ADD_GENERATED = -o -name '*.incl'
CS_PRUNE_GENERATED = -o -name '${OS}_*_core' -o -name '${OS}_*_compiler?'
# Ignore build products.
CS_PRUNE_GENERATED = -o -name '${OSNAME}_*_core' -o \
-name '${OSNAME}_*_compiler?'
# O/S-specific files for all systems are included by default. Set CS_OS to a
# space-separated list of identifiers to include only those systems.
ifdef CS_OS
CS_PRUNE_OS = $(patsubst %,-o -name '*%*',\
$(filter-out ${CS_OS},linux macos solaris windows))
# OS-specific files for other systems are excluded by default. Use CS_OS=yes
# to include platform-specific files for other platforms.
ifndef CS_OS
CS_OS = linux macos solaris win32
CS_PRUNE_OS = $(patsubst %,-o -name '*%*',$(filter-out ${OS},${CS_OS}))
# Processor-specific files for other processors are excluded by default. Use
# CS_CPU=x to include platform-specific files for other platforms.
ifndef CS_CPU
CS_CPU = i486 sparc amd64 ia64
CS_PRUNE_CPU = $(patsubst %,-o -name '*%*',$(filter-out ${SRCARCH},${CS_CPU}))
# CPU-specific files for all processors are included by default. Set CS_CPU to a
# space-separated list of identifiers to include only those CPUs.
ifdef CS_CPU
CS_PRUNE_CPU = $(patsubst %,-o -name '*%*',\
$(filter-out ${CS_CPU},arm ppc sparc x86 zero))
# What files should we include? A simple rule might be just those files under
@ -95,10 +86,14 @@ CS_PRUNE_STD = $(SCM_DIRS) \
-o -name '*demo' \
-o -name pkgarchive
# Placeholder for user-defined excludes.
# File names to include.
@ -114,49 +109,33 @@ CSFILENAMES = -name '*.[ch]pp' \
-o -name '*.ad' \
.PHONY: cscope cscope.clean cscope.scratch TAGS.clean FORCE
.PRECIOUS: cscope.out
cscope cscope.out: cscope.files FORCE
cscope $(CSCOPE_OUT): cscope.files FORCE
# The .raw file is reordered here in an attempt to make cscope display the most
# relevant files first.
cscope.files: .cscope.files.raw
echo "$(CSINCS)" > $@
-egrep -v "\.java|\/make\/" $< >> $@
-fgrep ".java" $< >> $@
-fgrep "/make/" $< >> $@
$(QUIETLY) $(RM) $(CSCOPE_OUT) cscope.files
.cscope.files.raw: .nametable.files
-find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \
-type f \( $(CSFILENAMES) \) -print > $@
cscope.scratch: cscope.clean cscope
cscope.clean: nametable.clean
-$(RM) cscope.out cscope.files .cscope.files.raw
# The raw list is reordered so cscope displays the most relevant files first.
raw=cscope.$$$$; \
find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \
-type f \( $(CSFILENAMES) \) -print > $$raw; \
{ \
echo "$(CSINCS)"; \
egrep -v "\.java|/make/" $$raw; \
fgrep ".java" $$raw; \
fgrep "/make/" $$raw; \
} > $@; \
rm -f $$raw
TAGS: cscope.files FORCE
egrep -v '^-|^$$' $< | etags --members -
TAGS.clean: nametable.clean
# .nametable.files and .nametable.files.tmp are used to determine if any files
# were added to/deleted from/renamed in the workspace. If not, then there's
# normally no need to rebuild the cscope database. To force a rebuild of
# the cscope database: gmake nametable.clean.
.nametable.files: .nametable.files.tmp
( cmp -s $@ $< ) || ( cp $< $@ )
-$(RM) $<
# `hg status' is slightly faster than `hg fstatus'. Both are
# quite a bit slower on an NFS mounted file system, so this is
# really geared towards repos on local file systems.
-$(HG) fstatus -acmn > $@
-$(RM) .nametable.files .nametable.files.tmp
.PHONY: cscope cscope.clean TAGS.clean nametable.clean FORCE
@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2011
Normal file
Normal file
@ -0,0 +1,32 @@
# Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
# This file format must remain compatible with both
# GNU Makefile and Microsoft nmake formats.
# Don't use quotes (they break the Windows build).
COMPANY_NAME=Sun Microsystems, Inc.
@ -359,7 +359,7 @@ clean_compiler1 clean_compiler2 clean_core clean_zero clean_shark:
clean: clean_compiler2 clean_compiler1 clean_core clean_zero clean_shark clean_docs
include $(GAMMADIR)/make/$(OSNAME)/makefiles/cscope.make
include $(GAMMADIR)/make/cscope.make
@ -1,160 +0,0 @@
# Copyright (c) 2005, 2008, Oracle and/or its affiliates. All rights reserved.
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit if you need additional information or have any
# questions.
# The cscope.out file is made in the current directory and spans the entire
# source tree.
# Things to note:
# 1. We use relative names for cscope.
# 2. We *don't* remove the old cscope.out file, because cscope is smart
# enough to only build what has changed. It can be confused, however,
# if files are renamed or removed, so it may be necessary to manually
# remove cscope.out if a lot of reorganization has occurred.
include $(GAMMADIR)/make/scm.make
NAWK = awk
RM = rm -f
HG = hg
CS_TOP = ../..
CSDIRS = $(CS_TOP)/src $(CS_TOP)/build
CSCOPE = cscope
# Allow .java files to be added from the environment (CSCLASSES=yes).
ADDCLASSES= -o -name '*.java'
# Adding CClassHeaders also pushes the file count of a full workspace up about
# 200 files (these files also don't exist in a new workspace, and thus will
# cause the recreation of the database as they get created, which might seem
# a little confusing). Thus allow these files to be added from the environment
# (CSHEADERS=yes).
RMCCHEADERS= -o -name CClassHeaders
# Use CS_GENERATED=x to include auto-generated files in the build directories.
CS_ADD_GENERATED = -o -name '*.incl'
CS_PRUNE_GENERATED = -o -name '${OS}_*_core' -o -name '${OS}_*_compiler?'
# OS-specific files for other systems are excluded by default. Use CS_OS=yes
# to include platform-specific files for other platforms.
ifndef CS_OS
CS_OS = linux macos solaris win32
CS_PRUNE_OS = $(patsubst %,-o -name '*%*',$(filter-out ${OS},${CS_OS}))
# Processor-specific files for other processors are excluded by default. Use
# CS_CPU=x to include platform-specific files for other platforms.
ifndef CS_CPU
CS_CPU = i486 sparc amd64 ia64
CS_PRUNE_CPU = $(patsubst %,-o -name '*%*',$(filter-out ${SRCARCH},${CS_CPU}))
# What files should we include? A simple rule might be just those files under
# SCCS control, however this would miss files we create like the opcodes and
# CClassHeaders. The following attempts to find everything that is *useful*.
# (.del files are created by sccsrm, demo directories contain many .java files
# that probably aren't useful for development, and the pkgarchive may contain
# duplicates of files within the source hierarchy).
# Directories to exclude.
-o -name '.del-*' \
-o -name '*demo' \
-o -name pkgarchive
# File names to include.
CSFILENAMES = -name '*.[ch]pp' \
-o -name '*.[Ccshlxy]' \
-o -name '*.il' \
-o -name '*.cc' \
-o -name '*[Mm]akefile*' \
-o -name '*.gmk' \
-o -name '*.make' \
-o -name '*.ad' \
.PRECIOUS: cscope.out
cscope cscope.out: cscope.files FORCE
# The .raw file is reordered here in an attempt to make cscope display the most
# relevant files first.
cscope.files: .cscope.files.raw
echo "$(CSINCS)" > $@
-egrep -v "\.java|\/make\/" $< >> $@
-fgrep ".java" $< >> $@
-fgrep "/make/" $< >> $@
.cscope.files.raw: .nametable.files
-find $(CSDIRS) -type d \( $(CS_PRUNE) \) -prune -o \
-type f \( $(CSFILENAMES) \) -print > $@
cscope.clean: nametable.clean
-$(RM) cscope.out cscope.files .cscope.files.raw
TAGS: cscope.files FORCE
egrep -v '^-|^$$' $< | etags --members -
TAGS.clean: nametable.clean
# .nametable.files and .nametable.files.tmp are used to determine if any files
# were added to/deleted from/renamed in the workspace. If not, then there's
# normally no need to rebuild the cscope database. To force a rebuild of
# the cscope database: gmake nametable.clean.
.nametable.files: .nametable.files.tmp
( cmp -s $@ $< ) || ( cp $< $@ )
-$(RM) $<
# `hg status' is slightly faster than `hg fstatus'. Both are
# quite a bit slower on an NFS mounted file system, so this is
# really geared towards repos on local file systems.
-$(HG) fstatus -acmn > $@
-$(RM) .nametable.files .nametable.files.tmp
.PHONY: cscope cscope.clean TAGS.clean nametable.clean FORCE
@ -205,7 +205,7 @@ SONAMEFLAG = -Xlinker -soname=SONAME
SHARED_FLAG = -shared
# Keep symbols even if they are not used
AOUT_FLAGS += -export-dynamic
AOUT_FLAGS += -Xlinker -export-dynamic
# Debug flags
@ -102,6 +102,10 @@ CFLAGS += $(CFLAGS/NOEX)
# Don't set executable bit on stack segment
# the same could be done by separate execstack command
LFLAGS += -Xlinker -z -Xlinker noexecstack
LIBS += -lm -ldl -lpthread
# By default, link the *.o into the library, not the executable.
@ -296,7 +296,7 @@ clean_compiler1 clean_compiler2 clean_core clean_kernel:
clean: clean_compiler2 clean_compiler1 clean_core clean_docs clean_kernel
include $(GAMMADIR)/make/$(OSNAME)/makefiles/cscope.make
include $(GAMMADIR)/make/cscope.make
@ -1,5 +1,5 @@
# Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
# This code is free software; you can redistribute it and/or modify it
@ -56,6 +56,30 @@ else
SA_LFLAGS += -mt -xnolib -norunpath
# The libproc Pstack_iter() interface changed in Nevada-B159.
# This logic needs to match
# agent/src/os/solaris/proc/saproc.cpp: set_has_newer_Pstack_iter():
# - skip SunOS 4 or older
# - skip Solaris 10 or older
# - skip two digit Nevada builds
# - skip three digit Nevada builds thru 149
# - skip Nevada builds 150-158
$(shell uname -r -v \
| sed -n ' \
/^[0-3]\. /b \
/^5\.[0-9] /b \
/^5\.10 /b \
/ snv_[0-9][0-9]$/b \
/ snv_[01][0-4][0-9]$/b \
/ snv_15[0-8]$/b \
s/.*/-DSOLARIS_11_B159_OR_LATER/p \
# Uncomment the following to simulate building on Nevada-B159 or later
# when actually building on Nevada-B158 or earlier:
$(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \
echo "ALT_BOOTDIR, BOOTDIR or JAVA_HOME needs to be defined to build SA"; \
@ -68,6 +92,7 @@ $(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE)
-I$(BOOT_JAVA_HOME)/include \
-I$(BOOT_JAVA_HOME)/include/$(Platform_os_family) \
-o $@ \
@ -100,11 +100,6 @@ JVM_CHECK_SYMBOLS = $(NM) -u -p $(LIBJVM.o) | \
# Some interfaces (_lwp_create) changed with LP64 and Solaris 7
$(shell uname -r | awk -F. '{ if ($$2 >= 7) print "-DSOLARIS_7_OR_LATER"; }')
# New architecture options started in SS12 (5.9), we need both styles to build.
# The older arch options for SS11 (5.8) or older and also for /usr/ccs/bin/as.
# Note: default for 32bit sparc is now the same as v8plus, so the
@ -1,5 +1,5 @@
# Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
# This code is free software; you can redistribute it and/or modify it
@ -125,7 +125,25 @@ VARIANT_TEXT=Kernel
# or make/hotspot_distro.
!if exists($(WorkSpace)\src\closed)
# If the build is for JDK6 or an earlier version, it should include jdk6_hotspot_distro,
# instead of hotspot_distro.
!if "$(JDK_MAJOR_VERSION)" != "" && "$(JDK_MINOR_VERSION)" != "" && "$(JDK_MICRO_VERSION)" != ""
!if $(JDK_MAJOR_VER) == 1 && $(JDK_MINOR_VER) < 7
!if $(JDK6_OR_EARLIER) == 1
!include $(WorkSpace)\make\jdk6_hotspot_distro
!include $(WorkSpace)\make\hotspot_distro
!include $(WorkSpace)\make\openjdk_distro
@ -260,7 +278,7 @@ $(variantDir)\local.make: checks
@ echo Variant=$(realVariant) >> $@
@ echo WorkSpace=$(WorkSpace) >> $@
@ echo BootStrapDir=$(BootStrapDir) >> $@
@ if "$(USERNAME)" NEQ "" echo BuildUser=$(USERNAME) >> $@
@ if "$(USERNAME)" NEQ "" echo BuildUser=$(USERNAME) >> $@
@ echo HS_VER=$(HS_VER) >> $@
@ echo HS_DOTVER=$(HS_DOTVER) >> $@
@ -1,5 +1,5 @@
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -806,3 +806,34 @@ intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize) - 1;
return &interpreter_frame_tos_address()[index];
#ifdef ASSERT
#define DESCRIBE_FP_OFFSET(name) \
values.describe(-1, fp() + frame::name##_offset, #name)
void frame::describe_pd(FrameValues& values, int frame_no) {
for (int w = 0; w < frame::register_save_words; w++) {
values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1);
if (is_interpreted_frame()) {
if (!is_compiled_frame()) {
if (frame::callee_aggregate_return_pointer_words != 0) {
values.describe(frame_no, sp() + frame::callee_aggregate_return_pointer_sp_offset, "callee_aggregate_return_pointer_word");
for (int w = 0; w < frame::callee_register_argument_save_area_words; w++) {
values.describe(frame_no, sp() + frame::callee_register_argument_save_area_sp_offset + w,
err_msg("callee_register_argument_save_area_words %d", w));
@ -350,8 +350,9 @@ void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
#ifndef PRODUCT
extern "C" void print_method_handle(oop mh);
void trace_method_handle_stub(const char* adaptername,
oopDesc* mh) {
printf("MH %s mh="INTPTR_FORMAT"\n", adaptername, (intptr_t) mh);
oopDesc* mh,
intptr_t* saved_sp) {
tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp);
void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
@ -361,6 +362,7 @@ void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adapt
__ save_frame(16);
__ set((intptr_t) adaptername, O0);
__ mov(G3_method_handle, O1);
__ mov(I5_savedSP, O2);
__ mov(G3_method_handle, L3);
__ mov(Gargs, L4);
__ mov(G5_method_type, L5);
@ -486,7 +488,7 @@ void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHan
if (ek == _invokespecial_mh) {
// Must load & check the first argument before entering the target method.
__ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
__ ld_ptr(__ argument_address(O0_argslot), G3_method_handle);
__ ld_ptr(__ argument_address(O0_argslot, -1), G3_method_handle);
__ null_check(G3_method_handle);
__ verify_oop(G3_method_handle);
@ -643,9 +645,10 @@ void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHan
// Live at this point:
// - G5_klass : klass required by the target method
// - O0_argslot : argslot index in vmarg; may be required in the failing path
// - O1_scratch : argument klass to test
// - G3_method_handle: adapter method handle
__ check_klass_subtype(O1_scratch, G5_klass, O0_argslot, O2_scratch, done);
__ check_klass_subtype(O1_scratch, G5_klass, O2_scratch, O3_scratch, done);
// If we get here, the type check failed!
__ load_heap_oop(G3_amh_argument, O2_required); // required class
@ -1698,35 +1698,21 @@ int AbstractInterpreter::layout_activation(methodOop method,
int local_words = method->max_locals() * Interpreter::stackElementWords;
int parm_words = method->size_of_parameters() * Interpreter::stackElementWords;
intptr_t* locals;
if (caller->is_interpreted_frame()) {
// Can force the locals area to end up properly overlapping the top of the expression stack.
intptr_t* Lesp_ptr = caller->interpreter_frame_tos_address() - 1;
// Note that this computation means we replace size_of_parameters() values from the caller
// interpreter frame's expression stack with our argument locals
locals = Lesp_ptr + parm_words;
int delta = local_words - parm_words;
int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
*interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
if (caller->is_compiled_frame()) {
// Compiled frames do not allocate a varargs area so place them
// next to the register save area.
locals = fp + frame::register_save_words + local_words - 1;
// Caller wants his own SP back
int caller_frame_size = caller->cb()->frame_size();
*interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
} else {
assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
// Don't have Lesp available; lay out locals block in the caller
// adjacent to the register window save area.
// Compiled frames do not allocate a varargs area which is why this if
// statement is needed.
if (caller->is_compiled_frame()) {
locals = fp + frame::register_save_words + local_words - 1;
} else {
locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
if (!caller->is_entry_frame()) {
// Caller wants his own SP back
int caller_frame_size = caller->cb()->frame_size();
*interpreter_frame->register_addr(I5_savedSP) = (intptr_t)(caller->fp() - caller_frame_size) - STACK_BIAS;
assert(caller->is_interpreted_frame() || caller->is_entry_frame(), "only possible cases");
// The entry and interpreter frames are laid out like normal C
// frames so place the locals adjacent to the varargs area.
locals = fp + frame::memory_parameter_word_sp_offset + local_words - 1;
if (caller->is_interpreted_frame()) {
*interpreter_frame->register_addr(I5_savedSP) = (intptr_t) (fp + rounded_cls) - STACK_BIAS;
if (TraceDeoptimization) {
@ -3293,8 +3293,6 @@ void TemplateTable::invokedynamic(int byte_no) {
/*virtual*/ false, /*vfinal*/ false, /*indy*/ true);
__ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
__ verify_oop(G5_callsite);
// profile this call
__ profile_call(O4);
@ -3307,8 +3305,10 @@ void TemplateTable::invokedynamic(int byte_no) {
__ sll(Rret, LogBytesPerWord, Rret);
__ ld_ptr(Rtemp, Rret, Rret); // get return address
__ verify_oop(G5_callsite);
__ load_heap_oop(G5_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, Rscratch), G3_method_handle);
__ null_check(G3_method_handle);
__ verify_oop(G3_method_handle);
// Adjust Rret first so Llast_SP can be same as Rret
__ add(Rret, -frame::pc_return_offset, O7);
@ -6039,6 +6039,43 @@ void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register
call_VM_leaf(entry_point, 3);
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
pass_arg0(this, arg_0);
MacroAssembler::call_VM_leaf_base(entry_point, 1);
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
pass_arg1(this, arg_1);
pass_arg0(this, arg_0);
MacroAssembler::call_VM_leaf_base(entry_point, 2);
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
pass_arg2(this, arg_2);
LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
pass_arg1(this, arg_1);
pass_arg0(this, arg_0);
MacroAssembler::call_VM_leaf_base(entry_point, 3);
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg"));
LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg"));
LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg"));
pass_arg3(this, arg_3);
LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg"));
LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg"));
pass_arg2(this, arg_2);
LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg"));
pass_arg1(this, arg_1);
pass_arg0(this, arg_0);
MacroAssembler::call_VM_leaf_base(entry_point, 4);
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {
@ -1655,6 +1655,14 @@ class MacroAssembler: public Assembler {
void call_VM_leaf(address entry_point,
Register arg_1, Register arg_2, Register arg_3);
// These always tightly bind to MacroAssembler::call_VM_leaf_base
// bypassing the virtual implementation
void super_call_VM_leaf(address entry_point);
void super_call_VM_leaf(address entry_point, Register arg_1);
void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);
// last Java Frame (fills frame anchor)
void set_last_Java_frame(Register thread,
Register last_java_sp,
@ -1,5 +1,5 @@
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -669,3 +669,23 @@ intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
return &interpreter_frame_tos_address()[index];
#ifdef ASSERT
#define DESCRIBE_FP_OFFSET(name) \
values.describe(-1, fp() + frame::name##_offset, #name)
void frame::describe_pd(FrameValues& values, int frame_no) {
if (is_interpreted_frame()) {
@ -383,32 +383,6 @@ void InterpreterMacroAssembler::store_ptr(int n, Register val) {
movptr(Address(rsp, Interpreter::expr_offset_in_bytes(n)), val);
void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point) {
MacroAssembler::call_VM_leaf_base(entry_point, 0);
void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1) {
MacroAssembler::call_VM_leaf_base(entry_point, 1);
void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
MacroAssembler::call_VM_leaf_base(entry_point, 2);
void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
MacroAssembler::call_VM_leaf_base(entry_point, 3);
void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
// set sender sp
lea(rsi, Address(rsp, wordSize));
@ -1,5 +1,5 @@
* Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -124,12 +124,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
void load_ptr(int n, Register val);
void store_ptr(int n, Register val);
// Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls
void super_call_VM_leaf(address entry_point);
void super_call_VM_leaf(address entry_point, Register arg_1);
void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
// Generate a subtype check: branch to ok_is_subtype if sub_klass is
// a subtype of super_klass. EAX holds the super_klass. Blows ECX
// and EDI. Register sub_klass cannot be any of the above.
@ -381,56 +381,6 @@ void InterpreterMacroAssembler::store_ptr(int n, Register val) {
void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point) {
MacroAssembler::call_VM_leaf_base(entry_point, 0);
void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point,
Register arg_1) {
if (c_rarg0 != arg_1) {
mov(c_rarg0, arg_1);
MacroAssembler::call_VM_leaf_base(entry_point, 1);
void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point,
Register arg_1,
Register arg_2) {
assert(c_rarg0 != arg_2, "smashed argument");
assert(c_rarg1 != arg_1, "smashed argument");
if (c_rarg0 != arg_1) {
mov(c_rarg0, arg_1);
if (c_rarg1 != arg_2) {
mov(c_rarg1, arg_2);
MacroAssembler::call_VM_leaf_base(entry_point, 2);
void InterpreterMacroAssembler::super_call_VM_leaf(address entry_point,
Register arg_1,
Register arg_2,
Register arg_3) {
assert(c_rarg0 != arg_2, "smashed argument");
assert(c_rarg0 != arg_3, "smashed argument");
assert(c_rarg1 != arg_1, "smashed argument");
assert(c_rarg1 != arg_3, "smashed argument");
assert(c_rarg2 != arg_1, "smashed argument");
assert(c_rarg2 != arg_2, "smashed argument");
if (c_rarg0 != arg_1) {
mov(c_rarg0, arg_1);
if (c_rarg1 != arg_2) {
mov(c_rarg1, arg_2);
if (c_rarg2 != arg_3) {
mov(c_rarg2, arg_3);
MacroAssembler::call_VM_leaf_base(entry_point, 3);
void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
// set sender sp
lea(r13, Address(rsp, wordSize));
@ -1,5 +1,5 @@
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -136,13 +136,6 @@ class InterpreterMacroAssembler: public MacroAssembler {
void load_ptr(int n, Register val);
void store_ptr(int n, Register val);
// Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls
void super_call_VM_leaf(address entry_point);
void super_call_VM_leaf(address entry_point, Register arg_1);
void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
void super_call_VM_leaf(address entry_point,
Register arg_1, Register arg_2, Register arg_3);
// Generate a subtype check: branch to ok_is_subtype if sub_klass is
// a subtype of super_klass.
void gen_subtype_check( Register sub_klass, Label &ok_is_subtype );
@ -315,56 +315,38 @@ void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
#ifndef PRODUCT
extern "C" void print_method_handle(oop mh);
void trace_method_handle_stub(const char* adaptername,
oop mh,
intptr_t* saved_regs,
intptr_t* entry_sp,
intptr_t* saved_sp,
intptr_t* saved_bp) {
oop mh,
intptr_t* sp) {
// called as a leaf from native code: do not block the JVM!
intptr_t* last_sp = (intptr_t*) saved_bp[frame::interpreter_frame_last_sp_offset];
intptr_t* base_sp = (intptr_t*) saved_bp[frame::interpreter_frame_monitor_block_top_offset];
adaptername, (intptr_t)mh, (intptr_t)entry_sp, (intptr_t)(saved_sp - entry_sp), (intptr_t)(base_sp - last_sp), (intptr_t)saved_bp);
if (last_sp != saved_sp && last_sp != NULL)
printf("*** last_sp="INTPTR_FORMAT"\n", (intptr_t)last_sp);
intptr_t* entry_sp = sp + LP64_ONLY(16) NOT_LP64(8);
tty->print_cr("MH %s mh="INTPTR_FORMAT" sp="INTPTR_FORMAT" saved_sp="INTPTR_FORMAT")",
adaptername, (intptr_t)mh, (intptr_t)entry_sp, saved_sp);
if (Verbose) {
printf(" reg dump: ");
int saved_regs_count = (entry_sp-1) - saved_regs;
// 32 bit: rdi rsi rbp rsp; rbx rdx rcx (*) rax
int i;
for (i = 0; i <= saved_regs_count; i++) {
if (i > 0 && i % 4 == 0 && i != saved_regs_count)
printf("\n + dump: ");
printf(" %d: "INTPTR_FORMAT, i, saved_regs[i]);
int stack_dump_count = 16;
if (stack_dump_count < (int)(saved_bp + 2 - saved_sp))
stack_dump_count = (int)(saved_bp + 2 - saved_sp);
if (stack_dump_count > 64) stack_dump_count = 48;
for (i = 0; i < stack_dump_count; i += 4) {
i, (intptr_t) &entry_sp[i+0], entry_sp[i+0], entry_sp[i+1], entry_sp[i+2], entry_sp[i+3]);
void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {
if (!TraceMethodHandles) return;
BLOCK_COMMENT("trace_method_handle {");
__ push(rax);
__ lea(rax, Address(rsp, wordSize*6)); // entry_sp
__ pusha();
#ifdef _LP64
// Pass arguments carefully since the registers overlap with the calling convention.
// rcx: method handle
// r13: saved sp
__ mov(c_rarg2, rcx); // mh
__ mov(c_rarg1, r13); // saved sp
__ mov(c_rarg3, rsp); // sp
__ movptr(c_rarg0, (intptr_t) adaptername);
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), c_rarg0, c_rarg1, c_rarg2, c_rarg3);
// arguments:
__ push(rbp); // interpreter frame pointer
__ push(rsi); // saved_sp
__ push(rax); // entry_sp
__ push(rcx); // mh
__ push(rcx);
__ movptr(Address(rsp, 0), (intptr_t) adaptername);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), 5);
// rcx: method handle
// rsi: saved sp
__ movptr(rbx, (intptr_t) adaptername);
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub), rbx, rsi, rcx, rsp);
__ popa();
__ pop(rax);
BLOCK_COMMENT("} trace_method_handle");
#endif //PRODUCT
@ -422,7 +422,7 @@ void TemplateTable::fast_aldc(bool wide) {
Label L_done, L_throw_exception;
const Register con_klass_temp = rcx; // same as Rcache
__ movptr(con_klass_temp, Address(rax, oopDesc::klass_offset_in_bytes()));
__ load_klass(con_klass_temp, rax);
__ cmpptr(con_klass_temp, ExternalAddress((address)Universe::systemObjArrayKlassObj_addr()));
__ jcc(Assembler::notEqual, L_done);
__ cmpl(Address(rax, arrayOopDesc::length_offset_in_bytes()), 0);
@ -432,7 +432,7 @@ void TemplateTable::fast_aldc(bool wide) {
// Load the exception from the system-array which wraps it:
__ bind(L_throw_exception);
__ movptr(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ load_heap_oop(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ jump(ExternalAddress(Interpreter::throw_exception_entry()));
__ bind(L_done);
@ -946,9 +946,9 @@ void TemplateTable::aastore() {
__ jcc(Assembler::zero, is_null);
// Move subklass into EBX
__ movptr(rbx, Address(rax, oopDesc::klass_offset_in_bytes()));
__ load_klass(rbx, rax);
// Move superklass into EAX
__ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
__ load_klass(rax, rdx);
__ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
// Compress array+index*wordSize+12 into a single register. Frees ECX.
__ lea(rdx, element_address);
@ -2001,7 +2001,7 @@ void TemplateTable::_return(TosState state) {
if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
assert(state == vtos, "only valid state");
__ movptr(rax, aaddress(0));
__ movptr(rdi, Address(rax, oopDesc::klass_offset_in_bytes()));
__ load_klass(rdi, rax);
__ movl(rdi, Address(rdi, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc)));
__ testl(rdi, JVM_ACC_HAS_FINALIZER);
Label skip_register_finalizer;
@ -2948,7 +2948,7 @@ void TemplateTable::invokevirtual_helper(Register index, Register recv,
// get receiver klass
__ null_check(recv, oopDesc::klass_offset_in_bytes());
// Keep recv in rcx for callee expects it there
__ movptr(rax, Address(recv, oopDesc::klass_offset_in_bytes()));
__ load_klass(rax, recv);
__ verify_oop(rax);
// profile this call
@ -3028,7 +3028,7 @@ void TemplateTable::invokeinterface(int byte_no) {
// Get receiver klass into rdx - also a null check
__ restore_locals(); // restore rdi
__ movptr(rdx, Address(rcx, oopDesc::klass_offset_in_bytes()));
__ load_klass(rdx, rcx);
__ verify_oop(rdx);
// profile this call
@ -3083,6 +3083,7 @@ void TemplateTable::invokeinterface(int byte_no) {
void TemplateTable::invokedynamic(int byte_no) {
transition(vtos, vtos);
assert(byte_no == f1_oop, "use this argument");
if (!EnableInvokeDynamic) {
// We should not encounter this bytecode if !EnableInvokeDynamic.
@ -3095,7 +3096,6 @@ void TemplateTable::invokedynamic(int byte_no) {
assert(byte_no == f1_oop, "use this argument");
prepare_invoke(rax, rbx, byte_no);
// rax: CallSite object (f1)
@ -3106,14 +3106,14 @@ void TemplateTable::invokedynamic(int byte_no) {
Register rax_callsite = rax;
Register rcx_method_handle = rcx;
if (ProfileInterpreter) {
// %%% should make a type profile for any invokedynamic that takes a ref argument
// profile this call
__ profile_call(rsi);
// %%% should make a type profile for any invokedynamic that takes a ref argument
// profile this call
__ profile_call(rsi);
__ movptr(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rcx)));
__ verify_oop(rax_callsite);
__ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rdx)));
__ null_check(rcx_method_handle);
__ verify_oop(rcx_method_handle);
__ prepare_to_jump_from_interpreted();
__ jump_to_method_handle_entry(rcx_method_handle, rdx);
@ -3258,7 +3258,7 @@ void TemplateTable::_new() {
(int32_t)markOopDesc::prototype()); // header
__ pop(rcx); // get saved klass back in the register.
__ movptr(Address(rax, oopDesc::klass_offset_in_bytes()), rcx); // klass
__ store_klass(rax, rcx); // klass
SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
@ -3333,7 +3333,7 @@ void TemplateTable::checkcast() {
__ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(constantPoolOopDesc)));
__ bind(resolved);
__ movptr(rbx, Address(rdx, oopDesc::klass_offset_in_bytes()));
__ load_klass(rbx, rdx);
// Generate subtype check. Blows ECX. Resets EDI. Object in EDX.
// Superklass in EAX. Subklass in EBX.
@ -3376,12 +3376,12 @@ void TemplateTable::instanceof() {
__ push(atos);
call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
__ pop_ptr(rdx);
__ movptr(rdx, Address(rdx, oopDesc::klass_offset_in_bytes()));
__ load_klass(rdx, rdx);
__ jmp(resolved);
// Get superklass in EAX and subklass in EDX
__ bind(quicked);
__ movptr(rdx, Address(rax, oopDesc::klass_offset_in_bytes()));
__ load_klass(rdx, rax);
__ movptr(rax, Address(rcx, rbx, Address::times_ptr, sizeof(constantPoolOopDesc)));
__ bind(resolved);
@ -436,7 +436,7 @@ void TemplateTable::fast_aldc(bool wide) {
Label L_done, L_throw_exception;
const Register con_klass_temp = rcx; // same as cache
const Register array_klass_temp = rdx; // same as index
__ movptr(con_klass_temp, Address(rax, oopDesc::klass_offset_in_bytes()));
__ load_klass(con_klass_temp, rax);
__ lea(array_klass_temp, ExternalAddress((address)Universe::systemObjArrayKlassObj_addr()));
__ cmpptr(con_klass_temp, Address(array_klass_temp, 0));
__ jcc(Assembler::notEqual, L_done);
@ -447,7 +447,7 @@ void TemplateTable::fast_aldc(bool wide) {
// Load the exception from the system-array which wraps it:
__ bind(L_throw_exception);
__ movptr(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ load_heap_oop(rax, Address(rax, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ jump(ExternalAddress(Interpreter::throw_exception_entry()));
__ bind(L_done);
@ -3137,7 +3137,6 @@ void TemplateTable::invokedynamic(int byte_no) {
assert(byte_no == f1_oop, "use this argument");
prepare_invoke(rax, rbx, byte_no);
// rax: CallSite object (f1)
@ -3148,14 +3147,14 @@ void TemplateTable::invokedynamic(int byte_no) {
Register rax_callsite = rax;
Register rcx_method_handle = rcx;
if (ProfileInterpreter) {
// %%% should make a type profile for any invokedynamic that takes a ref argument
// profile this call
__ profile_call(r13);
// %%% should make a type profile for any invokedynamic that takes a ref argument
// profile this call
__ profile_call(r13);
__ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rcx)));
__ verify_oop(rax_callsite);
__ load_heap_oop(rcx_method_handle, Address(rax_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, rdx)));
__ null_check(rcx_method_handle);
__ verify_oop(rcx_method_handle);
__ prepare_to_jump_from_interpreted();
__ jump_to_method_handle_entry(rcx_method_handle, rdx);
@ -441,12 +441,25 @@ void VM_Version::get_processor_features() {
// On family 21 processors default is no sw prefetch
if ( cpu_family() == 21 ) {
// some defaults for AMD family 15h
if ( cpu_family() == 0x15 ) {
// On family 15h processors default is no sw prefetch
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
AllocatePrefetchStyle = 0;
// Also, if some other prefetch style is specified, default instruction type is PREFETCHW
if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
AllocatePrefetchInstr = 3;
// On family 15h processors use XMM and UnalignedLoadStores for Array Copy
if( FLAG_IS_DEFAULT(UseXMMForArrayCopy) ) {
UseXMMForArrayCopy = true;
if( FLAG_IS_DEFAULT(UseUnalignedLoadStores) && UseXMMForArrayCopy ) {
UseUnalignedLoadStores = true;
if( is_intel() ) { // Intel cpus specific settings
@ -12988,6 +12988,53 @@ instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
ins_pipe( pipe_slow );
// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
match(Set limit (LoopLimit (Binary init limit) stride));
effect(TEMP limit_hi, TEMP tmp, KILL flags);
format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
ins_encode %{
int strd = (int)$stride$$constant;
assert(strd != 1 && strd != -1, "sanity");
int m1 = (strd > 0) ? 1 : -1;
// Convert limit to long (EAX:EDX)
__ cdql();
// Convert init to long (init:tmp)
__ movl($tmp$$Register, $init$$Register);
__ sarl($tmp$$Register, 31);
// $limit - $init
__ subl($limit$$Register, $init$$Register);
__ sbbl($limit_hi$$Register, $tmp$$Register);
// + ($stride - 1)
if (strd > 0) {
__ addl($limit$$Register, (strd - 1));
__ adcl($limit_hi$$Register, 0);
__ movl($tmp$$Register, strd);
} else {
__ addl($limit$$Register, (strd + 1));
__ adcl($limit_hi$$Register, -1);
__ lneg($limit_hi$$Register, $limit$$Register);
__ movl($tmp$$Register, -strd);
// signed division: (EAX:EDX) / pos_stride
__ idivl($tmp$$Register);
if (strd < 0) {
// restore sign
__ negl($tmp$$Register);
// (EAX) * stride
__ mull($tmp$$Register);
// + init (ignore upper bits)
__ addl($limit$$Register, $init$$Register);
ins_pipe( pipe_slow );
// ============================================================================
// Branch Instructions
// Jump Table
@ -1,5 +1,5 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -29,19 +29,25 @@
// Defines Linux specific flags. They are not available on other platforms.
#define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
product(bool, UseOprofile, false, \
"enable support for Oprofile profiler") \
product(bool, UseLinuxPosixThreadCPUClocks, true, \
"enable fast Linux Posix clocks where available")
// NB: The default value of UseLinuxPosixThreadCPUClocks may be
// overridden in Arguments::parse_each_vm_init_arg.
product(bool, UseOprofile, false, \
"enable support for Oprofile profiler") \
product(bool, UseLinuxPosixThreadCPUClocks, true, \
"enable fast Linux Posix clocks where available") \
/* NB: The default value of UseLinuxPosixThreadCPUClocks may be \
overridden in Arguments::parse_each_vm_init_arg. */ \
product(bool, UseHugeTLBFS, false, \
"Use MAP_HUGETLB for large pages") \
product(bool, UseSHM, false, \
"Use SYSV shared memory for large pages")
// Defines Linux-specific default values. The flags are available on all
// platforms, but they may have different default values on other platforms.
define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePages, true);
define_pd_global(bool, UseLargePagesIndividualAllocation, false);
define_pd_global(bool, UseOSErrorReporting, false);
define_pd_global(bool, UseThreadPriorities, true) ;
@ -2465,16 +2465,40 @@ bool os::commit_memory(char* addr, size_t size, bool exec) {
return res != (uintptr_t) MAP_FAILED;
// Define MAP_HUGETLB here so we can build HotSpot on old systems.
#define MAP_HUGETLB 0x40000
// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
#define MADV_HUGEPAGE 14
bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
bool exec) {
if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
uintptr_t res =
(uintptr_t) ::mmap(addr, size, prot,
-1, 0);
return res != (uintptr_t) MAP_FAILED;
return commit_memory(addr, size, exec);
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
// We don't check the return value: madvise(MADV_HUGEPAGE) may not
// be supported or the memory may already be backed by huge pages.
::madvise(addr, bytes, MADV_HUGEPAGE);
void os::free_memory(char *addr, size_t bytes) {
::mmap(addr, bytes, PROT_READ | PROT_WRITE,
::madvise(addr, bytes, MADV_DONTNEED);
void os::numa_make_global(char *addr, size_t bytes) {
@ -2812,6 +2836,43 @@ bool os::unguard_memory(char* addr, size_t size) {
return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
bool result = false;
void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
-1, 0);
if (p != (void *) -1) {
// We don't know if this really is a huge page or not.
FILE *fp = fopen("/proc/self/maps", "r");
if (fp) {
while (!feof(fp)) {
char chars[257];
long x = 0;
if (fgets(chars, sizeof(chars), fp)) {
if (sscanf(chars, "%lx-%*lx", &x) == 1
&& x == (long)p) {
if (strstr (chars, "hugepage")) {
result = true;
munmap (p, page_size);
if (result)
return true;
if (warn) {
warning("HugeTLBFS is not supported by the operating system.");
return result;
* Set the coredump_filter bits to include largepages in core dump (bit 6)
@ -2853,8 +2914,22 @@ static void set_coredump_filter(void) {
static size_t _large_page_size = 0;
bool os::large_page_init() {
if (!UseLargePages) return false;
void os::large_page_init() {
if (!UseLargePages) {
UseHugeTLBFS = false;
UseSHM = false;
// If UseLargePages is specified on the command line try both methods,
// if it's default, then try only HugeTLBFS.
if (FLAG_IS_DEFAULT(UseLargePages)) {
UseHugeTLBFS = true;
} else {
UseHugeTLBFS = UseSHM = true;
if (LargePageSizeInBytes) {
_large_page_size = LargePageSizeInBytes;
@ -2899,20 +2974,24 @@ bool os::large_page_init() {
// print a warning if any large page related flag is specified on command line
bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
const size_t default_page_size = (size_t)Linux::page_size();
if (_large_page_size > default_page_size) {
_page_sizes[0] = _large_page_size;
_page_sizes[1] = default_page_size;
_page_sizes[2] = 0;
UseHugeTLBFS = UseHugeTLBFS &&
Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
if (UseHugeTLBFS)
UseSHM = false;
UseLargePages = UseHugeTLBFS || UseSHM;
// Large page support is available on 2.6 or newer kernel, some vendors
// (e.g. Redhat) have backported it to their 2.4 based distributions.
// We optimistically assume the support is available. If later it turns out
// not true, VM will automatically switch to use regular page size.
return true;
@ -2922,7 +3001,7 @@ bool os::large_page_init() {
char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
// "exec" is passed in but not used. Creating the shared image for
// the code cache doesn't have an SHM_X executable permission to check.
assert(UseLargePages, "only for large pages");
assert(UseLargePages && UseSHM, "only for SHM large pages");
key_t key = IPC_PRIVATE;
char *addr;
@ -2989,16 +3068,15 @@ size_t os::large_page_size() {
return _large_page_size;
// Linux does not support anonymous mmap with large page memory. The only way
// to reserve large page memory without file backing is through SysV shared
// memory API. The entire memory region is committed and pinned upfront.
// Hopefully this will change in the future...
// HugeTLBFS allows application to commit large page memory on demand;
// with SysV SHM the entire memory region must be allocated as shared
// memory.
bool os::can_commit_large_page_memory() {
return false;
return UseHugeTLBFS;
bool os::can_execute_large_page_memory() {
return false;
return UseHugeTLBFS;
// Reserve memory at an arbitrary address, only if that area is
@ -4038,7 +4116,7 @@ jint os::init_2(void)
FLAG_SET_DEFAULT(UseLargePages, os::large_page_init());
// initialize suspend/resume support - must do this before signal_sets_init()
if (SR_initialize() != 0) {
@ -4090,6 +4168,23 @@ jint os::init_2(void)
UseNUMA = false;
// With SHM large pages we cannot uncommit a page, so there's no way
// we can make the adaptive lgrp chunk resizing work. If the user specified
// both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and
// disable adaptive resizing.
if (UseNUMA && UseLargePages && UseSHM) {
UseLargePages = false;
} else {
warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing");
UseAdaptiveSizePolicy = false;
UseAdaptiveNUMAChunkSizing = false;
} else {
UseNUMA = false;
if (!UseNUMA && ForceNUMA) {
UseNUMA = true;
@ -86,6 +86,9 @@ class Linux {
static void rebuild_cpu_to_node_map();
static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
static void init_thread_fpu_state();
static int get_fpu_control_word();
@ -2826,7 +2826,9 @@ bool os::remove_stack_guard_pages(char* addr, size_t size) {
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
assert((intptr_t)addr % alignment_hint == 0, "Address should be aligned.");
assert((intptr_t)(addr + bytes) % alignment_hint == 0, "End should be aligned.");
Solaris::set_mpss_range(addr, bytes, alignment_hint);
if (UseLargePages && UseMPSS) {
Solaris::set_mpss_range(addr, bytes, alignment_hint);
// Tell the OS to make the range local to the first-touching LWP
@ -3334,11 +3336,11 @@ bool os::Solaris::mpss_sanity_check(bool warn, size_t * page_size) {
return true;
bool os::large_page_init() {
void os::large_page_init() {
if (!UseLargePages) {
UseISM = false;
UseMPSS = false;
return false;
// print a warning if any large page related flag is specified on command line
@ -3359,7 +3361,6 @@ bool os::large_page_init() {
Solaris::mpss_sanity_check(warn_on_failure, &_large_page_size);
UseLargePages = UseISM || UseMPSS;
return UseLargePages;
bool os::Solaris::set_mpss_range(caddr_t start, size_t bytes, size_t align) {
@ -4990,7 +4991,7 @@ jint os::init_2(void) {
FLAG_SET_DEFAULT(UseLargePages, os::large_page_init());
// Check minimum allowable stack size for thread creation and to initialize
// the java system classes, including StackOverflowError - depends on page
@ -5044,6 +5045,20 @@ jint os::init_2(void) {
UseNUMA = false;
// ISM is not compatible with the NUMA allocator - it always allocates
// pages round-robin across the lgroups.
if (UseNUMA && UseLargePages && UseISM) {
UseLargePages = false;
} else {
warning("UseNUMA is not compatible with ISM large pages, disabling NUMA allocator");
UseNUMA = false;
} else {
UseNUMA = false;
if (!UseNUMA && ForceNUMA) {
UseNUMA = true;
@ -2762,8 +2762,8 @@ static void cleanup_after_large_page_init() {
_hToken = NULL;
bool os::large_page_init() {
if (!UseLargePages) return false;
void os::large_page_init() {
if (!UseLargePages) return;
// print a warning if any large page related flag is specified on command line
bool warn_on_failure = !FLAG_IS_DEFAULT(UseLargePages) ||
@ -2808,7 +2808,7 @@ bool os::large_page_init() {
return success;
UseLargePages = success;
// On win32, one cannot release just a part of reserved memory, it's an
@ -3561,7 +3561,7 @@ jint os::init_2(void) {
FLAG_SET_DEFAULT(UseLargePages, os::large_page_init());
// Setup Windows Exceptions
@ -93,7 +93,7 @@ inline void OrderAccess::release_store_ptr(volatile void* p, void* v)
inline void OrderAccess::store_fence(jbyte* p, jbyte v) {
__asm__ volatile ( "xchgb (%2),%0"
: "=r" (v)
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
@ -155,7 +155,7 @@ inline void OrderAccess::store_ptr_fence(void** p, void* v) {
// Must duplicate definitions instead of calling store_fence because we don't want to cast away volatile.
inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) {
__asm__ volatile ( "xchgb (%2),%0"
: "=r" (v)
: "=q" (v)
: "0" (v), "r" (p)
: "memory");
@ -1026,9 +1026,21 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_i
// first replace the tail, then the call
#ifdef ARM
if(stub_id == Runtime1::load_klass_patching_id && !VM_Version::supports_movw()) {
nmethod* nm = CodeCache::find_nmethod(instr_pc);
oop* oop_addr = NULL;
assert(nm != NULL, "invalid nmethod_pc");
RelocIterator oops(nm, copy_buff, copy_buff + 1);
while ( {
if (oops.type() == relocInfo::oop_type) {
oop_Relocation* r = oops.oop_reloc();
oop_addr = r->oop_addr();
assert(oop_addr != NULL, "oop relocation must exist");
copy_buff -= *byte_count;
NativeMovConstReg* n_copy2 = nativeMovConstReg_at(copy_buff);
n_copy2->set_data((intx) (load_klass()), instr_pc);
n_copy2->set_pc_relative_offset((address)oop_addr, instr_pc);
@ -232,14 +232,7 @@ void BCEscapeAnalyzer::invoke(StateInfo &state, Bytecodes::Code code, ciMethod*
// compute size of arguments
int arg_size = target->arg_size();
if (code == Bytecodes::_invokedynamic) {
assert(!target->is_static(), "receiver explicit in method");
arg_size--; // implicit, not really on stack
if (!target->is_loaded() && code == Bytecodes::_invokestatic) {
int arg_size = target->invoke_arg_size(code);
int arg_base = MAX2(state._stack_height - arg_size, 0);
// direct recursive calls are skipped if they can be bound statically without introducing
@ -756,7 +756,7 @@ ciMethod* ciEnv::get_fake_invokedynamic_method_impl(constantPoolHandle cpool,
assert(bc == Bytecodes::_invokedynamic, "must be invokedynamic");
bool is_resolved = cpool->cache()->main_entry_at(index)->is_resolved(bc);
if (is_resolved && (oop) cpool->cache()->secondary_entry_at(index)->f1() == NULL)
if (is_resolved && cpool->cache()->secondary_entry_at(index)->is_f1_null())
// FIXME: code generation could allow for null (unlinked) call site
is_resolved = false;
@ -770,7 +770,7 @@ ciMethod* ciEnv::get_fake_invokedynamic_method_impl(constantPoolHandle cpool,
// Get the invoker methodOop from the constant pool.
oop f1_value = cpool->cache()->main_entry_at(index)->f1();
methodOop signature_invoker = methodOop(f1_value);
methodOop signature_invoker = (methodOop) f1_value;
assert(signature_invoker != NULL && signature_invoker->is_method() && signature_invoker->is_method_handle_invoke(),
"correct result from LinkResolver::resolve_invokedynamic");
@ -127,7 +127,24 @@ class ciMethod : public ciObject {
ciSignature* signature() const { return _signature; }
ciType* return_type() const { return _signature->return_type(); }
int arg_size_no_receiver() const { return _signature->size(); }
int arg_size() const { return _signature->size() + (_flags.is_static() ? 0 : 1); }
// Can only be used on loaded ciMethods
int arg_size() const {
return _signature->size() + (_flags.is_static() ? 0 : 1);
// Report the number of elements on stack when invoking this method.
// This is different than the regular arg_size because invokedynamic
// has an implicit receiver.
int invoke_arg_size(Bytecodes::Code code) const {
int arg_size = _signature->size();
// Add a receiver argument, maybe:
if (code != Bytecodes::_invokestatic &&
code != Bytecodes::_invokedynamic) {
return arg_size;
// Method code and related information.
address code() { if (_code == NULL) load_code(); return _code; }
@ -276,9 +293,9 @@ class ciMethod : public ciObject {
void print_short_name(outputStream* st = tty);
methodOop get_method_handle_target() {
klassOop receiver_limit_oop = NULL;
int flags = 0;
return MethodHandles::decode_method(get_oop(), receiver_limit_oop, flags);
KlassHandle receiver_limit; int flags = 0;
methodHandle m = MethodHandles::decode_method(get_oop(), receiver_limit, flags);
return m();
@ -194,6 +194,16 @@ bool ciObject::can_be_constant() {
// ciObject::should_be_constant()
// Decides whether this object should be treated as a constant.
//  - Always true when ScavengeRootsInCode >= 2.
//  - When JavaObjectsInPerm is off, a non-null object whose klass is
//    String or Class is reported as a constant.
//  - Otherwise the answer is true when there is no handle or the object
//    is not scavengable.
bool ciObject::should_be_constant() {
if (ScavengeRootsInCode >= 2) return true; // force everybody to be a constant
if (!JavaObjectsInPerm && !is_null_object()) {
// We want Strings and Classes to be embeddable by default since
// they used to be in the perm world. Not all Strings used to be
// embeddable but there's no easy way to distinguish the interned
// from the regular ones so just treat them all that way.
ciEnv* env = CURRENT_ENV;
if (klass() == env->String_klass() || klass() == env->Class_klass()) {
return true;
// Fall back to the handle / scavengability test.
return handle() == NULL || !is_scavengable();
@ -1357,7 +1357,7 @@ class BacktraceBuilder: public StackObj {
void java_lang_Throwable::fill_in_stack_trace(Handle throwable, TRAPS) {
void java_lang_Throwable::fill_in_stack_trace(Handle throwable, methodHandle method, TRAPS) {
if (!StackTraceInThrowable) return;
ResourceMark rm(THREAD);
@ -1374,6 +1374,16 @@ void java_lang_Throwable::fill_in_stack_trace(Handle throwable, TRAPS) {
JavaThread* thread = (JavaThread*)THREAD;
BacktraceBuilder bt(CHECK);
// If there is no Java frame just return the method that was being called
// with bci 0
if (!thread->has_last_Java_frame()) {
if (max_depth >= 1 && method() != NULL) {
bt.push(method(), 0, CHECK);
set_backtrace(throwable(), bt.backtrace());
// Instead of using vframe directly, this version of fill_in_stack_trace
// basically handles everything by hand. This significantly improved the
// speed of this method call up to 28.5% on Solaris sparc. 27.1% on Windows.
@ -1477,7 +1487,7 @@ void java_lang_Throwable::fill_in_stack_trace(Handle throwable, TRAPS) {
set_backtrace(throwable(), bt.backtrace());
void java_lang_Throwable::fill_in_stack_trace(Handle throwable) {
void java_lang_Throwable::fill_in_stack_trace(Handle throwable, methodHandle method) {
// No-op if stack trace is disabled
if (!StackTraceInThrowable) {
@ -1491,7 +1501,7 @@ void java_lang_Throwable::fill_in_stack_trace(Handle throwable) {
JavaThread* thread = JavaThread::active();
fill_in_stack_trace(throwable, thread);
fill_in_stack_trace(throwable, method, thread);
// ignore exceptions thrown during stack trace filling
@ -440,8 +440,8 @@ class java_lang_Throwable: AllStatic {
static void fill_in_stack_trace_of_preallocated_backtrace(Handle throwable);
// Fill in current stack trace, can cause GC
static void fill_in_stack_trace(Handle throwable, TRAPS);
static void fill_in_stack_trace(Handle throwable);
static void fill_in_stack_trace(Handle throwable, methodHandle method, TRAPS);
static void fill_in_stack_trace(Handle throwable, methodHandle method = methodHandle());
// Programmatic access to stack trace
static oop get_stack_trace_element(oop throwable, int index, TRAPS);
static int get_stack_trace_depth(oop throwable, TRAPS);
@ -976,6 +976,15 @@ void CompileBroker::compile_method_base(methodHandle method,
// If the requesting thread is holding the pending list lock
// then we just return. We can't risk blocking while holding
// the pending list lock or a 3-way deadlock may occur
// between the reference handler thread, a GC (instigated
// by a compiler thread), and compiled method registration.
if (instanceRefKlass::owns_pending_list_lock(JavaThread::current())) {
// Outputs from the following MutexLocker block:
CompileTask* task = NULL;
bool blocking = false;
@ -1304,17 +1313,8 @@ uint CompileBroker::assign_compile_id(methodHandle method, int osr_bci) {
// Should the current thread be blocked until this compilation request
// has been fulfilled?
bool CompileBroker::is_compile_blocking(methodHandle method, int osr_bci) {
if (!BackgroundCompilation) {
Symbol* class_name = method->method_holder()->klass_part()->name();
if (class_name->starts_with("java/lang/ref/Reference", 23)) {
// The reference handler thread can dead lock with the GC if compilation is blocking,
// so we avoid blocking compiles for anything in the java.lang.ref.Reference class,
// including inner classes such as ReferenceHandler.
return false;
return true;
return false;
assert(!instanceRefKlass::owns_pending_list_lock(JavaThread::current()), "possible deadlock");
return !BackgroundCompilation;
@ -1963,10 +1963,21 @@ CompactibleFreeListSpace::gc_epilogue() {
// Iteration support, mostly delegated from a CMS generation
void CompactibleFreeListSpace::save_marks() {
// mark the "end" of the used space at the time of this call;
"Global variable should only be set when single-threaded");
// Mark the "end" of the used space at the time of this call;
// note, however, that promoted objects from this point
// on are tracked in the _promoInfo below.
#ifdef ASSERT
// Check the sanity of save_marks() etc.
MemRegion ur = used_region();
MemRegion urasm = used_region_at_save_marks();
err_msg(" Error at save_marks(): [" PTR_FORMAT "," PTR_FORMAT ")"
" should contain [" PTR_FORMAT "," PTR_FORMAT ")",
ur.start(), ur.end(), urasm.start(), urasm.end()));
// inform allocator that promotions should be tracked.
assert(_promoInfo.noPromotions(), "_promoInfo inconsistency");
@ -3189,10 +3189,9 @@ bool ConcurrentMarkSweepGeneration::is_too_full() const {
void CMSCollector::setup_cms_unloading_and_verification_state() {
const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
|| VerifyBeforeExit;
const int rso = SharedHeap::SO_Symbols | SharedHeap::SO_Strings
| SharedHeap::SO_CodeCache;
const int rso = SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
if (should_unload_classes()) { // Should unload classes this cycle
remove_root_scanning_option(rso); // Shrink the root set appropriately
@ -826,6 +826,14 @@ public:
void ConcurrentMark::checkpointRootsInitialPost() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
// If we force an overflow during remark, the remark operation will
// actually abort and we'll restart concurrent marking. If we always
// force an overflow during remark we'll never actually complete the
// marking phase. So, we initialize this here, at the start of the
// cycle, so that the remaining overflow number will decrease at
// every remark and we'll eventually not need to cause one.
// For each region note start of marking.
NoteStartOfMarkHRClosure startcl;
@ -893,27 +901,37 @@ void ConcurrentMark::checkpointRootsInitial() {
Notice that in the next two methods, we actually leave the STS
during the barrier sync and join it immediately afterwards. If we
do not do this, this then the following deadlock can occur: one
thread could be in the barrier sync code, waiting for the other
thread to also sync up, whereas another one could be trying to
yield, while also waiting for the other threads to sync up too.
Because the thread that does the sync barrier has left the STS, it
is possible to be suspended for a Full GC or an evacuation pause
could occur. This is actually safe, since the entering the sync
barrier is one of the last things do_marking_step() does, and it
doesn't manipulate any data structures afterwards.
* Notice that in the next two methods, we actually leave the STS
* during the barrier sync and join it immediately afterwards. If we
* do not do this, the following deadlock can occur: one thread could
* be in the barrier sync code, waiting for the other thread to also
* sync up, whereas another one could be trying to yield, while also
* waiting for the other threads to sync up too.
* Note, however, that this code is also used during remark and in
* this case we should not attempt to leave / enter the STS, otherwise
* we'll either hit an assert (debug / fastdebug) or deadlock
* (product). So we should only leave / enter the STS if we are
* operating concurrently.
* Because the thread that does the sync barrier has left the STS, it
* is possible to be suspended for a Full GC or an evacuation pause
* could occur. This is actually safe, since entering the sync
* barrier is one of the last things do_marking_step() does, and it
* doesn't manipulate any data structures afterwards.
void ConcurrentMark::enter_first_sync_barrier(int task_num) {
if (verbose_low())
gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
if (concurrent()) {
if (concurrent()) {
// at this point everyone should have synced up and not be doing any
// more work
@ -923,7 +941,12 @@ void ConcurrentMark::enter_first_sync_barrier(int task_num) {
// let task 0 do this
if (task_num == 0) {
// task 0 is responsible for clearing the global data structures
// We should be here because of an overflow. During STW we should
// not clear the overflow flag since we rely on it being true when
// we exit this method to abort the pause and restart concurrent
// marking.
clear_marking_state(concurrent() /* clear_overflow */);
if (PrintGC) {
@ -940,15 +963,45 @@ void ConcurrentMark::enter_second_sync_barrier(int task_num) {
if (verbose_low())
gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
if (concurrent()) {
if (concurrent()) {
// at this point everything should be re-initialised and ready to go
if (verbose_low())
gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
#ifndef PRODUCT
// Resets the forced-overflow bookkeeping (compiled only when PRODUCT is not
// defined): reloads the number of overflows still to be forced from
// G1ConcMarkForceOverflow and clears any pending force request.
// NOTE(review): this excerpt omits some lines (e.g. the closing brace), so
// the body may contain statements that are not shown here - confirm.
void ForceOverflowSettings::init() {
_num_remaining = G1ConcMarkForceOverflow;
_force = false;
// Arms the next forced overflow: while forced overflows remain, consumes one
// from _num_remaining and sets _force; once none remain, clears _force.
void ForceOverflowSettings::update() {
if (_num_remaining > 0) {
_num_remaining -= 1;
_force = true;
} else {
_force = false;
// Reports whether an overflow should be forced now. The request is one-shot:
// a set _force is cleared before returning true, so a subsequent call
// returns false until _force is set again.
bool ForceOverflowSettings::should_force() {
if (_force) {
// Consume the request so it fires only once.
_force = false;
return true;
} else {
return false;
#endif // !PRODUCT
void ConcurrentMark::grayRoot(oop p) {
HeapWord* addr = (HeapWord*) p;
// We can't really check against _heap_start and _heap_end, since it
@ -1117,6 +1170,7 @@ void ConcurrentMark::markFromRoots() {
_restart_for_overflow = false;
size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
set_phase(active_workers, true /* concurrent */);
CMConcurrentMarkingTask markingTask(this, cmThread());
@ -1845,7 +1899,7 @@ void ConcurrentMark::completeCleanup() {
while (!_cleanup_list.is_empty()) {
HeapRegion* hr = _cleanup_list.remove_head();
assert(hr != NULL, "the list was not empty");
// Instead of adding one region at a time to the secondary_free_list,
@ -2703,12 +2757,16 @@ void ConcurrentMark::oops_do(OopClosure* cl) {
void ConcurrentMark::clear_marking_state() {
void ConcurrentMark::clear_marking_state(bool clear_overflow) {
if (clear_overflow) {
} else {
assert(has_overflown(), "pre-condition");
_finger = _heap_start;
for (int i = 0; i < (int)_max_task_num; ++i) {
@ -4279,6 +4337,15 @@ void CMTask::do_marking_step(double time_target_ms,
// If we are about to wrap up and go into termination, check if we
// should raise the overflow flag.
if (do_termination && !has_aborted()) {
if (_cm->force_overflow()->should_force()) {
// We still haven't aborted. Now, let's try to get into the
// termination protocol.
if (do_termination && !has_aborted()) {
@ -316,6 +316,19 @@ public:
void setEmpty() { _index = 0; clear_overflow(); }
// Bookkeeping used to artificially force marking overflows.
// NOTE(review): the state fields exist only when PRODUCT is not defined;
// PRODUCT_RETURN / PRODUCT_RETURN_ presumably supply trivial bodies for
// product builds (should_force() then returning false) - confirm against
// the macro definitions.
class ForceOverflowSettings VALUE_OBJ_CLASS_SPEC {
#ifndef PRODUCT
// Number of overflows still to be forced; decremented by update().
uintx _num_remaining;
// Set by update() when the next should_force() query should answer true.
bool _force;
#endif // !defined(PRODUCT)
// Reloads _num_remaining from G1ConcMarkForceOverflow and clears _force.
void init() PRODUCT_RETURN;
// Arms one forced overflow if any remain, otherwise clears _force.
void update() PRODUCT_RETURN;
// One-shot query: returns true (and clears _force) when a force is armed.
bool should_force() PRODUCT_RETURN_( return false; );
// this will enable a variety of different statistics per GC task
#define _MARKING_STATS_ 0
// this will enable the higher verbose levels
@ -462,6 +475,9 @@ protected:
WorkGang* _parallel_workers;
ForceOverflowSettings _force_overflow_conc;
ForceOverflowSettings _force_overflow_stw;
void weakRefsWork(bool clear_all_soft_refs);
void swapMarkBitMaps();
@ -470,7 +486,7 @@ protected:
// task local ones; should be called during initial mark.
void reset();
// It resets all the marking data structures.
void clear_marking_state();
void clear_marking_state(bool clear_overflow = true);
// It should be called to indicate which phase we're in (concurrent
// mark or remark) and how many threads are currently active.
@ -547,6 +563,22 @@ protected:
void enter_first_sync_barrier(int task_num);
void enter_second_sync_barrier(int task_num);
// Returns the force-overflow settings instance that force_overflow()
// selects while concurrent() is true.
ForceOverflowSettings* force_overflow_conc() {
return &_force_overflow_conc;
// Returns the force-overflow settings instance that force_overflow()
// selects while concurrent() is false (the "stw" instance).
ForceOverflowSettings* force_overflow_stw() {
return &_force_overflow_stw;
// Returns the force-overflow settings matching the current phase: the
// concurrent instance when concurrent() is true, the stw instance otherwise.
ForceOverflowSettings* force_overflow() {
if (concurrent()) {
return force_overflow_conc();
} else {
return force_overflow_stw();
// Manipulation of the global mark stack.
// Notice that the first mark_stack_push is CAS-based, whereas the
@ -1161,6 +1161,7 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
TraceTime t(system_gc ? "Full GC (System.gc())" : "Full GC",
PrintGC, true, gclog_or_tty);
TraceCollectorStats tcs(g1mm()->full_collection_counters());
TraceMemoryManagerStats tms(true /* fullGC */);
double start = os::elapsedTime();
@ -1339,6 +1340,7 @@ bool G1CollectedHeap::do_collection(bool explicit_gc,
if (PrintHeapAtGC) {
return true;
@ -1971,6 +1973,10 @@ jint G1CollectedHeap::initialize() {
// Create the monitoring and management support so that
// values in the heap have been properly initialized.
_g1mm = new G1MonitoringSupport(this, &_g1_storage);
return JNI_OK;
@ -2113,6 +2119,28 @@ bool G1CollectedHeap::should_do_concurrent_full_gc(GCCause::Cause cause) {
(cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent));
#ifndef PRODUCT
// Testing aid compiled only when PRODUCT is not defined: tries to allocate up
// to G1DummyRegionsPerGC humongous objects, each sized to fill most of a
// region, through the regular humongous allocation path.
// NOTE(review): short lines are missing from this excerpt - what is done
// with `mr` and how the loop is left in the else branch are not visible
// here; confirm against the full source.
void G1CollectedHeap::allocate_dummy_regions() {
// Let's fill up most of the region
size_t word_size = HeapRegion::GrainWords - 1024;
// And as a result the region we'll allocate will be humongous.
guarantee(isHumongous(word_size), "sanity");
for (uintx i = 0; i < G1DummyRegionsPerGC; ++i) {
// Let's use the existing mechanism for the allocation
HeapWord* dummy_obj = humongous_obj_allocate(word_size);
if (dummy_obj != NULL) {
// Describe the words just allocated for the dummy object.
MemRegion mr(dummy_obj, word_size);
} else {
// If we can't allocate once, we probably cannot allocate
// again. Let's get out of the loop.
#endif // !PRODUCT
void G1CollectedHeap::increment_full_collections_completed(bool concurrent) {
MonitorLockerEx x(FullGCCount_lock, Mutex::_no_safepoint_check_flag);
@ -2777,17 +2805,26 @@ void G1CollectedHeap::verify(bool allow_dirty,
bool silent,
bool use_prev_marking) {
if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
if (!silent) { gclog_or_tty->print("roots "); }
if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); }
VerifyRootsClosure rootsCl(use_prev_marking);
CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false);
process_strong_roots(true, // activate StrongRootsScope
// We apply the relevant closures to all the oops in the
// system dictionary, the string table and the code cache.
const int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
process_strong_roots(true, // activate StrongRootsScope
true, // we set "collecting perm gen" to true,
// so we don't reset the dirty cards in the perm gen.
SharedHeap::ScanningOption(so), // roots scanning options
// Since we used "collecting_perm_gen" == true above, we will not have
// checked the refs from perm into the G1-collected heap. We check those
// references explicitly below. Whether the relevant cards are dirty
// is checked further below in the rem set verification.
if (!silent) { gclog_or_tty->print("Permgen roots "); }
bool failures = rootsCl.failures();
rem_set()->invalidate(perm_gen()->used_region(), false);
if (!silent) { gclog_or_tty->print("HeapRegionSets "); }
if (!silent) { gclog_or_tty->print("HeapRegions "); }
@ -3164,6 +3201,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
TraceMemoryManagerStats tms(false /* fullGC */);
// If the secondary_free_list is not empty, append it to the
@ -3338,6 +3376,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
gclog_or_tty->print_cr("\nEnd of the pause.\nYoung_list:");
@ -3401,6 +3441,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) {
if (PrintHeapAtGC) {
if (G1SummarizeRSetStats &&
(G1SummarizeRSetStatsPeriod > 0) &&
(total_collections() % G1SummarizeRSetStatsPeriod == 0)) {
@ -3933,6 +3975,9 @@ void G1CollectedHeap::drain_evac_failure_scan_stack() {
G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
oop old) {
err_msg("obj: "PTR_FORMAT" should still be in the CSet",
(HeapWord*) old));
markOop m = old->mark();
oop forward_ptr = old->forward_to_atomic(old);
if (forward_ptr == NULL) {
@ -3955,7 +4000,13 @@ G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
return old;
} else {
// Someone else had a place to copy it.
// Forward-to-self failed. Either someone else managed to allocate
// space for this object (old != forward_ptr) or they beat us in
// self-forwarding it (old == forward_ptr).
assert(old == forward_ptr || !obj_in_cs(forward_ptr),
err_msg("obj: "PTR_FORMAT" forwarded to: "PTR_FORMAT" "
"should not be in the CSet",
(HeapWord*) old, (HeapWord*) forward_ptr));
return forward_ptr;
@ -4266,11 +4317,10 @@ template <class T> void G1ParCopyHelper::mark_forwardee(T* p) {
T heap_oop = oopDesc::load_heap_oop(p);
if (!oopDesc::is_null(heap_oop)) {
oop obj = oopDesc::decode_heap_oop(heap_oop);
assert((_g1->evacuation_failed()) || (!_g1->obj_in_cs(obj)),
"shouldn't still be in the CSet if evacuation didn't fail.");
HeapWord* addr = (HeapWord*)obj;
if (_g1->is_in_g1_reserved(addr))
if (_g1->is_in_g1_reserved(addr)) {
@ -4919,36 +4969,45 @@ public:
#ifndef PRODUCT
class G1VerifyCardTableCleanup: public HeapRegionClosure {
G1CollectedHeap* _g1h;
CardTableModRefBS* _ct_bs;
G1VerifyCardTableCleanup(CardTableModRefBS* ct_bs)
: _ct_bs(ct_bs) { }
G1VerifyCardTableCleanup(G1CollectedHeap* g1h, CardTableModRefBS* ct_bs)
: _g1h(g1h), _ct_bs(ct_bs) { }
virtual bool doHeapRegion(HeapRegion* r) {
MemRegion mr(r->bottom(), r->end());
if (r->is_survivor()) {
} else {
return false;
void G1CollectedHeap::verify_not_dirty_region(HeapRegion* hr) {
// All of the region should be clean.
CardTableModRefBS* ct_bs = (CardTableModRefBS*)barrier_set();
MemRegion mr(hr->bottom(), hr->end());
void G1CollectedHeap::verify_dirty_region(HeapRegion* hr) {
// We cannot guarantee that [bottom(),end()] is dirty. Threads
// dirty allocated blocks as they allocate them. The thread that
// retires each region and replaces it with a new one will do a
// maximal allocation to fill in [pre_dummy_top(),end()] but will
// not dirty that area (one less thing to have to do while holding
// a lock). So we can only verify that [bottom(),pre_dummy_top()]
// is dirty.
CardTableModRefBS* ct_bs = (CardTableModRefBS*) barrier_set();
MemRegion mr(hr->bottom(), hr->pre_dummy_top());
void G1CollectedHeap::verify_dirty_young_list(HeapRegion* head) {
CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
CardTableModRefBS* ct_bs = (CardTableModRefBS*) barrier_set();
for (HeapRegion* hr = head; hr != NULL; hr = hr->get_next_young_region()) {
// We cannot guarantee that [bottom(),end()] is dirty. Threads
// dirty allocated blocks as they allocate them. The thread that
// retires each region and replaces it with a new one will do a
// maximal allocation to fill in [pre_dummy_top(),end()] but will
// not dirty that area (one less thing to have to do while holding
// a lock). So we can only verify that [bottom(),pre_dummy_top()]
// is dirty. Also note that verify_dirty_region() requires
// mr.start() and mr.end() to be card aligned and pre_dummy_top()
// is not guaranteed to be.
MemRegion mr(hr->bottom(),
@ -4991,7 +5050,7 @@ void G1CollectedHeap::cleanUpCardTable() {
g1_policy()->record_clear_ct_time( elapsed * 1000.0);
#ifndef PRODUCT
if (G1VerifyCTCleanup || VerifyAfterGC) {
G1VerifyCardTableCleanup cleanup_verifier(ct_bs);
G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
@ -5314,6 +5373,7 @@ HeapRegion* G1CollectedHeap::new_mutator_alloc_region(size_t word_size,
if (new_alloc_region != NULL) {
g1_policy()->update_region_num(true /* next_is_young */);
return new_alloc_region;
@ -28,7 +28,9 @@
#include "gc_implementation/g1/concurrentMark.hpp"
#include "gc_implementation/g1/g1AllocRegion.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/g1MonitoringSupport.hpp"
#include "gc_implementation/g1/heapRegionSets.hpp"
#include "gc_implementation/shared/hSpaceCounters.hpp"
#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
#include "memory/barrierSet.hpp"
#include "memory/memRegion.hpp"
@ -57,6 +59,7 @@ class HeapRegionRemSetIterator;
class ConcurrentMark;
class ConcurrentMarkThread;
class ConcurrentG1Refine;
class GenerationCounters;
typedef OverflowTaskQueue<StarTask> RefToScanQueue;
typedef GenericTaskQueueSet<RefToScanQueue> RefToScanQueueSet;
@ -236,6 +239,9 @@ private:
// current collection.
HeapRegion* _gc_alloc_region_list;
// Helper for monitoring and management support.
G1MonitoringSupport* _g1mm;
// Determines PLAB size for a particular allocation purpose.
static size_t desired_plab_sz(GCAllocPurpose purpose);
@ -298,6 +304,14 @@ private:
// started is maintained in _total_full_collections in CollectedHeap.
volatile unsigned int _full_collections_completed;
// This is a non-product method that is helpful for testing. It is
// called at the end of a GC and artificially expands the heap by
// allocating a number of dead regions. This way we can induce very
// frequent marking cycles and stress the cleanup / concurrent
// cleanup code more (as all the regions that will be allocated by
// this method will be found dead by the marking cycle).
void allocate_dummy_regions() PRODUCT_RETURN;
// These are macros so that, if the assert fires, we get the correct
// line number, file, etc.
@ -542,6 +556,9 @@ protected:
HeapWord* expand_and_allocate(size_t word_size);
G1MonitoringSupport* g1mm() { return _g1mm; }
// Expand the garbage-first heap by at least the given size (in bytes!).
// Returns true if the heap was expanded by the requested amount;
// false otherwise.
@ -953,6 +970,8 @@ public:
// The number of regions available for "regular" expansion.
size_t expansion_regions() { return _expansion_regions; }
void verify_not_dirty_region(HeapRegion* hr) PRODUCT_RETURN;
void verify_dirty_region(HeapRegion* hr) PRODUCT_RETURN;
void verify_dirty_young_list(HeapRegion* head) PRODUCT_RETURN;
void verify_dirty_young_regions() PRODUCT_RETURN;
@ -0,0 +1,178 @@
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
#include "precompiled.hpp"
#include "gc_implementation/g1/g1MonitoringSupport.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
G1MonitoringSupport::G1MonitoringSupport(G1CollectedHeap* g1h,
VirtualSpace* g1_storage_addr) :
// Counters for GC collections
// name "collector.0". In a generational collector this would be the
// young generation collection.
_incremental_collection_counters =
new CollectorCounters("G1 incremental collections", 0);
// name "collector.1". In a generational collector this would be the
// old generation collection.
_full_collection_counters =
new CollectorCounters("G1 stop-the-world full collections", 1);
// timer sampling for all counters supporting sampling only update the
// used value. See the take_sample() method. G1 requires both used and
// capacity updated so sampling is not currently used. It might
// be sufficient to update all counters in take_sample() even though
// take_sample() only returns "used". When sampling was used, there
// were some anomalous values emitted which may have been the consequence
// of not updating all values simultaneously (i.e., see the calculation done
// in eden_space_used(), is it possible that the values used to
// calculate either eden_used or survivor_used are being updated by
// the collector when the sample is being done?).
const bool sampled = false;
// "Generation" and "Space" counters.
// name "generation.1" This is logically the old generation in
// generational GC terms. The "1, 1" parameters are for
// the n-th generation (=1) with 1 space.
// Counters are created from minCapacity, maxCapacity, and capacity
_non_young_collection_counters =
new GenerationCounters("whole heap", 1, 1, _g1_storage_addr);
// name ""
// Counters are created from maxCapacity, capacity, initCapacity,
// and used.
_old_space_counters = new HSpaceCounters("space", 0,
_g1h->max_capacity(), _g1h->capacity(), _non_young_collection_counters);
// Young collection set
// name "generation.0". This is logically the young generation.
// The "0, 3" are parameters for the n-th generation (=0) with 3 spaces.
// See _non_young_collection_counters for additional counters
_young_collection_counters = new GenerationCounters("young", 0, 3, NULL);
// Replace "max_heap_byte_size()" with maximum young gen size for
// g1Collectedheap
// name ""
// See _old_space_counters for additional counters
_eden_counters = new HSpaceCounters("eden", 0,
_g1h->max_capacity(), eden_space_committed(),
// name ""
// See _old_space_counters for additional counters
// Set the arguments to indicate that this survivor space is not used.
_from_counters = new HSpaceCounters("s0", 1, (long) 0, (long) 0,
// name ""
// See _old_space_counters for additional counters
_to_counters = new HSpaceCounters("s1", 2,
size_t G1MonitoringSupport::overall_committed() {
return g1h()->capacity();
size_t G1MonitoringSupport::overall_used() {
return g1h()->used_unlocked();
size_t G1MonitoringSupport::eden_space_committed() {
return MAX2(eden_space_used(), (size_t) HeapRegion::GrainBytes);
size_t G1MonitoringSupport::eden_space_used() {
size_t young_list_length = g1h()->young_list()->length();
size_t eden_used = young_list_length * HeapRegion::GrainBytes;
size_t survivor_used = survivor_space_used();
eden_used = subtract_up_to_zero(eden_used, survivor_used);
return eden_used;
size_t G1MonitoringSupport::survivor_space_committed() {
return MAX2(survivor_space_used(),
(size_t) HeapRegion::GrainBytes);
size_t G1MonitoringSupport::survivor_space_used() {
size_t survivor_num = g1h()->g1_policy()->recorded_survivor_regions();
size_t survivor_used = survivor_num * HeapRegion::GrainBytes;
return survivor_used;
size_t G1MonitoringSupport::old_space_committed() {
size_t committed = overall_committed();
size_t eden_committed = eden_space_committed();
size_t survivor_committed = survivor_space_committed();
committed = subtract_up_to_zero(committed, eden_committed);
committed = subtract_up_to_zero(committed, survivor_committed);
committed = MAX2(committed, (size_t) HeapRegion::GrainBytes);
return committed;
// See the comment near the top of g1MonitoringSupport.hpp for
// an explanation of these calculations for "used" and "capacity".
size_t G1MonitoringSupport::old_space_used() {
size_t used = overall_used();
size_t eden_used = eden_space_used();
size_t survivor_used = survivor_space_used();
used = subtract_up_to_zero(used, eden_used);
used = subtract_up_to_zero(used, survivor_used);
return used;
void G1MonitoringSupport::update_counters() {
if (UsePerfData) {
void G1MonitoringSupport::update_eden_counters() {
if (UsePerfData) {
@ -0,0 +1,203 @@
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
#include "gc_implementation/shared/hSpaceCounters.hpp"
class G1CollectedHeap;
class G1SpaceMonitoringSupport;
// Class for monitoring logical spaces in G1.
// G1 defines a set of regions as a young
// collection (analogous to a young generation).
// The young collection is a logical generation
// with no fixed chunk (see space.hpp) reflecting
// the address space for the generation. In addition
// to the young collection there is its complement
// the non-young collection that is simply the regions
// not in the young collection. The non-young collection
// is treated here as a logical old generation only
// because the monitoring tools expect a generational
// heap. The monitoring tools expect that a Space
// (see space.hpp) exists that describes the
// address space of young collection and non-young
// collection and such a view is provided here.
// This class provides interfaces to access
// the value of variables for the young collection
// that include the "capacity" and "used" of the
// young collection along with constant values
// for the minimum and maximum capacities for
// the logical spaces. Similarly for the non-young
// collection.
// Also provided are counters for G1 concurrent collections
// and stop-the-world full heap collections.
// Below is a description of how "used" and "capacity"
// (or committed) is calculated for the logical spaces.
// 1) The used space calculation for a pool is not necessarily
// independent of the others. We can easily get from G1 the overall
// used space in the entire heap, the number of regions in the young
// generation (includes both eden and survivors), and the number of
// survivor regions. So, from that we calculate:
// survivor_used = survivor_num * region_size
// eden_used = young_region_num * region_size - survivor_used
// old_gen_used = overall_used - eden_used - survivor_used
// Note that survivor_used and eden_used are upper bounds. To get the
// actual value we would have to iterate over the regions and add up
// ->used(). But that'd be expensive. So, we'll accept some lack of
// accuracy for those two. But, we have to be careful when calculating
// old_gen_used, in case we subtract from overall_used more than the
// actual number and our result goes negative.
// 2) Calculating the used space is straightforward, as described
// above. However, how do we calculate the committed space, given that
// we allocate space for the eden, survivor, and old gen out of the
// same pool of regions? One way to do this is to use the used value
// as also the committed value for the eden and survivor spaces and
// then calculate the old gen committed space as follows:
// old_gen_committed = overall_committed - eden_committed - survivor_committed
// Maybe a better way to do that would be to calculate used for eden
// and survivor as a sum of ->used() over their regions and then
// calculate committed as region_num * region_size (i.e., what we use
// to calculate the used space now). This is something to consider
// in the future.
// 3) Another decision that is again not straightforward is what is
// the max size that each memory pool can grow to. One way to do this
// would be to use the committed size for the max for the eden and
// survivors and calculate the old gen max as follows (basically, it's
// a similar pattern to what we use for the committed space, as
// described above):
// old_gen_max = overall_max - eden_max - survivor_max
// Unfortunately, the above makes the max of each pool fluctuate over
// time and, even though this is allowed according to the spec, it
// broke several assumptions in the M&M framework (there were cases
// where used would reach a value greater than max). So, for max we
// use -1, which means "undefined" according to the spec.
// 4) Now, there is a very subtle issue with all the above. The
// framework will call get_memory_usage() on the three pools
// asynchronously. As a result, each call might get a different value
// for, say, survivor_num which will yield inconsistent values for
// eden_used, survivor_used, and old_gen_used (as survivor_num is used
// in the calculation of all three). This would normally be
// ok. However, it's possible that this might cause the sum of
// eden_used, survivor_used, and old_gen_used to go over the max heap
// size and this seems to sometimes cause JConsole (and maybe other
// clients) to get confused. There's not really an easy / clean
// solution to this problem, due to the asynchronous nature of the
// framework.
class G1MonitoringSupport : public CHeapObj {
G1CollectedHeap* _g1h;
VirtualSpace* _g1_storage_addr;
// jstat performance counters
// incremental collections both fully and partially young
CollectorCounters* _incremental_collection_counters;
// full stop-the-world collections
CollectorCounters* _full_collection_counters;
// young collection set counters. The _eden_counters,
// _from_counters, and _to_counters are associated with
// this "generational" counter.
GenerationCounters* _young_collection_counters;
// non-young collection set counters. The _old_space_counters
// below are associated with this "generational" counter.
GenerationCounters* _non_young_collection_counters;
// Counters for the capacity and used for
// the whole heap
HSpaceCounters* _old_space_counters;
// the young collection
HSpaceCounters* _eden_counters;
// the survivor collection (only one, _to_counters, is actively used)
HSpaceCounters* _from_counters;
HSpaceCounters* _to_counters;
// It returns x - y if x > y, 0 otherwise.
// As described in the comment above, some of the inputs to the
// calculations we have to do are obtained concurrently and hence
// may be inconsistent with each other. So, this provides a
// defensive way of performing the subtraction and avoids the value
// going negative (which would mean a very large result, given that
// the parameters are size_t).
static size_t subtract_up_to_zero(size_t x, size_t y) {
if (x > y) {
return x - y;
} else {
return 0;
G1MonitoringSupport(G1CollectedHeap* g1h, VirtualSpace* g1_storage_addr);
G1CollectedHeap* g1h() { return _g1h; }
VirtualSpace* g1_storage_addr() { return _g1_storage_addr; }
// Performance Counter accessors
void update_counters();
void update_eden_counters();
CollectorCounters* incremental_collection_counters() {
return _incremental_collection_counters;
CollectorCounters* full_collection_counters() {
return _full_collection_counters;
GenerationCounters* non_young_collection_counters() {
return _non_young_collection_counters;
HSpaceCounters* old_space_counters() { return _old_space_counters; }
HSpaceCounters* eden_counters() { return _eden_counters; }
HSpaceCounters* from_counters() { return _from_counters; }
HSpaceCounters* to_counters() { return _to_counters; }
// Monitoring support used by
// MemoryService
// jstat counters
size_t overall_committed();
size_t overall_used();
size_t eden_space_committed();
size_t eden_space_used();
size_t survivor_space_committed();
size_t survivor_space_used();
size_t old_space_committed();
size_t old_space_used();
@ -157,7 +157,6 @@ public:
void set_try_claimed() { _try_claimed = true; }
void scanCard(size_t index, HeapRegion *r) {
DirtyCardToOopClosure* cl =
@ -168,17 +167,14 @@ public:
HeapWord* card_start = _bot_shared->address_for_index(index);
HeapWord* card_end = card_start + G1BlockOffsetSharedArray::N_words;
Space *sp = SharedHeap::heap()->space_containing(card_start);
MemRegion sm_region;
if (ParallelGCThreads > 0) {
// first find the used area
sm_region = sp->used_region_at_save_marks();
} else {
// The closure is not idempotent. We shouldn't look at objects
// allocated during the GC.
sm_region = sp->used_region_at_save_marks();
MemRegion sm_region = sp->used_region_at_save_marks();
MemRegion mr = sm_region.intersection(MemRegion(card_start,card_end));
if (!mr.is_empty()) {
if (!mr.is_empty() && !_ct_bs->is_card_claimed(index)) {
// We mark the card as "claimed" lazily (so races are possible
// but they're benign), which reduces the number of duplicate
// scans (the rsets of the regions in the cset can intersect).
@ -199,6 +195,9 @@ public:
HeapRegionRemSet* hrrs = r->rem_set();
if (hrrs->iter_is_complete()) return false; // All done.
if (!_try_claimed && !hrrs->claim_iter()) return false;
// If we ever free the collection set concurrently, we should also
// clear the card table concurrently therefore we won't need to
// add regions of the collection set to the dirty cards region.
// If we didn't return above, then
// _try_claimed || r->claim_iter()
@ -230,15 +229,10 @@ public:
// If the card is dirty, then we will scan it during updateRS.
if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
// We mark the card as "claimed" lazily (so races are possible but they're benign),
// which reduces the number of duplicate scans (the rsets of the regions in the cset
// can intersect).
if (!_ct_bs->is_card_claimed(card_index)) {
scanCard(card_index, card_region);
// If the card is dirty, then we will scan it during updateRS.
if (!card_region->in_collection_set() &&
!_ct_bs->is_card_dirty(card_index)) {
scanCard(card_index, card_region);
if (!_try_claimed) {
@ -246,8 +240,6 @@ public:
return false;
// Set all cards back to clean.
void cleanup() {_g1h->cleanUpCardTable();}
size_t cards_done() { return _cards_done;}
size_t cards_looked_up() { return _cards;}
@ -566,8 +558,9 @@ public:
HeapWord* stop_point =
false /* filter_young */);
false /* filter_young */,
NULL /* card_ptr */);
// Since this is performed in the event of an evacuation failure, we
// shouldn't see a non-null stop point
@ -735,12 +728,6 @@ bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
(OopClosure*)&mux :
// Undirty the card.
*card_ptr = CardTableModRefBS::clean_card_val();
// We must complete this write before we do any of the reads below.
// And process it, being careful of unallocated portions of TLAB's.
// The region for the current card may be a young region. The
// current card may have been a card that was evicted from the
// card cache. When the card was inserted into the cache, we had
@ -749,7 +736,7 @@ bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
// and tagged as young.
// We wish to filter out cards for such a region but the current
// thread, if we're running conucrrently, may "see" the young type
// thread, if we're running concurrently, may "see" the young type
// change at any time (so an earlier "is_young" check may pass or
// fail arbitrarily). We tell the iteration code to perform this
// filtering when it has been determined that there has been an actual
@ -759,7 +746,8 @@ bool G1RemSet::concurrentRefineOneCard_impl(jbyte* card_ptr, int worker_i,
HeapWord* stop_point =
// If stop_point is non-null, then we encountered an unallocated region
// (perhaps the unfilled portion of a TLAB.) For now, we'll dirty the
@ -300,13 +300,22 @@
develop(uintx, G1StressConcRegionFreeingDelayMillis, 0, \
"Artificial delay during concurrent region freeing") \
develop(uintx, G1DummyRegionsPerGC, 0, \
"The number of dummy regions G1 will allocate at the end of " \
"each evacuation pause in order to artificially fill up the " \
"heap and stress the marking implementation.") \
develop(bool, ReduceInitialCardMarksForG1, false, \
"When ReduceInitialCardMarks is true, this flag setting " \
" controls whether G1 allows the RICM optimization") \
develop(bool, G1ExitOnExpansionFailure, false, \
"Raise a fatal VM exit out of memory failure in the event " \
" that heap expansion fails due to running out of swap.")
" that heap expansion fails due to running out of swap.") \
develop(uintx, G1ConcMarkForceOverflow, 0, \
"The number of times we'll force an overflow during " \
"concurrent marking")
@ -376,6 +376,17 @@ void HeapRegion::hr_clear(bool par, bool clear_space) {
if (clear_space) clear(SpaceDecorator::Mangle);
void HeapRegion::par_clear() {
assert(used() == 0, "the region should have been already cleared");
assert(capacity() == (size_t) HeapRegion::GrainBytes,
"should be back to normal");
HeapRegionRemSet* hrrs = rem_set();
CardTableModRefBS* ct_bs =
ct_bs->clear(MemRegion(bottom(), end()));
void HeapRegion::calc_gc_efficiency() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
@ -600,7 +611,15 @@ HeapWord*
oops_on_card_seq_iterate_careful(MemRegion mr,
FilterOutOfRegionClosure* cl,
bool filter_young) {
bool filter_young,
jbyte* card_ptr) {
// Currently, we should only have to clean the card if filter_young
// is true and vice versa.
if (filter_young) {
assert(card_ptr != NULL, "pre-condition");
} else {
assert(card_ptr == NULL, "pre-condition");
G1CollectedHeap* g1h = G1CollectedHeap::heap();
// If we're within a stop-world GC, then we might look at a card in a
@ -626,6 +645,15 @@ oops_on_card_seq_iterate_careful(MemRegion mr,
assert(!is_young(), "check value of filter_young");
// We can only clean the card here, after we make the decision that
// the card is not young. And we only clean the card if we have been
// asked to (i.e., card_ptr != NULL).
if (card_ptr != NULL) {
*card_ptr = CardTableModRefBS::clean_card_val();
// We must complete this write before we do any of the reads below.
// We used to use "block_start_careful" here. But we're actually happy
// to update the BOT while we do this...
HeapWord* cur = block_start(mr.start());
@ -584,6 +584,7 @@ class HeapRegion: public G1OffsetTableContigSpace {
// Reset HR stuff to default values.
void hr_clear(bool par, bool clear_space);
void par_clear();
void initialize(MemRegion mr, bool clear_space, bool mangle_space);
@ -802,12 +803,16 @@ class HeapRegion: public G1OffsetTableContigSpace {
object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl);
// In this version - if filter_young is true and the region
// is a young region then we skip the iteration.
// filter_young: if true and the region is a young region then we
// skip the iteration.
// card_ptr: if not NULL, and we decide that the card is not young
// and we iterate over it, we'll clean the card before we start the
// iteration.
oops_on_card_seq_iterate_careful(MemRegion mr,
FilterOutOfRegionClosure* cl,
bool filter_young);
bool filter_young,
jbyte* card_ptr);
// A version of block start that is guaranteed to find *some* block
// boundary at or before "p", but does not object iteration, and may
@ -33,44 +33,43 @@
#include "runtime/mutexLocker.hpp"
#include "runtime/virtualspace.hpp"
void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl,
int n_threads) {
if (n_threads > 0) {
assert((n_threads == 1 && ParallelGCThreads == 0) ||
n_threads <= (int)ParallelGCThreads,
"# worker threads != # requested!");
// Make sure the LNC array is valid for the space.
jbyte** lowest_non_clean;
uintptr_t lowest_non_clean_base_chunk_index;
size_t lowest_non_clean_chunk_size;
get_LNC_array_for_space(sp, lowest_non_clean,
void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
ClearNoncleanCardWrapper* cl,
int n_threads) {
assert(n_threads > 0, "Error: expected n_threads > 0");
assert((n_threads == 1 && ParallelGCThreads == 0) ||
n_threads <= (int)ParallelGCThreads,
"# worker threads != # requested!");
// Make sure the LNC array is valid for the space.
jbyte** lowest_non_clean;
uintptr_t lowest_non_clean_base_chunk_index;
size_t lowest_non_clean_chunk_size;
get_LNC_array_for_space(sp, lowest_non_clean,
int n_strides = n_threads * StridesPerThread;
SequentialSubTasksDone* pst = sp->par_seq_tasks();
int n_strides = n_threads * StridesPerThread;
SequentialSubTasksDone* pst = sp->par_seq_tasks();
int stride = 0;
while (!pst->is_task_claimed(/* reference */ stride)) {
process_stride(sp, mr, stride, n_strides, dcto_cl, cl,
if (pst->all_tasks_completed()) {
// Clear lowest_non_clean array for next time.
intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
uintptr_t last_chunk_index = addr_to_chunk_index(mr.last());
for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
intptr_t ind = ch - lowest_non_clean_base_chunk_index;
assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
"Bounds error");
lowest_non_clean[ind] = NULL;
int stride = 0;
while (!pst->is_task_claimed(/* reference */ stride)) {
process_stride(sp, mr, stride, n_strides, dcto_cl, cl,
if (pst->all_tasks_completed()) {
// Clear lowest_non_clean array for next time.
intptr_t first_chunk_index = addr_to_chunk_index(mr.start());
uintptr_t last_chunk_index = addr_to_chunk_index(mr.last());
for (uintptr_t ch = first_chunk_index; ch <= last_chunk_index; ch++) {
intptr_t ind = ch - lowest_non_clean_base_chunk_index;
assert(0 <= ind && ind < (intptr_t)lowest_non_clean_chunk_size,
"Bounds error");
lowest_non_clean[ind] = NULL;
@ -81,7 +80,7 @@ process_stride(Space* sp,
MemRegion used,
jint stride, int n_strides,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl,
ClearNoncleanCardWrapper* cl,
jbyte** lowest_non_clean,
uintptr_t lowest_non_clean_base_chunk_index,
size_t lowest_non_clean_chunk_size) {
@ -127,7 +126,11 @@ process_stride(Space* sp,
non_clean_card_iterate_work(chunk_mr, cl);
// We do not call the non_clean_card_iterate_serial() version because
// we want to clear the cards, and the ClearNoncleanCardWrapper closure
// itself does the work of finding contiguous dirty ranges of cards to
// process (and clear).
// Find the next chunk of the stride.
chunk_card_start += CardsPerStrideChunk * n_strides;
@ -224,6 +224,12 @@ void PSOldGen::expand(size_t bytes) {
const size_t alignment = virtual_space()->alignment();
size_t aligned_bytes = align_size_up(bytes, alignment);
size_t aligned_expand_bytes = align_size_up(MinHeapDeltaBytes, alignment);
if (UseNUMA) {
// With NUMA we use round-robin page allocation for the old gen. Expand by at least
// providing a page per lgroup. Alignment is larger or equal to the page size.
aligned_expand_bytes = MAX2(aligned_expand_bytes, alignment * os::numa_get_groups_num());
if (aligned_bytes == 0){
// The alignment caused the number of bytes to wrap. An expand_by(0) will
// return true with the implication that an expansion was done when it
@ -1,5 +1,5 @@
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -51,15 +51,18 @@ GenerationCounters::GenerationCounters(const char* name,
cname = PerfDataManager::counter_name(_name_space, "minCapacity");
PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
_virtual_space == NULL ? 0 :
_virtual_space->committed_size(), CHECK);
cname = PerfDataManager::counter_name(_name_space, "maxCapacity");
PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
_virtual_space == NULL ? 0 :
_virtual_space->reserved_size(), CHECK);
cname = PerfDataManager::counter_name(_name_space, "capacity");
_current_size = PerfDataManager::create_variable(SUN_GC, cname,
_virtual_space == NULL ? 0 :
_virtual_space->committed_size(), CHECK);
@ -1,5 +1,5 @@
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -61,10 +61,11 @@ class GenerationCounters: public CHeapObj {
virtual void update_all() {
_current_size->set_value(_virtual_space == NULL ? 0 :
const char* name_space() const { return _name_space; }
@ -0,0 +1,66 @@
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
#include "precompiled.hpp"
#include "gc_implementation/shared/hSpaceCounters.hpp"
#include "memory/generation.hpp"
#include "memory/resourceArea.hpp"
HSpaceCounters::HSpaceCounters(const char* name,
int ordinal,
size_t max_size,
size_t initial_capacity,
GenerationCounters* gc) {
if (UsePerfData) {
ResourceMark rm;
const char* cns =
PerfDataManager::name_space(gc->name_space(), "space", ordinal);
_name_space = NEW_C_HEAP_ARRAY(char, strlen(cns)+1);
strcpy(_name_space, cns);
const char* cname = PerfDataManager::counter_name(_name_space, "name");
PerfDataManager::create_string_constant(SUN_GC, cname, name, CHECK);
cname = PerfDataManager::counter_name(_name_space, "maxCapacity");
PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
(jlong)max_size, CHECK);
cname = PerfDataManager::counter_name(_name_space, "capacity");
_capacity = PerfDataManager::create_variable(SUN_GC, cname,
initial_capacity, CHECK);
cname = PerfDataManager::counter_name(_name_space, "used");
_used = PerfDataManager::create_variable(SUN_GC, cname, PerfData::U_Bytes,
(jlong) 0, CHECK);
cname = PerfDataManager::counter_name(_name_space, "initCapacity");
PerfDataManager::create_constant(SUN_GC, cname, PerfData::U_Bytes,
initial_capacity, CHECK);
@ -0,0 +1,87 @@
* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
#ifndef SERIALGC
#include "gc_implementation/shared/generationCounters.hpp"
#include "memory/generation.hpp"
#include "runtime/perfData.hpp"
// HSpaceCounters is a holder class for performance counters
// that track collections (logical spaces) in a heap.
class HeapSpaceUsedHelper;
class G1SpaceMonitoringSupport;
class HSpaceCounters: public CHeapObj {
friend class VMStructs;
PerfVariable* _capacity;
PerfVariable* _used;
// Constant PerfData types don't need to retain a reference.
// However, it's a good idea to document them here.
char* _name_space;
HSpaceCounters(const char* name, int ordinal, size_t max_size,
size_t initial_capacity, GenerationCounters* gc);
~HSpaceCounters() {
if (_name_space != NULL) FREE_C_HEAP_ARRAY(char, _name_space);
inline void update_capacity(size_t v) {
inline void update_used(size_t v) {
// for security reasons, we do not allow arbitrary reads from
// the counters as they may live in shared memory.
jlong used() {
return _used->get_value();
// Returns the current value of the "capacity" performance counter.
// This must read _capacity: reading _used here (as the sibling used()
// accessor does) would report the used value as the capacity.
jlong capacity() {
  return _capacity->get_value();
inline void update_all(size_t capacity, size_t used) {
const char* name_space() const { return _name_space; }
@ -327,6 +327,7 @@ void LinkResolver::resolve_method(methodHandle& resolved_method, KlassHandle res
// 1. check if klass is not interface
if (resolved_klass->is_interface()) {
ResourceMark rm(THREAD);
char buf[200];
jio_snprintf(buf, sizeof(buf), "Found interface %s, but class was expected", Klass::cast(resolved_klass())->external_name());
THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@ -413,6 +414,7 @@ void LinkResolver::resolve_interface_method(methodHandle& resolved_method,
// check if klass is interface
if (!resolved_klass->is_interface()) {
ResourceMark rm(THREAD);
char buf[200];
jio_snprintf(buf, sizeof(buf), "Found class %s, but interface was expected", Klass::cast(resolved_klass())->external_name());
THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), buf);
@ -534,6 +536,7 @@ void LinkResolver::resolve_field(FieldAccessInfo& result, constantPoolHandle poo
// check for errors
if (is_static != fd.is_static()) {
ResourceMark rm(THREAD);
char msg[200];
jio_snprintf(msg, sizeof(msg), "Expected %s field %s.%s", is_static ? "static" : "non-static", Klass::cast(resolved_klass())->external_name(),>as_C_string());
THROW_MSG(vmSymbols::java_lang_IncompatibleClassChangeError(), msg);
@ -631,6 +634,7 @@ void LinkResolver::linktime_resolve_static_method(methodHandle& resolved_method,
// check if static
if (!resolved_method->is_static()) {
ResourceMark rm(THREAD);
char buf[200];
jio_snprintf(buf, sizeof(buf), "Expected static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
@ -671,6 +675,7 @@ void LinkResolver::linktime_resolve_special_method(methodHandle& resolved_method
// check if not static
if (resolved_method->is_static()) {
ResourceMark rm(THREAD);
char buf[200];
jio_snprintf(buf, sizeof(buf),
"Expecting non-static method %s",
@ -717,6 +722,7 @@ void LinkResolver::runtime_resolve_special_method(CallInfo& result, methodHandle
// check if not static
if (sel_method->is_static()) {
ResourceMark rm(THREAD);
char buf[200];
jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
@ -757,6 +763,7 @@ void LinkResolver::linktime_resolve_virtual_method(methodHandle &resolved_method
// check if not static
if (resolved_method->is_static()) {
ResourceMark rm(THREAD);
char buf[200];
jio_snprintf(buf, sizeof(buf), "Expecting non-static method %s", methodOopDesc::name_and_sig_as_C_string(Klass::cast(resolved_klass()),
@ -873,6 +880,7 @@ void LinkResolver::runtime_resolve_interface_method(CallInfo& result, methodHand
// check if receiver klass implements the resolved interface
if (!recv_klass->is_subtype_of(resolved_klass())) {
ResourceMark rm(THREAD);
char buf[200];
jio_snprintf(buf, sizeof(buf), "Class %s does not implement the requested interface %s",
@ -44,6 +44,14 @@ void* CHeapObj::operator new(size_t size){
return (void *) AllocateHeap(size, "CHeapObj-new");
void* CHeapObj::operator new (size_t size, const std::nothrow_t& nothrow_constant) {
char* p = (char*) os::malloc(size);
#ifdef ASSERT
if (PrintMallocFree) trace_heap_malloc(size, "CHeapObj-new", p);
return p;
void CHeapObj::operator delete(void* p){
@ -34,6 +34,8 @@
#include "opto/c2_globals.hpp"
#include <new>
#define ARENA_ALIGN_M1 (((size_t)(ARENA_AMALLOC_ALIGNMENT)) - 1)
#define ARENA_ALIGN_MASK (~((size_t)ARENA_ALIGN_M1))
#define ARENA_ALIGN(x) ((((size_t)(x)) + ARENA_ALIGN_M1) & ARENA_ALIGN_MASK)
@ -99,6 +101,7 @@ class AllocatedObj {
void* operator new(size_t size);
void* operator new (size_t size, const std::nothrow_t& nothrow_constant);
void operator delete(void* p);
void* new_array(size_t size);
@ -456,31 +456,35 @@ bool CardTableModRefBS::mark_card_deferred(size_t card_index) {
void CardTableModRefBS::non_clean_card_iterate(Space* sp,
MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl) {
void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp,
MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
ClearNoncleanCardWrapper* cl) {
if (!mr.is_empty()) {
int n_threads = SharedHeap::heap()->n_par_threads();
if (n_threads > 0) {
#ifndef SERIALGC
par_non_clean_card_iterate_work(sp, mr, dcto_cl, cl, n_threads);
non_clean_card_iterate_parallel_work(sp, mr, dcto_cl, cl, n_threads);
#else // SERIALGC
fatal("Parallel gc not supported here.");
#endif // SERIALGC
} else {
non_clean_card_iterate_work(mr, cl);
// We do not call the non_clean_card_iterate_serial() version below because
// we want to clear the cards (which non_clean_card_iterate_serial() does not
// do for us), and the ClearNoncleanCardWrapper closure itself does the work
// of finding contiguous dirty ranges of cards to process (and clear).
// NOTE: For this to work correctly, it is important that
// we look for non-clean cards below (so as to catch those
// marked precleaned), rather than look explicitly for dirty
// cards (and miss those marked precleaned). In that sense,
// the name precleaned is currently somewhat of a misnomer.
void CardTableModRefBS::non_clean_card_iterate_work(MemRegion mr,
MemRegionClosure* cl) {
// The iterator itself is not MT-aware, but
// MT-aware callers and closures can use this to
// accomplish dirty card iteration in parallel. The
// iterator itself does not clear the dirty cards, or
// change their values in any manner.
void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr,
MemRegionClosure* cl) {
for (int i = 0; i < _cur_covered_regions; i++) {
MemRegion mri = mr.intersection(_covered[i]);
if (mri.word_size() > 0) {
@ -648,43 +652,37 @@ void CardTableModRefBS::verify() {
#ifndef PRODUCT
class GuaranteeNotModClosure: public MemRegionClosure {
CardTableModRefBS* _ct;
GuaranteeNotModClosure(CardTableModRefBS* ct) : _ct(ct) {}
void do_MemRegion(MemRegion mr) {
jbyte* entry = _ct->byte_for(mr.start());
guarantee(*entry != CardTableModRefBS::clean_card,
"Dirty card in region that should be clean");
void CardTableModRefBS::verify_region(MemRegion mr,
jbyte val, bool val_equals) {
jbyte* start = byte_for(mr.start());
jbyte* end = byte_for(mr.last());
bool failures = false;
for (jbyte* curr = start; curr <= end; ++curr) {
jbyte curr_val = *curr;
bool failed = (val_equals) ? (curr_val != val) : (curr_val == val);
if (failed) {
if (!failures) {
tty->print_cr("== CT verification failed: ["PTR_FORMAT","PTR_FORMAT"]");
tty->print_cr("== %sexpecting value: %d",
(val_equals) ? "" : "not ", val);
failures = true;
tty->print_cr("== card "PTR_FORMAT" ["PTR_FORMAT","PTR_FORMAT"], "
"val: %d", curr, addr_for(curr),
(HeapWord*) (((size_t) addr_for(curr)) + card_size),
(int) curr_val);
void CardTableModRefBS::verify_clean_region(MemRegion mr) {
GuaranteeNotModClosure blk(this);
non_clean_card_iterate_work(mr, &blk);
guarantee(!failures, "there should not have been any failures");
// To verify a MemRegion is entirely dirty this closure is passed to
// dirty_card_iterate. If the region is dirty do_MemRegion will be
// invoked only once with a MemRegion equal to the one being
// verified.
class GuaranteeDirtyClosure: public MemRegionClosure {
CardTableModRefBS* _ct;
MemRegion _mr;
bool _result;
GuaranteeDirtyClosure(CardTableModRefBS* ct, MemRegion mr)
: _ct(ct), _mr(mr), _result(false) {}
void do_MemRegion(MemRegion mr) {
_result = _mr.equals(mr);
bool result() const { return _result; }
void CardTableModRefBS::verify_not_dirty_region(MemRegion mr) {
verify_region(mr, dirty_card, false /* val_equals */);
void CardTableModRefBS::verify_dirty_region(MemRegion mr) {
GuaranteeDirtyClosure blk(this, mr);
dirty_card_iterate(mr, &blk);
guarantee(blk.result(), "Non-dirty cards in region that should be dirty");
verify_region(mr, dirty_card, true /* val_equals */);
@ -44,6 +44,7 @@
class Generation;
class OopsInGenClosure;
class DirtyCardToOopClosure;
class ClearNoncleanCardWrapper;
class CardTableModRefBS: public ModRefBarrierSet {
// Some classes get to look at some private stuff.
@ -165,22 +166,28 @@ class CardTableModRefBS: public ModRefBarrierSet {
// Iterate over the portion of the card-table which covers the given
// region mr in the given space and apply cl to any dirty sub-regions
// of mr. cl and dcto_cl must either be the same closure or cl must
// wrap dcto_cl. Both are required - neither may be NULL. Also, dcto_cl
// may be modified. Note that this function will operate in a parallel
// mode if worker threads are available.
void non_clean_card_iterate(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl);
// of mr. Dirty cards are _not_ cleared by the iterator method itself,
// but closures may arrange to do so on their own should they so wish.
void non_clean_card_iterate_serial(MemRegion mr, MemRegionClosure* cl);
// Utility function used to implement the other versions below.
void non_clean_card_iterate_work(MemRegion mr, MemRegionClosure* cl);
// A variant of the above that will operate in a parallel mode if
// worker threads are available, and clear the dirty cards as it
// processes them.
// ClearNoncleanCardWrapper cl must wrap the DirtyCardToOopClosure dcto_cl,
// which may itself be modified by the method.
void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
ClearNoncleanCardWrapper* cl);
void par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl,
int n_threads);
// Work method used to implement non_clean_card_iterate_possibly_parallel()
// above in the parallel case.
void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
ClearNoncleanCardWrapper* cl,
int n_threads);
// Dirty the bytes corresponding to "mr" (not all of which must be
// covered.)
void dirty_MemRegion(MemRegion mr);
@ -237,7 +244,7 @@ class CardTableModRefBS: public ModRefBarrierSet {
MemRegion used,
jint stride, int n_strides,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl,
ClearNoncleanCardWrapper* cl,
jbyte** lowest_non_clean,
uintptr_t lowest_non_clean_base_chunk_index,
size_t lowest_non_clean_chunk_size);
@ -409,14 +416,14 @@ public:
// marking, where a dirty card may cause scanning, and summarization
// marking, of objects that extend onto subsequent cards.)
void mod_card_iterate(MemRegionClosure* cl) {
non_clean_card_iterate_work(_whole_heap, cl);
non_clean_card_iterate_serial(_whole_heap, cl);
// Like the "mod_cards_iterate" above, except only invokes the closure
// for cards within the MemRegion "mr" (which is required to be
// card-aligned and sized.)
void mod_card_iterate(MemRegion mr, MemRegionClosure* cl) {
non_clean_card_iterate_work(mr, cl);
non_clean_card_iterate_serial(mr, cl);
static uintx ct_max_alignment_constraint();
@ -468,7 +475,10 @@ public:
void verify();
void verify_guard();
void verify_clean_region(MemRegion mr) PRODUCT_RETURN;
// val_equals -> it will check that all cards covered by mr equal val
// !val_equals -> it will check that all cards covered by mr do not equal val
void verify_region(MemRegion mr, jbyte val, bool val_equals) PRODUCT_RETURN;
void verify_not_dirty_region(MemRegion mr) PRODUCT_RETURN;
void verify_dirty_region(MemRegion mr) PRODUCT_RETURN;
static size_t par_chunk_heapword_alignment() {
@ -493,4 +503,5 @@ public:
void set_CTRS(CardTableRS* rs) { _rs = rs; }
@ -105,107 +105,111 @@ void CardTableRS::younger_refs_iterate(Generation* g,
class ClearNoncleanCardWrapper: public MemRegionClosure {
MemRegionClosure* _dirty_card_closure;
CardTableRS* _ct;
bool _is_par;
// Clears the given card, return true if the corresponding card should be
// processed.
bool clear_card(jbyte* entry) {
if (_is_par) {
while (true) {
// In the parallel case, we may have to do this several times.
jbyte entry_val = *entry;
assert(entry_val != CardTableRS::clean_card_val(),
"We shouldn't be looking at clean cards, and this should "
"be the only place they get cleaned.");
if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val)
|| _ct->is_prev_youngergen_card_val(entry_val)) {
jbyte res =
Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val);
if (res == entry_val) {
} else {
assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card,
"The CAS above should only fail if another thread did "
"a GC write barrier.");
} else if (entry_val ==
CardTableRS::cur_youngergen_and_prev_nonclean_card) {
// Parallelism shouldn't matter in this case. Only the thread
// assigned to scan the card should change this value.
*entry = _ct->cur_youngergen_card_val();
} else {
assert(entry_val == _ct->cur_youngergen_card_val(),
"Should be the only possibility.");
// In this case, the card was clean before, and became
// cur_youngergen only because of processing of a promoted object.
// We don't have to look at the card.
return false;
inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) {
if (_is_par) {
return clear_card_parallel(entry);
} else {
return clear_card_serial(entry);
inline bool ClearNoncleanCardWrapper::clear_card_parallel(jbyte* entry) {
while (true) {
// In the parallel case, we may have to do this several times.
jbyte entry_val = *entry;
assert(entry_val != CardTableRS::clean_card_val(),
"We shouldn't be looking at clean cards, and this should "
"be the only place they get cleaned.");
if (CardTableRS::card_is_dirty_wrt_gen_iter(entry_val)
|| _ct->is_prev_youngergen_card_val(entry_val)) {
jbyte res =
Atomic::cmpxchg(CardTableRS::clean_card_val(), entry, entry_val);
if (res == entry_val) {
} else {
assert(res == CardTableRS::cur_youngergen_and_prev_nonclean_card,
"The CAS above should only fail if another thread did "
"a GC write barrier.");
return true;
} else if (entry_val ==
CardTableRS::cur_youngergen_and_prev_nonclean_card) {
// Parallelism shouldn't matter in this case. Only the thread
// assigned to scan the card should change this value.
*entry = _ct->cur_youngergen_card_val();
} else {
jbyte entry_val = *entry;
assert(entry_val != CardTableRS::clean_card_val(),
"We shouldn't be looking at clean cards, and this should "
"be the only place they get cleaned.");
assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card,
"This should be possible in the sequential case.");
*entry = CardTableRS::clean_card_val();
return true;
assert(entry_val == _ct->cur_youngergen_card_val(),
"Should be the only possibility.");
// In this case, the card was clean before, and became
// cur_youngergen only because of processing of a promoted object.
// We don't have to look at the card.
return false;
return true;
ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure,
CardTableRS* ct) :
inline bool ClearNoncleanCardWrapper::clear_card_serial(jbyte* entry) {
jbyte entry_val = *entry;
assert(entry_val != CardTableRS::clean_card_val(),
"We shouldn't be looking at clean cards, and this should "
"be the only place they get cleaned.");
assert(entry_val != CardTableRS::cur_youngergen_and_prev_nonclean_card,
"This should be possible in the sequential case.");
*entry = CardTableRS::clean_card_val();
return true;
MemRegionClosure* dirty_card_closure, CardTableRS* ct) :
_dirty_card_closure(dirty_card_closure), _ct(ct) {
_is_par = (SharedHeap::heap()->n_par_threads() > 0);
void do_MemRegion(MemRegion mr) {
// We start at the high end of "mr", walking backwards
// while accumulating a contiguous dirty range of cards in
// [start_of_non_clean, end_of_non_clean) which we then
// process en masse.
HeapWord* end_of_non_clean = mr.end();
HeapWord* start_of_non_clean = end_of_non_clean;
jbyte* entry = _ct->byte_for(mr.last());
const jbyte* first_entry = _ct->byte_for(mr.start());
while (entry >= first_entry) {
HeapWord* cur = _ct->addr_for(entry);
if (!clear_card(entry)) {
// We hit a clean card; process any non-empty
// dirty range accumulated so far.
if (start_of_non_clean < end_of_non_clean) {
MemRegion mr2(start_of_non_clean, end_of_non_clean);
// Reset the dirty window while continuing to
// look for the next dirty window to process.
end_of_non_clean = cur;
start_of_non_clean = end_of_non_clean;
void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) {
assert(mr.word_size() > 0, "Error");
assert(_ct->is_aligned(mr.start()), "mr.start() should be card aligned");
// mr.end() may not necessarily be card aligned.
jbyte* cur_entry = _ct->byte_for(mr.last());
const jbyte* limit = _ct->byte_for(mr.start());
HeapWord* end_of_non_clean = mr.end();
HeapWord* start_of_non_clean = end_of_non_clean;
while (cur_entry >= limit) {
HeapWord* cur_hw = _ct->addr_for(cur_entry);
if ((*cur_entry != CardTableRS::clean_card_val()) && clear_card(cur_entry)) {
// Continue the dirty range by opening the
// dirty window one card to the left.
start_of_non_clean = cur_hw;
} else {
// We hit a "clean" card; process any non-empty
// "dirty" range accumulated so far.
if (start_of_non_clean < end_of_non_clean) {
const MemRegion mrd(start_of_non_clean, end_of_non_clean);
// Open the left end of the window one card to the left.
start_of_non_clean = cur;
// Note that "entry" leads "start_of_non_clean" in
// its leftward excursion after this point
// in the loop and, when we hit the left end of "mr",
// will point off of the left end of the card-table
// for "mr".
// If the first card of "mr" was dirty, we will have
// been left with a dirty window, co-initial with "mr",
// which we now process.
if (start_of_non_clean < end_of_non_clean) {
MemRegion mr2(start_of_non_clean, end_of_non_clean);
// Reset the dirty window, while continuing to look
// for the next dirty card that will start a
// new dirty window.
end_of_non_clean = cur_hw;
start_of_non_clean = cur_hw;
// Note that "cur_entry" leads "start_of_non_clean" in
// its leftward excursion after this point
// in the loop and, when we hit the left end of "mr",
// will point off of the left end of the card-table
// for "mr".
// If the first card of "mr" was dirty, we will have
// been left with a dirty window, co-initial with "mr",
// which we now process.
if (start_of_non_clean < end_of_non_clean) {
const MemRegion mrd(start_of_non_clean, end_of_non_clean);
// clean (by dirty->clean before) ==> cur_younger_gen
// dirty ==> cur_youngergen_and_prev_nonclean_card
// precleaned ==> cur_youngergen_and_prev_nonclean_card
@ -246,8 +250,35 @@ void CardTableRS::younger_refs_in_space_iterate(Space* sp,
ClearNoncleanCardWrapper clear_cl(dcto_cl, this);
_ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(),
dcto_cl, &clear_cl);
const MemRegion urasm = sp->used_region_at_save_marks();
#ifdef ASSERT
// Convert the assertion check to a warning if we are running
// CMS+ParNew until related bug is fixed.
MemRegion ur = sp->used_region();
assert(ur.contains(urasm) || (UseConcMarkSweepGC && UseParNewGC),
err_msg("Did you forget to call save_marks()? "
"[" PTR_FORMAT ", " PTR_FORMAT ") is not contained in "
urasm.start(), urasm.end(), ur.start(), ur.end()));
// In the case of CMS+ParNew, issue a warning
if (!ur.contains(urasm)) {
assert(UseConcMarkSweepGC && UseParNewGC, "Tautology: see assert above");
warning("CMS+ParNew: Did you forget to call save_marks()? "
"[" PTR_FORMAT ", " PTR_FORMAT ") is not contained in "
urasm.start(), urasm.end(), ur.start(), ur.end());
MemRegion ur2 = sp->used_region();
MemRegion urasm2 = sp->used_region_at_save_marks();
if (!ur.equals(ur2)) {
warning("CMS+ParNew: Flickering used_region()!!");
if (!urasm.equals(urasm2)) {
warning("CMS+ParNew: Flickering used_region_at_save_marks()!!");
_ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm,
dcto_cl, &clear_cl);
void CardTableRS::clear_into_younger(Generation* gen, bool clear_perm) {
@ -1,5 +1,5 @@
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -166,4 +166,21 @@ public:
class ClearNoncleanCardWrapper: public MemRegionClosure {
MemRegionClosure* _dirty_card_closure;
CardTableRS* _ct;
bool _is_par;
// Clears the given card, return true if the corresponding card should be
// processed.
inline bool clear_card(jbyte* entry);
// Work methods called by the clear_card()
inline bool clear_card_serial(jbyte* entry);
inline bool clear_card_parallel(jbyte* entry);
ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, CardTableRS* ct);
void do_MemRegion(MemRegion mr);
@ -265,8 +265,6 @@ void TwoGenerationCollectorPolicy::initialize_flags() {
MaxHeapSize = align_size_up(MaxHeapSize, max_alignment());
always_do_update_barrier = UseConcMarkSweepGC;
BlockOffsetArrayUseUnallocatedBlock =
BlockOffsetArrayUseUnallocatedBlock || ParallelGCThreads > 0;
// Check validity of heap flags
assert(OldSize % min_alignment() == 0, "old space alignment");
@ -427,13 +427,13 @@ public:
// explicitly mark reachable objects in younger generations, to avoid
// excess storage retention.) If "collecting_perm_gen" is false, then
// roots that may only contain references to permGen objects are not
// scanned. The "so" argument determines which of the roots
// scanned; instead, the older_gens closure is applied to all outgoing
// references in the perm gen. The "so" argument determines which of the roots
// the closure is applied to:
// "SO_None" does none;
// "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
// "SO_SystemClasses" to all the "system" classes and loaders;
// "SO_Symbols_and_Strings" applies the closure to all entries in
// SymbolsTable and StringTable.
// "SO_Strings" applies the closure to all entries in the StringTable.
void gen_process_strong_roots(int level,
bool younger_gens_as_roots,
// The remaining arguments are in an order
@ -1,5 +1,5 @@
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
* This code is free software; you can redistribute it and/or modify it
@ -175,7 +175,7 @@ class VerifyOopClosure: public OopClosure {
template <class T> inline void do_oop_work(T* p) {
oop obj = oopDesc::load_decode_heap_oop(p);
guarantee(obj->is_oop_or_null(), "invalid oop");
guarantee(obj->is_oop_or_null(), err_msg("invalid oop: " INTPTR_FORMAT, (oopDesc*) obj));
virtual void do_oop(oop* p);
@ -100,12 +100,6 @@ public:
// Pass along the argument to the superclass.
ModRefBarrierSet(int max_covered_regions) :
BarrierSet(max_covered_regions) {}
#ifndef PRODUCT
// Verifies that the given region contains no modified references.
virtual void verify_clean_region(MemRegion mr) = 0;
@ -46,7 +46,6 @@ enum SH_process_strong_roots_tasks {
// Leave this one last.
@ -161,13 +160,9 @@ void SharedHeap::process_strong_roots(bool activate_scope,
if (!_process_strong_tasks->is_task_claimed(SH_PS_SystemDictionary_oops_do)) {
if (so & SO_AllClasses) {
} else
if (so & SO_SystemClasses) {
if (!_process_strong_tasks->is_task_claimed(SH_PS_SymbolTable_oops_do)) {
} else if (so & SO_SystemClasses) {
if (!_process_strong_tasks->is_task_claimed(SH_PS_StringTable_oops_do)) {
@ -192,9 +192,8 @@ public:
SO_None = 0x0,
SO_AllClasses = 0x1,
SO_SystemClasses = 0x2,
SO_Symbols = 0x4,
SO_Strings = 0x8,
SO_CodeCache = 0x10
SO_Strings = 0x4,
SO_CodeCache = 0x8
FlexibleWorkGang* workers() const { return _workers; }
@ -208,14 +207,13 @@ public:
// Invoke the "do_oop" method the closure "roots" on all root locations.
// If "collecting_perm_gen" is false, then roots that may only contain
// references to permGen objects are not scanned. If true, the
// "perm_gen" closure is applied to all older-to-younger refs in the
// references to permGen objects are not scanned; instead, in that case,
// the "perm_blk" closure is applied to all outgoing refs in the
// permanent generation. The "so" argument determines which of the roots
// the closure is applied to:
// "SO_None" does none;
// "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
// "SO_SystemClasses" to all the "system" classes and loaders;
// "SO_Symbols" applies the closure to all entries in SymbolsTable;
// "SO_Strings" applies the closure to all entries in StringTable;
// "SO_CodeCache" applies the closure to all elements of the CodeCache.
void process_strong_roots(bool activate_scope,
@ -104,7 +104,7 @@ void ConstantPoolCacheEntry::set_f1_if_null_atomic(oop f1) {
void* result = Atomic::cmpxchg_ptr(f1, f1_addr, NULL);
bool success = (result == NULL);
if (success) {
update_barrier_set(f1_addr, f1);
update_barrier_set((void*) f1_addr, f1);
@ -275,21 +275,23 @@ int ConstantPoolCacheEntry::bootstrap_method_index_in_cache() {
return (int) bsm_cache_index;
void ConstantPoolCacheEntry::set_dynamic_call(Handle call_site,
methodHandle signature_invoker) {
void ConstantPoolCacheEntry::set_dynamic_call(Handle call_site, methodHandle signature_invoker) {
assert(is_secondary_entry(), "");
// NOTE: it's important that all other values are set before f1 is
// set since some users short circuit on f1 being set
// (i.e. non-null) and that may result in uninitialized values for
// other racing threads (e.g. flags).
int param_size = signature_invoker->size_of_parameters();
assert(param_size >= 1, "method argument size must include MH.this");
param_size -= 1; // do not count MH.this; it is not stacked for invokedynamic
if (Atomic::cmpxchg_ptr(call_site(), &_f1, NULL) == NULL) {
// racing threads might be trying to install their own favorites
param_size -= 1; // do not count MH.this; it is not stacked for invokedynamic
bool is_final = true;
assert(signature_invoker->is_final_method(), "is_final");
set_flags(as_flags(as_TosState(signature_invoker->result_type()), is_final, false, false, false, true) | param_size);
int flags = as_flags(as_TosState(signature_invoker->result_type()), is_final, false, false, false, true) | param_size;
assert(_flags == 0 || _flags == flags, "flags should be the same");
// do not do set_bytecode on a secondary CP cache entry
set_f1_if_null_atomic(call_site()); // This must be the last one to set (see NOTE above)!
@ -1194,7 +1194,7 @@ private:
// Whole-method sticky bits and flags
enum {
_trap_hist_limit = 16, // decoupled from Deoptimization::Reason_LIMIT
_trap_hist_limit = 17, // decoupled from Deoptimization::Reason_LIMIT
_trap_hist_mask = max_jubyte,
_extra_data_count = 4 // extra DataLayout headers, for trap history
}; // Public flag values
@ -310,13 +310,14 @@ const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_
return "inlining too deep";
// We need to detect recursive inlining of method handle targets: if
// the current method is a method handle adapter and one of the
// callers is the same method as the callee, we bail out if
// MaxRecursiveInlineLevel is hit.
if (method()->is_method_handle_adapter()) {
// detect direct and indirect recursive inlining
// count the current method and the callee
int inline_level = (method() == callee_method) ? 1 : 0;
if (inline_level > MaxRecursiveInlineLevel)
return "recursively inlining too deep";
// count callers of current method and callee
JVMState* jvms = caller_jvms();
int inline_level = 0;
while (jvms != NULL && jvms->has_method()) {
if (jvms->method() == callee_method) {
@ -327,10 +328,6 @@ const char* InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_
if (method() == callee_method && inline_depth() > MaxRecursiveInlineLevel) {
return "recursively inlining too deep";
int size = callee_method->code_size();
if (UseOldInlining && ClipInlining
@ -376,7 +373,6 @@ bool pass_initial_checks(ciMethod* caller_method, int caller_bci, ciMethod* call
return true;
#ifndef PRODUCT
// Really, the failure_msg can be a success message also.
void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const char* failure_msg) const {
@ -388,7 +384,6 @@ void InlineTree::print_inlining(ciMethod* callee_method, int caller_bci, const c
tty->print(" bcs: %d+%d invoked: %d", top->count_inline_bcs(), callee_method->code_size(), callee_method->interpreter_invocation_count());
WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci) {
@ -183,6 +183,21 @@
develop(bool, TraceLoopOpts, false, \
"Trace executed loop optimizations") \
diagnostic(bool, LoopLimitCheck, true, \
"Generate a loop limits check for overflow") \
develop(bool, TraceLoopLimitCheck, false, \
"Trace generation of loop limits checks") \
diagnostic(bool, RangeLimitCheck, true, \
"Additional overflow checks during range check elimination") \
develop(bool, TraceRangeLimitCheck, false, \
"Trace additional overflow checks in RCE") \
diagnostic(bool, UnrollLimitCheck, true, \
"Additional overflow checks during loop unroll") \
product(bool, OptimizeFill, false, \
"convert fill/copy loops into intrinsic") \
@ -1373,7 +1373,7 @@ static void split_once(PhaseIterGVN *igvn, Node *phi, Node *val, Node *n, Node *
// Clone loop predicates
if (predicate_proj != NULL) {
newn = igvn->clone_loop_predicates(predicate_proj, newn);
newn = igvn->clone_loop_predicates(predicate_proj, newn, !n->is_CountedLoop());
// Now I can point to the new node.
@ -156,6 +156,7 @@ macro(Lock)
@ -1437,7 +1437,10 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist)
// Update the memory inputs of MemNodes with the value we computed
// in Phase 2 and move stores memory users to corresponding memory slices.
#ifdef ASSERT
// Disable memory split verification code until the fix for 6984348.
// Currently it produces false negative results since it does not cover all cases.
#if 0 // ifdef ASSERT
Node_Stack old_mems(arena, _compile->unique() >> 2);
@ -1447,7 +1450,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist)
Node *n = ptnode_adr(i)->_node;
assert(n != NULL, "sanity");
if (n->is_Mem()) {
#ifdef ASSERT
#if 0 // ifdef ASSERT
Node* old_mem = n->in(MemNode::Memory);
if (!visited.test_set(old_mem->_idx)) {
old_mems.push(old_mem, old_mem->outcnt());
@ -1469,13 +1472,13 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist)
#ifdef ASSERT
#if 0 // ifdef ASSERT
// Verify that memory was split correctly
while (old_mems.is_nonempty()) {
Node* old_mem = old_mems.node();
uint old_cnt = old_mems.index();
assert(old_cnt = old_mem->outcnt(), "old mem could be lost");
assert(old_cnt == old_mem->outcnt(), "old mem could be lost");
@ -1033,14 +1033,10 @@ bool GraphKit::compute_stack_effects(int& inputs, int& depth) {
ciMethod* method = iter.get_method(ignore);
inputs = method->arg_size_no_receiver();
// Add a receiver argument, maybe:
if (code != Bytecodes::_invokestatic &&
code != Bytecodes::_invokedynamic)
inputs += 1;
// (Do not use ciMethod::arg_size(), because
// it might be an unloaded method, which doesn't
// know whether it is static or not.)
inputs = method->invoke_arg_size(code);
int size = method->return_type()->size();
depth = size - inputs;
@ -2957,8 +2953,7 @@ static void hook_memory_on_init(GraphKit& kit, int alias_idx,
Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
const TypeOopPtr* oop_type,
bool raw_mem_only) {
const TypeOopPtr* oop_type) {
int rawidx = Compile::AliasIdxRaw;
alloc->set_req( TypeFunc::FramePtr, frameptr() );
@ -2982,7 +2977,7 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
assert(alloc->initialization() == init, "2-way macro link must work");
assert(init ->allocation() == alloc, "2-way macro link must work");
if (ReduceFieldZeroing && !raw_mem_only) {
// Extract memory strands which may participate in the new object's
// initialization, and source them from the new InitializeNode.
// This will allow us to observe initializations when they occur,
@ -3043,11 +3038,9 @@ Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
// the type to a constant.
// The optional arguments are for specialized use by intrinsics:
// - If 'extra_slow_test' is not null, it is an extra condition for the slow-path.
// - If 'raw_mem_only', do not cast the result to an oop.
// - If 'return_size_val', report the total object size to the caller.
Node* GraphKit::new_instance(Node* klass_node,
Node* extra_slow_test,
bool raw_mem_only, // affect only raw memory
Node* *return_size_val) {
// Compute size in doublewords
// The size is always an integral number of doublewords, represented
@ -3118,7 +3111,7 @@ Node* GraphKit::new_instance(Node* klass_node,
size, klass_node,
return set_output_for_allocation(alloc, oop_type, raw_mem_only);
return set_output_for_allocation(alloc, oop_type);
@ -3128,7 +3121,6 @@ Node* GraphKit::new_instance(Node* klass_node,
Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable)
Node* length, // number of array elements
int nargs, // number of arguments to push back for uncommon trap
bool raw_mem_only, // affect only raw memory
Node* *return_size_val) {
jint layout_con = Klass::_lh_neutral_value;
Node* layout_val = get_layout_helper(klass_node, layout_con);
@ -3273,7 +3265,7 @@ Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable)
ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only);
Node* javaoop = set_output_for_allocation(alloc, ary_type);
// Cast length on remaining path to be as narrow as possible
if (map()->find_edge(length) >= 0) {
@ -3386,6 +3378,10 @@ void GraphKit::add_predicate(int nargs) {
if (UseLoopPredicate) {
add_predicate_impl(Deoptimization::Reason_predicate, nargs);
// loop's limit check predicate should be near the loop.
if (LoopLimitCheck) {
add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs);
//----------------------------- store barriers ----------------------------
@ -3462,9 +3458,22 @@ void GraphKit::write_barrier_post(Node* oop_store,
// Get the alias_index for raw card-mark memory
int adr_type = Compile::AliasIdxRaw;
// Smash zero into card
Node* zero = __ ConI(0);
Node* zero = __ ConI(0); // Dirty card value
BasicType bt = T_BYTE;
if (UseCondCardMark) {
// The classic GC reference write barrier is typically implemented
// as a store into the global card mark table. Unfortunately
// unconditional stores can result in false sharing and excessive
// coherence traffic as well as false transactional aborts.
// UseCondCardMark enables MP "polite" conditional card mark
// stores. In theory we could relax the load from ctrl() to
// no_ctrl, but that doesn't buy much latitude.
Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, bt, adr_type);
__ if_then(card_val, BoolTest::ne, zero);
// Smash zero into card
if( !UseConcMarkSweepGC ) {
__ store(__ ctrl(), card_adr, zero, bt, adr_type);
} else {
@ -3472,6 +3481,10 @@ void GraphKit::write_barrier_post(Node* oop_store,
__ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type);
if (UseCondCardMark) {
__ end_if();
// Final sync IdealKit and GraphKit.
@ -773,15 +773,13 @@ class GraphKit : public Phase {
// implementation of object creation
Node* set_output_for_allocation(AllocateNode* alloc,
const TypeOopPtr* oop_type,
bool raw_mem_only);
const TypeOopPtr* oop_type);
Node* get_layout_helper(Node* klass_node, jint& constant_value);
Node* new_instance(Node* klass_node,
Node* slow_test = NULL,
bool raw_mem_only = false,
Node* *return_size_val = NULL);
Node* new_array(Node* klass_node, Node* count_val, int nargs,
bool raw_mem_only = false, Node* *return_size_val = NULL);
Node* *return_size_val = NULL);
// Handy for making control flow
IfNode* create_and_map_if(Node* ctrl, Node* tst, float prob, float cnt) {
@ -236,6 +236,7 @@ static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
Node* predicate_c = NULL;
Node* predicate_x = NULL;
bool counted_loop = r->is_CountedLoop();
Node *region_c = new (igvn->C, req_c + 1) RegionNode(req_c + 1);
Node *phi_c = con1;
@ -294,16 +295,16 @@ static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
if (predicate_c != NULL) {
assert(predicate_x == NULL, "only one predicate entry expected");
// Clone loop predicates to each path
iff_c_t = igvn->clone_loop_predicates(predicate_c, iff_c_t);
iff_c_f = igvn->clone_loop_predicates(predicate_c, iff_c_f);
iff_c_t = igvn->clone_loop_predicates(predicate_c, iff_c_t, !counted_loop);
iff_c_f = igvn->clone_loop_predicates(predicate_c, iff_c_f, !counted_loop);
Node *iff_x_t = phase->transform(new (igvn->C, 1) IfTrueNode (iff_x));
Node *iff_x_f = phase->transform(new (igvn->C, 1) IfFalseNode(iff_x));
if (predicate_x != NULL) {
assert(predicate_c == NULL, "only one predicate entry expected");
// Clone loop predicates to each path
iff_x_t = igvn->clone_loop_predicates(predicate_x, iff_x_t);
iff_x_f = igvn->clone_loop_predicates(predicate_x, iff_x_f);
iff_x_t = igvn->clone_loop_predicates(predicate_x, iff_x_t, !counted_loop);
iff_x_f = igvn->clone_loop_predicates(predicate_x, iff_x_f, !counted_loop);
// Merge the TRUE paths
@ -545,6 +546,7 @@ static void adjust_check(Node* proj, Node* range, Node* index,
Node *new_bol = gvn->transform( new (gvn->C, 2) BoolNode( new_cmp, bol->as_Bool()->_test._test ) );
igvn->hash_delete( iff );
iff->set_req_X( 1, new_bol, igvn );
igvn->_worklist.push( iff );
@ -867,12 +867,10 @@ Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1, Node* cnt1
Node* str1_offset = make_load(no_ctrl, str1_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
Node* str1_start = array_element_address(str1_value, str1_offset, T_CHAR);
// Pin loads from String::equals() argument since it could be NULL.
Node* str2_ctrl = (opcode == Op_StrEquals) ? control() : no_ctrl;
Node* str2_valuea = basic_plus_adr(str2, str2, value_offset);
Node* str2_value = make_load(str2_ctrl, str2_valuea, value_type, T_OBJECT, string_type->add_offset(value_offset));
Node* str2_value = make_load(no_ctrl, str2_valuea, value_type, T_OBJECT, string_type->add_offset(value_offset));
Node* str2_offseta = basic_plus_adr(str2, str2, offset_offset);
Node* str2_offset = make_load(str2_ctrl, str2_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
Node* str2_offset = make_load(no_ctrl, str2_offseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
Node* str2_start = array_element_address(str2_value, str2_offset, T_CHAR);
Node* result = NULL;
@ -1012,14 +1010,15 @@ bool LibraryCallKit::inline_string_equals() {
if (!stopped()) {
// Properly cast the argument to String
argument = _gvn.transform(new (C, 2) CheckCastPPNode(control(), argument, string_type));
// This path is taken only when argument's type is String:NotNull.
argument = cast_not_null(argument, false);
// Get counts for string and argument
Node* receiver_cnta = basic_plus_adr(receiver, receiver, count_offset);
receiver_cnt = make_load(no_ctrl, receiver_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
// Pin load from argument string since it could be NULL.
Node* argument_cnta = basic_plus_adr(argument, argument, count_offset);
argument_cnt = make_load(control(), argument_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
argument_cnt = make_load(no_ctrl, argument_cnta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
// Check for receiver count != argument count
Node* cmp = _gvn.transform( new(C, 3) CmpINode(receiver_cnt, argument_cnt) );
@ -3527,8 +3526,7 @@ bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) {
Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) );
Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
const bool raw_mem_only = true;
newcopy = new_array(klass_node, length, 0, raw_mem_only);
newcopy = new_array(klass_node, length, 0);
// Generate a direct call to the right arraycopy function(s).
// We know the copy is disjoint but we might not know if the
@ -4325,8 +4323,6 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
int raw_adr_idx = Compile::AliasIdxRaw;
const bool raw_mem_only = true;
Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
if (array_ctl != NULL) {
@ -4335,8 +4331,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
Node* obj_length = load_array_length(obj);
Node* obj_size = NULL;
Node* alloc_obj = new_array(obj_klass, obj_length, 0,
raw_mem_only, &obj_size);
Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size);
if (!use_ReduceInitialCardMarks()) {
// If it is an oop array, it requires very special treatment,
@ -4408,7 +4403,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
// It's an instance, and it passed the slow-path tests.
PreserveJVMState pjvms(this);
Node* obj_size = NULL;
Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size);
Node* alloc_obj = new_instance(obj_klass, NULL, &obj_size);
copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks());
@ -341,7 +341,7 @@ ProjNode* PhaseIdealLoop::move_predicate(ProjNode* predicate_proj, Node* new_ent
// Cut predicate from old place.
Node* old = predicate_proj;
for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin; ) {
for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin;) {
Node* use = old->last_out(i); // for each use...
@ -384,24 +384,25 @@ ProjNode* PhaseIdealLoop::move_predicate(ProjNode* predicate_proj, Node* new_ent
// Interface from IGVN
Node* PhaseIterGVN::clone_loop_predicates(Node* old_entry, Node* new_entry) {
return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, false, NULL, this);
Node* PhaseIterGVN::clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) {
return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, false, clone_limit_check, NULL, this);
Node* PhaseIterGVN::move_loop_predicates(Node* old_entry, Node* new_entry) {
return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, true, NULL, this);
Node* PhaseIterGVN::move_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) {
return PhaseIdealLoop::clone_loop_predicates(old_entry, new_entry, true, clone_limit_check, NULL, this);
// Interface from PhaseIdealLoop
Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry) {
return clone_loop_predicates(old_entry, new_entry, false, this, &this->_igvn);
Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) {
return clone_loop_predicates(old_entry, new_entry, false, clone_limit_check, this, &this->_igvn);
Node* PhaseIdealLoop::move_loop_predicates(Node* old_entry, Node* new_entry) {
return clone_loop_predicates(old_entry, new_entry, true, this, &this->_igvn);
Node* PhaseIdealLoop::move_loop_predicates(Node* old_entry, Node* new_entry, bool clone_limit_check) {
return clone_loop_predicates(old_entry, new_entry, true, clone_limit_check, this, &this->_igvn);
// Clone loop predicates to cloned loops (peeled, unswitched, split_if).
Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry,
bool move_predicates,
bool clone_limit_check,
PhaseIdealLoop* loop_phase,
PhaseIterGVN* igvn) {
#ifdef ASSERT
@ -413,10 +414,16 @@ Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry,
// Search original predicates
Node* entry = old_entry;
ProjNode* limit_check_proj = NULL;
if (LoopLimitCheck) {
limit_check_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
if (limit_check_proj != NULL) {
entry = entry->in(0)->in(0);
if (UseLoopPredicate) {
ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
if (predicate_proj != NULL) { // right pattern that can be used by loop predication
assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be");
if (move_predicates) {
new_entry = move_predicate(predicate_proj, new_entry,
@ -435,11 +442,37 @@ Node* PhaseIdealLoop::clone_loop_predicates(Node* old_entry, Node* new_entry,
if (limit_check_proj != NULL && clone_limit_check) {
// Clone loop limit check last to insert it before loop.
// Don't clone a limit check which was already finalized
// for this counted loop (only one limit check is needed).
if (move_predicates) {
new_entry = move_predicate(limit_check_proj, new_entry,
loop_phase, igvn);
assert(new_entry == limit_check_proj, "old limit check fall through projection");
} else {
new_entry = clone_predicate(limit_check_proj, new_entry,
loop_phase, igvn);
assert(new_entry != NULL && new_entry->is_Proj(), "IfTrue or IfFalse after clone limit check");
if (TraceLoopLimitCheck) {
tty->print_cr("Loop Limit Check %s: ", move_predicates ? "moved" : "cloned");
debug_only( new_entry->in(0)->dump(); )
return new_entry;
void PhaseIdealLoop::eliminate_loop_predicates(Node* entry) {
if (LoopLimitCheck) {
Node* predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
if (predicate != NULL) {
entry = entry->in(0)->in(0);
if (UseLoopPredicate) {
ProjNode* predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
if (predicate_proj != NULL) { // right pattern that can be used by loop predication
@ -456,10 +489,15 @@ void PhaseIdealLoop::eliminate_loop_predicates(Node* entry) {
// Skip related predicates.
Node* PhaseIdealLoop::skip_loop_predicates(Node* entry) {
Node* predicate = NULL;
if (LoopLimitCheck) {
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
if (predicate != NULL) {
entry = entry->in(0)->in(0);
if (UseLoopPredicate) {
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
if (predicate != NULL) { // right pattern that can be used by loop predication
assert(entry->is_Proj() && entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be");
IfNode* iff = entry->in(0)->as_If();
ProjNode* uncommon_proj = iff->proj_out(1 - entry->as_Proj()->_con);
Node* rgn = uncommon_proj->unique_ctrl_out();
@ -491,10 +529,15 @@ ProjNode* PhaseIdealLoop::find_predicate_insertion_point(Node* start_c, Deoptimi
// Find a predicate
Node* PhaseIdealLoop::find_predicate(Node* entry) {
Node* predicate = NULL;
if (LoopLimitCheck) {
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
if (predicate != NULL) { // right pattern that can be used by loop predication
return entry;
if (UseLoopPredicate) {
predicate = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
if (predicate != NULL) { // right pattern that can be used by loop predication
assert(entry->in(0)->in(1)->in(1)->Opcode()==Op_Opaque1, "must be");
return entry;
@ -658,7 +701,7 @@ bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invari
Node* range = cmp->in(2);
if (range->Opcode() != Op_LoadRange) {
const TypeInt* tint = phase->_igvn.type(range)->isa_int();
if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) {
if (tint == NULL || tint->empty() || tint->_lo < 0) {
// Allow predication on positive values that aren't LoadRanges.
// This allows optimization of loops where the length of the
// array is a known value and doesn't need to be loaded back
@ -696,36 +739,49 @@ bool IdealLoopTree::is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invari
// max(scale*i + offset) = scale*(limit-stride) + offset
// (2) stride*scale < 0
// max(scale*i + offset) = scale*init + offset
BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl,
BoolNode* PhaseIdealLoop::rc_predicate(IdealLoopTree *loop, Node* ctrl,
int scale, Node* offset,
Node* init, Node* limit, Node* stride,
Node* range, bool upper) {
DEBUG_ONLY(ttyLocker ttyl);
if (TraceLoopPredicate) tty->print("rc_predicate ");
stringStream* predString = NULL;
if (TraceLoopPredicate) {
predString = new stringStream();
predString->print("rc_predicate ");
Node* max_idx_expr = init;
int stride_con = stride->get_int();
if ((stride_con > 0) == (scale > 0) == upper) {
max_idx_expr = new (C, 3) SubINode(limit, stride);
register_new_node(max_idx_expr, ctrl);
if (TraceLoopPredicate) tty->print("(limit - stride) ");
if (LoopLimitCheck) {
// With LoopLimitCheck limit is not exact.
// Calculate exact limit here.
// Note, counted loop's test is '<' or '>'.
limit = exact_limit(loop);
max_idx_expr = new (C, 3) SubINode(limit, stride);
register_new_node(max_idx_expr, ctrl);
if (TraceLoopPredicate) predString->print("(limit - stride) ");
} else {
max_idx_expr = new (C, 3) SubINode(limit, stride);
register_new_node(max_idx_expr, ctrl);
if (TraceLoopPredicate) predString->print("(limit - stride) ");
} else {
if (TraceLoopPredicate) tty->print("init ");
if (TraceLoopPredicate) predString->print("init ");
if (scale != 1) {
ConNode* con_scale = _igvn.intcon(scale);
max_idx_expr = new (C, 3) MulINode(max_idx_expr, con_scale);
register_new_node(max_idx_expr, ctrl);
if (TraceLoopPredicate) tty->print("* %d ", scale);
if (TraceLoopPredicate) predString->print("* %d ", scale);
if (offset && (!offset->is_Con() || offset->get_int() != 0)){
max_idx_expr = new (C, 3) AddINode(max_idx_expr, offset);
register_new_node(max_idx_expr, ctrl);
if (TraceLoopPredicate)
if (offset->is_Con()) tty->print("+ %d ", offset->get_int());
else tty->print("+ offset ");
if (offset->is_Con()) predString->print("+ %d ", offset->get_int());
else predString->print("+ offset ");
CmpUNode* cmp = new (C, 3) CmpUNode(max_idx_expr, range);
@ -733,7 +789,10 @@ BoolNode* PhaseIdealLoop::rc_predicate(Node* ctrl,
BoolNode* bol = new (C, 2) BoolNode(cmp, BoolTest::lt);
register_new_node(bol, ctrl);
if (TraceLoopPredicate) tty->print_cr("<u range");
if (TraceLoopPredicate) {
predString->print_cr("<u range");
return bol;
@ -746,29 +805,36 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
// Could be a simple region when irreducible loops are present.
return false;
LoopNode* head = loop->_head->as_Loop();
if (loop->_head->unique_ctrl_out()->Opcode() == Op_NeverBranch) {
if (head->unique_ctrl_out()->Opcode() == Op_NeverBranch) {
// do nothing for infinite loops
return false;
CountedLoopNode *cl = NULL;
if (loop->_head->is_CountedLoop()) {
cl = loop->_head->as_CountedLoop();
if (head->is_CountedLoop()) {
cl = head->as_CountedLoop();
// do nothing for iteration-split loops
if (!cl->is_normal_loop()) return false;
LoopNode *lpn = loop->_head->as_Loop();
Node* entry = lpn->in(LoopNode::EntryControl);
Node* entry = head->in(LoopNode::EntryControl);
ProjNode *predicate_proj = NULL;
// Loop limit check predicate should be near the loop.
if (LoopLimitCheck) {
predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
if (predicate_proj != NULL)
entry = predicate_proj->in(0)->in(0);
ProjNode *predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
if (!predicate_proj) {
#ifndef PRODUCT
if (TraceLoopPredicate) {
tty->print("missing predicate:");
return false;
@ -782,7 +848,6 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
// Create list of if-projs such that a newer proj dominates all older
// projs in the list, and they all dominate loop->tail()
Node_List if_proj_list(area);
LoopNode *head = loop->_head->as_Loop();
Node *current_proj = loop->tail(); //start from tail
while (current_proj != head) {
if (loop == get_loop(current_proj) && // still in the loop ?
@ -856,8 +921,8 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
const Node* cmp = bol->in(1)->as_Cmp();
Node* idx = cmp->in(1);
assert(!invar.is_invariant(idx), "index is variant");
assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be");
Node* rng = cmp->in(2);
assert(rng->Opcode() == Op_LoadRange || _igvn.type(rng)->is_int() >= 0, "must be");
assert(invar.is_invariant(rng), "range must be invariant");
int scale = 1;
Node* offset = zero;
@ -886,14 +951,14 @@ bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
// Test the lower bound
Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false);
Node* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false);
IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
lower_bound_iff->set_req(1, lower_bound_bol);
if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx);
// Test the upper bound
Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true);
Node* upper_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, true);
IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
upper_bound_iff->set_req(1, upper_bound_bol);
@ -957,4 +1022,3 @@ bool IdealLoopTree::loop_predication( PhaseIdealLoop *phase) {
return hoisted;
@ -83,7 +83,7 @@ void IdealLoopTree::compute_exact_trip_count( PhaseIdealLoop *phase ) {
#ifdef ASSERT
BoolTest::mask bt = cl->loopexit()->test_trip();
assert(bt == BoolTest::lt || bt == BoolTest::gt ||
bt == BoolTest::ne, "canonical test is expected");
(bt == BoolTest::ne && !LoopLimitCheck), "canonical test is expected");
Node* init_n = cl->init_trip();
@ -510,7 +510,7 @@ void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) {
// the pre-loop with only 1 user (the new peeled iteration), but the
// peeled-loop backedge has 2 users.
Node* new_exit_value = old_new[head->in(LoopNode::LoopBackControl)->_idx];
new_exit_value = move_loop_predicates(entry, new_exit_value);
new_exit_value = move_loop_predicates(entry, new_exit_value, !counted_loop);
head->set_req(LoopNode::EntryControl, new_exit_value);
for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
@ -593,6 +593,12 @@ bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const {
return false;
// Fully unroll a loop with few iterations regardless of the
// conditions below, since the following loop optimizations would
// split such a loop anyway (pre-main-post).
if (trip_count <= 3)
return true;
// Take into account that after unroll conjoined heads and tails will fold,
// otherwise policy_unroll() may allow more unrolling than max unrolling.
uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count;
@ -605,15 +611,6 @@ bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const {
return false;
// Currently we don't have policy to optimize one iteration loops.
// Maximally unrolling transformation is used for that:
// it is peeled and the original loop become non reachable (dead).
// Also fully unroll a loop with few iterations regardless next
// conditions since following loop optimizations will split
// such loop anyway (pre-main-post).
if (trip_count <= 3)
return true;
// Do not unroll a loop with String intrinsics code.
// String intrinsics are large and have loops.
for (uint k = 0; k < _body.size(); k++) {
@ -632,6 +629,8 @@ bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const {
#define MAX_UNROLL 16 // maximum number of unrolls for main loop
// Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
// the loop is a CountedLoop and the body is small enough.
@ -643,13 +642,15 @@ bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
if (!cl->is_valid_counted_loop())
return false; // Malformed counted loop
// protect against over-unrolling
if (cl->trip_count() <= 1) return false;
// Check for stride being a small enough constant
if (abs(cl->stride_con()) > (1<<3)) return false;
// Protect against over-unrolling.
// After split at least one iteration will be executed in pre-loop.
if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
int future_unroll_ct = cl->unrolled_count() * 2;
if (future_unroll_ct > MAX_UNROLL) return false;
// Check for initial stride being a small enough constant
if (abs(cl->stride_con()) > (1<<2)*future_unroll_ct) return false;
// Don't unroll if the next round of unrolling would push us
// over the expected trip count of the loop. One is subtracted
@ -675,6 +676,7 @@ bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
Node *init_n = cl->init_trip();
Node *limit_n = cl->limit();
int stride_con = cl->stride_con();
// Non-constant bounds.
// Protect against over-unrolling when init or/and limit are not constant
// (so that trip_count's init value is maxint) but iv range is known.
@ -684,7 +686,7 @@ bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
if (phi != NULL) {
assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
int next_stride = cl->stride_con() * 2; // stride after this unroll
int next_stride = stride_con * 2; // stride after this unroll
if (next_stride > 0) {
if (iv_type->_lo + next_stride <= iv_type->_lo || // overflow
iv_type->_lo + next_stride > iv_type->_hi) {
@ -699,15 +701,19 @@ bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
// After unroll limit will be adjusted: new_limit = limit-stride.
// Bailout if adjustment overflow.
const TypeInt* limit_type = phase->_igvn.type(limit_n)->is_int();
if (stride_con > 0 && ((limit_type->_hi - stride_con) >= limit_type->_hi) ||
stride_con < 0 && ((limit_type->_lo - stride_con) <= limit_type->_lo))
return false; // overflow
// Adjust body_size to determine if we unroll or not
uint body_size = _body.size();
// Key test to unroll CaffeineMark's Logic test
int xors_in_loop = 0;
// Also count ModL, DivL and MulL which expand mightily
for (uint k = 0; k < _body.size(); k++) {
Node* n = _body.at(k);
switch (n->Opcode()) {
case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test
case Op_ModL: body_size += 30; break;
case Op_DivL: body_size += 30; break;
case Op_MulL: body_size += 10; break;
@ -724,8 +730,7 @@ bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
// Check for being too big
if (body_size > (uint)LoopUnrollLimit) {
if (xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
// Normal case: loop too big
// Normal case: loop too big
return false;
@ -747,28 +752,31 @@ bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
// Return TRUE or FALSE if the loop should be range-check-eliminated.
// Actually we do iteration-splitting, a more powerful form of RCE.
bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
if( !RangeCheckElimination ) return false;
if (!RangeCheckElimination) return false;
CountedLoopNode *cl = _head->as_CountedLoop();
// If we unrolled with no intention of doing RCE and we later
// changed our minds, we got no pre-loop. Either we need to
// make a new pre-loop, or we gotta disallow RCE.
if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now.
if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
Node *trip_counter = cl->phi();
// Check loop body for tests of trip-counter plus loop-invariant vs
// loop-invariant.
for( uint i = 0; i < _body.size(); i++ ) {
for (uint i = 0; i < _body.size(); i++) {
Node *iff = _body[i];
if( iff->Opcode() == Op_If ) { // Test?
if (iff->Opcode() == Op_If) { // Test?
// Comparing trip+off vs limit
Node *bol = iff->in(1);
if( bol->req() != 2 ) continue; // dead constant test
if (bol->req() != 2) continue; // dead constant test
if (!bol->is_Bool()) {
assert(UseLoopPredicate && bol->Opcode() == Op_Conv2B, "predicate check only");
if (bol->as_Bool()->_test._test == BoolTest::ne)
continue; // not RC
Node *cmp = bol->in(1);
Node *rc_exp = cmp->in(1);
@ -1064,6 +1072,7 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
// negative stride use >
if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
assert(!LoopLimitCheck, "only canonical tests (lt or gt) are expected");
BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
// Modify pre loop end condition
@ -1090,6 +1099,9 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
if( peel_only ) main_head->set_main_no_pre_loop();
// Subtract a trip count for the pre-loop.
main_head->set_trip_count(main_head->trip_count() - 1);
// It's difficult to be precise about the trip-counts
// for the pre/post loops. They are usually very short,
// so guess that 4 trips is a reasonable value.
@ -1123,9 +1135,9 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
} else if (TraceLoopOpts) {
if (loop_head->trip_count() < (uint)LoopUnrollLimit) {
tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
tty->print("Unroll %d(%2d) ", loop_head->unrolled_count()*2, loop_head->trip_count());
} else {
tty->print("Unroll %d ", loop_head->unrolled_count()*2);
tty->print("Unroll %d ", loop_head->unrolled_count()*2);
@ -1141,7 +1153,8 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
Node *stride = loop_head->stride();
Node *opaq = NULL;
if( adjust_min_trip ) { // If not maximally unrolling, need adjustment
if (adjust_min_trip) { // If not maximally unrolling, need adjustment
// Search for zero-trip guard.
assert( loop_head->is_main_loop(), "" );
assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
Node *iff = ctrl->in(0);
@ -1151,63 +1164,210 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
Node *cmp = bol->in(1);
assert( cmp->Opcode() == Op_CmpI, "" );
opaq = cmp->in(2);
// Occasionally it's possible for a pre-loop Opaque1 node to be
// Occasionally it's possible for a zero-trip guard Opaque1 node to be
// optimized away and then another round of loop opts attempted.
// We can not optimize this particular loop in that case.
if( opaq->Opcode() != Op_Opaque1 )
return; // Cannot find pre-loop! Bail out!
if (opaq->Opcode() != Op_Opaque1)
return; // Cannot find zero-trip guard! Bail out!
// Zero-trip test uses an 'opaque' node which is not shared.
assert(opaq->outcnt() == 1 && opaq->in(1) == limit, "");
// Adjust max trip count. The trip count is intentionally rounded
// down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
// the main, unrolled, part of the loop will never execute as it is protected
// by the min-trip test. See bug 4834191 for a case where we over-unrolled
// and later determined that part of the unrolled loop was dead.
loop_head->set_trip_count(loop_head->trip_count() / 2);
Node* new_limit = NULL;
if (UnrollLimitCheck) {
int stride_con = stride->get_int();
int stride_p = (stride_con > 0) ? stride_con : -stride_con;
uint old_trip_count = loop_head->trip_count();
// Verify that unroll policy result is still valid.
assert(old_trip_count > 1 &&
(!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
// Double the count of original iterations in the unrolled loop body.
// Adjust loop limit to keep valid iterations number after unroll.
// Use (limit - stride) instead of (((limit - init)/stride) & (-2))*stride
// which may overflow.
if (!adjust_min_trip) {
assert(old_trip_count > 1 && (old_trip_count & 1) == 0,
"odd trip count for maximally unroll");
// No need to adjust the limit for maximal unrolling since the trip count is even.
} else if (loop_head->has_exact_trip_count() && init->is_Con()) {
// Loop's limit is constant. Loop's init could be constant when the
// pre-loop becomes a peeled iteration.
long init_con = init->get_int();
// We can keep old loop limit if iterations count stays the same:
// old_trip_count == new_trip_count * 2
// Note: since old_trip_count >= 2 then new_trip_count >= 1
// so we also don't need to adjust zero trip test.
long limit_con = limit->get_int();
// (stride_con*2) does not overflow since stride_con <= 8.
int new_stride_con = stride_con * 2;
int stride_m = new_stride_con - (stride_con > 0 ? 1 : -1);
long trip_count = (limit_con - init_con + stride_m)/new_stride_con;
// New trip count should satisfy the following conditions.
assert(trip_count > 0 && (julong)trip_count < (julong)max_juint/2, "sanity");
uint new_trip_count = (uint)trip_count;
adjust_min_trip = (old_trip_count != new_trip_count*2);
// -----------
// Step 2: Cut back the trip counter for an unroll amount of 2.
// Loop will normally trip (limit - init)/stride_con. Since it's a
// CountedLoop this is exact (stride divides limit-init exactly).
// We are going to double the loop body, so we want to knock off any
// odd iteration: (trip_cnt & ~1). Then back compute a new limit.
Node *span = new (C, 3) SubINode( limit, init );
register_new_node( span, ctrl );
Node *trip = new (C, 3) DivINode( 0, span, stride );
register_new_node( trip, ctrl );
Node *mtwo = _igvn.intcon(-2);
set_ctrl(mtwo, C->root());
Node *rond = new (C, 3) AndINode( trip, mtwo );
register_new_node( rond, ctrl );
Node *spn2 = new (C, 3) MulINode( rond, stride );
register_new_node( spn2, ctrl );
Node *lim2 = new (C, 3) AddINode( spn2, init );
register_new_node( lim2, ctrl );
if (adjust_min_trip) {
// Step 2: Adjust the trip limit if it is called for.
// The adjustment amount is -stride. Need to make sure that if the
// adjustment underflows or overflows, the main loop is skipped.
Node* cmp = loop_end->cmp_node();
assert(cmp->in(2) == limit, "sanity");
assert(opaq != NULL && opaq->in(1) == limit, "sanity");
// Hammer in the new limit
Node *ctrl2 = loop_end->in(0);
Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 );
register_new_node( cmp2, ctrl2 );
Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() );
register_new_node( bol2, ctrl2 );
loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
// Verify that policy_unroll result is still valid.
const TypeInt* limit_type = _igvn.type(limit)->is_int();
assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) ||
stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity");
// Step 3: Find the min-trip test guaranteed before a 'main' loop.
// Make it a 1-trip test (means at least 2 trips).
if( adjust_min_trip ) {
// Guard test uses an 'opaque' node which is not shared. Hence I
// can edit it's inputs directly. Hammer in the new limit for the
// minimum-trip guard.
assert( opaq->outcnt() == 1, "" );
opaq->set_req(1, lim2);
if (limit->is_Con()) {
// The check in policy_unroll and the assert above guarantee
// no underflow if limit is constant.
new_limit = _igvn.intcon(limit->get_int() - stride_con);
set_ctrl(new_limit, C->root());
} else {
// Limit is not constant.
// Separate limit by Opaque node in case it is an incremented
// variable from previous loop to avoid using pre-incremented
// value which could increase register pressure.
// Otherwise reorg_offsets() optimization will create a separate
// Opaque node for each use of trip-counter and as result
// zero trip guard limit will be different from loop limit.
assert(has_ctrl(opaq), "should have it");
Node* opaq_ctrl = get_ctrl(opaq);
limit = new (C, 2) Opaque2Node( C, limit );
register_new_node( limit, opaq_ctrl );
if (stride_con > 0 && ((limit_type->_lo - stride_con) < limit_type->_lo) ||
stride_con < 0 && ((limit_type->_hi - stride_con) > limit_type->_hi)) {
// No underflow.
new_limit = new (C, 3) SubINode(limit, stride);
} else {
// (limit - stride) may underflow.
// Clamp the adjustment value with MININT or MAXINT:
// new_limit = limit-stride
// if (stride > 0)
// new_limit = (limit < new_limit) ? MININT : new_limit;
// else
// new_limit = (limit > new_limit) ? MAXINT : new_limit;
BoolTest::mask bt = loop_end->test_trip();
assert(bt == BoolTest::lt || bt == BoolTest::gt, "canonical test is expected");
Node* adj_max = _igvn.intcon((stride_con > 0) ? min_jint : max_jint);
set_ctrl(adj_max, C->root());
Node* old_limit = NULL;
Node* adj_limit = NULL;
Node* bol = limit->is_CMove() ? limit->in(CMoveNode::Condition) : NULL;
if (loop_head->unrolled_count() > 1 &&
limit->is_CMove() && limit->Opcode() == Op_CMoveI &&
limit->in(CMoveNode::IfTrue) == adj_max &&
bol->as_Bool()->_test._test == bt &&
bol->in(1)->Opcode() == Op_CmpI &&
bol->in(1)->in(2) == limit->in(CMoveNode::IfFalse)) {
// Loop was unrolled before.
// Optimize the limit to avoid nested CMove:
// use original limit as old limit.
old_limit = bol->in(1)->in(1);
// Adjust previous adjusted limit.
adj_limit = limit->in(CMoveNode::IfFalse);
adj_limit = new (C, 3) SubINode(adj_limit, stride);
} else {
old_limit = limit;
adj_limit = new (C, 3) SubINode(limit, stride);
assert(old_limit != NULL && adj_limit != NULL, "");
register_new_node( adj_limit, ctrl ); // adjust amount
Node* adj_cmp = new (C, 3) CmpINode(old_limit, adj_limit);
register_new_node( adj_cmp, ctrl );
Node* adj_bool = new (C, 2) BoolNode(adj_cmp, bt);
register_new_node( adj_bool, ctrl );
new_limit = new (C, 4) CMoveINode(adj_bool, adj_limit, adj_max, TypeInt::INT);
register_new_node(new_limit, ctrl);
assert(new_limit != NULL, "");
// Replace in loop test.
cmp->set_req(2, new_limit);
// Step 3: Find the min-trip test guaranteed before a 'main' loop.
// Make it a 1-trip test (means at least 2 trips).
// Guard test uses an 'opaque' node which is not shared. Hence I
// can edit its inputs directly. Hammer in the new limit for the
// minimum-trip guard.
assert(opaq->outcnt() == 1, "");
opaq->set_req(1, new_limit);
// Adjust max trip count. The trip count is intentionally rounded
// down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
// the main, unrolled, part of the loop will never execute as it is protected
// by the min-trip test. See bug 4834191 for a case where we over-unrolled
// and later determined that part of the unrolled loop was dead.
loop_head->set_trip_count(old_trip_count / 2);
// Double the count of original iterations in the unrolled loop body.
} else { // LoopLimitCheck
// Adjust max trip count. The trip count is intentionally rounded
// down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
// the main, unrolled, part of the loop will never execute as it is protected
// by the min-trip test. See bug 4834191 for a case where we over-unrolled
// and later determined that part of the unrolled loop was dead.
loop_head->set_trip_count(loop_head->trip_count() / 2);
// Double the count of original iterations in the unrolled loop body.
// -----------
// Step 2: Cut back the trip counter for an unroll amount of 2.
// Loop will normally trip (limit - init)/stride_con. Since it's a
// CountedLoop this is exact (stride divides limit-init exactly).
// We are going to double the loop body, so we want to knock off any
// odd iteration: (trip_cnt & ~1). Then back compute a new limit.
Node *span = new (C, 3) SubINode( limit, init );
register_new_node( span, ctrl );
Node *trip = new (C, 3) DivINode( 0, span, stride );
register_new_node( trip, ctrl );
Node *mtwo = _igvn.intcon(-2);
set_ctrl(mtwo, C->root());
Node *rond = new (C, 3) AndINode( trip, mtwo );
register_new_node( rond, ctrl );
Node *spn2 = new (C, 3) MulINode( rond, stride );
register_new_node( spn2, ctrl );
new_limit = new (C, 3) AddINode( spn2, init );
register_new_node( new_limit, ctrl );
// Hammer in the new limit
Node *ctrl2 = loop_end->in(0);
Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), new_limit );
register_new_node( cmp2, ctrl2 );
Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() );
register_new_node( bol2, ctrl2 );
loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
// Step 3: Find the min-trip test guaranteed before a 'main' loop.
// Make it a 1-trip test (means at least 2 trips).
if( adjust_min_trip ) {
assert( new_limit != NULL, "" );
// Guard test uses an 'opaque' node which is not shared. Hence I
// can edit its inputs directly. Hammer in the new limit for the
// minimum-trip guard.
assert( opaq->outcnt() == 1, "" );
opaq->set_req(1, new_limit);
} // LoopLimitCheck
// ---------
// Step 4: Clone the loop body. Move it inside the loop. This loop body
@ -1263,6 +1423,7 @@ void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool ad
void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) {
CountedLoopNode *cl = loop->_head->as_CountedLoop();
assert(cl->has_exact_trip_count(), "trip count is not exact");
assert(cl->trip_count() > 0, "");
#ifndef PRODUCT
if (TraceLoopOpts) {
@ -1279,6 +1440,7 @@ void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_ne
// Now it's tripping an even number of times remaining. Double loop body.
// Do not adjust pre-guards; they are not needed and do not exist.
if (cl->trip_count() > 0) {
assert((cl->trip_count() & 1) == 0, "missed peeling");
do_unroll(loop, old_new, false);
@ -1292,22 +1454,13 @@ bool IdealLoopTree::dominates_backedge(Node* ctrl) {
// Constrain the main loop iterations so the condition:
// scale_con * I + offset < limit
// Constrain the main loop iterations so the conditions:
// low_limit <= scale_con * I + offset < upper_limit
// always holds true. That is, either increase the number of iterations in
// the pre-loop or the post-loop until the condition holds true in the main
// loop. Stride, scale, offset and limit are all loop invariant. Further,
// stride and scale are constants (offset and limit often are).
void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
// Compute "I :: (limit-offset)/scale_con"
Node *con = new (C, 3) SubINode( limit, offset );
register_new_node( con, pre_ctrl );
Node *scale = _igvn.intcon(scale_con);
set_ctrl(scale, C->root());
Node *X = new (C, 3) DivINode( 0, con, scale );
register_new_node( X, pre_ctrl );
void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *low_limit, Node *upper_limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
// For positive stride, the pre-loop limit always uses a MAX function
// and the main loop a MIN function. For negative stride these are
// reversed.
@ -1316,48 +1469,143 @@ void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset
// pre-loop must check for underflow and the post-loop for overflow.
// Negative stride*scale reverses this; pre-loop checks for overflow and
// post-loop for underflow.
if( stride_con*scale_con > 0 ) {
// Compute I < (limit-offset)/scale_con
// Adjust main-loop last iteration to be MIN/MAX(main_loop,X)
*main_limit = (stride_con > 0)
? (Node*)(new (C, 3) MinINode( *main_limit, X ))
: (Node*)(new (C, 3) MaxINode( *main_limit, X ));
register_new_node( *main_limit, pre_ctrl );
if (stride_con*scale_con > 0) {
// The overflow limit: scale*I+offset < upper_limit
// For main-loop compute
// ( if (scale > 0) /* and stride > 0 */
// I < (upper_limit-offset)/scale
// else /* scale < 0 and stride < 0 */
// I > (upper_limit-offset)/scale
// )
// (upper_limit-offset) may overflow when offset < 0.
// But it is fine since main loop will either have
// fewer iterations or will be skipped in such case.
Node *con = new (C, 3) SubINode(upper_limit, offset);
register_new_node(con, pre_ctrl);
Node *scale = _igvn.intcon(scale_con);
set_ctrl(scale, C->root());
Node *X = new (C, 3) DivINode(0, con, scale);
register_new_node(X, pre_ctrl);
} else {
// Compute (limit-offset)/scale_con + SGN(-scale_con) <= I
// Add the negation of the main-loop constraint to the pre-loop.
// See footnote [++] below for a derivation of the limit expression.
Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1);
set_ctrl(incr, C->root());
Node *adj = new (C, 3) AddINode( X, incr );
register_new_node( adj, pre_ctrl );
*pre_limit = (scale_con > 0)
? (Node*)new (C, 3) MinINode( *pre_limit, adj )
: (Node*)new (C, 3) MaxINode( *pre_limit, adj );
register_new_node( *pre_limit, pre_ctrl );
// Adjust main-loop last iteration
Node *loop_limit = *main_limit;
loop_limit = (stride_con > 0) // scale > 0
? (Node*)(new (C, 3) MinINode(loop_limit, X))
: (Node*)(new (C, 3) MaxINode(loop_limit, X));
register_new_node(loop_limit, pre_ctrl);
*main_limit = loop_limit;
// The underflow limit: low_limit <= scale*I+offset.
// For pre-loop compute
// NOT(scale*I+offset >= low_limit)
// scale*I+offset < low_limit
// ( if (scale > 0) /* and stride > 0 */
// I < (low_limit-offset)/scale
// else /* scale < 0 and stride < 0 */
// I > (low_limit-offset)/scale
// )
if (low_limit->get_int() == -max_jint) {
if (!RangeLimitCheck) return;
// We need this guard when scale*pre_limit+offset >= limit
// due to underflow so we need to execute the pre-loop until
// scale*I+offset >= min_int. But (low_limit-offset) will
// underflow when offset > 0 and X will be > original_limit.
// To avoid it we replace offset = offset > 0 ? 0 : offset
// and add min(pre_limit, original_limit).
Node* shift = _igvn.intcon(31);
set_ctrl(shift, C->root());
Node *neg_off = new (C, 3) RShiftINode(offset, shift);
register_new_node(neg_off, pre_ctrl);
offset = new (C, 3) AndINode(offset, neg_off);
register_new_node(offset, pre_ctrl);
} else {
assert(low_limit->get_int() == 0, "wrong low limit for range check");
// The only problem we have here is when offset == min_int
// since (0-min_int) == min_int. It may be fine for scale > 0
// but for scale < 0 X will be < original_limit.
con = new (C, 3) SubINode(low_limit, offset);
register_new_node(con, pre_ctrl);
scale = _igvn.intcon(scale_con);
set_ctrl(scale, C->root());
X = new (C, 3) DivINode(0, con, scale);
register_new_node(X, pre_ctrl);
// Adjust pre-loop last iteration
loop_limit = *pre_limit;
loop_limit = (stride_con > 0) // scale > 0
? (Node*)(new (C, 3) MaxINode(loop_limit, X))
: (Node*)(new (C, 3) MinINode(loop_limit, X));
register_new_node( loop_limit, pre_ctrl );
*pre_limit = loop_limit;
} else { // stride_con*scale_con < 0
// For negative stride*scale pre-loop checks for overflow and
// post-loop for underflow.
// The underflow limit: low_limit <= scale*I+offset.
// For main-loop compute
// scale*I+offset+1 > low_limit
// ( if (scale < 0) /* and stride > 0 */
// I < (low_limit-(offset+1))/scale
// else /* scale > 0 and stride < 0 */
// I > (low_limit-(offset+1))/scale
// )
if (low_limit->get_int() == -max_jint) {
if (!RangeLimitCheck) return;
} else {
assert(low_limit->get_int() == 0, "wrong low limit for range check");
Node *one = _igvn.intcon(1);
set_ctrl(one, C->root());
Node *plus_one = new (C, 3) AddINode(offset, one);
register_new_node( plus_one, pre_ctrl );
Node *con = new (C, 3) SubINode(low_limit, plus_one);
register_new_node(con, pre_ctrl);
Node *scale = _igvn.intcon(scale_con);
set_ctrl(scale, C->root());
Node *X = new (C, 3) DivINode(0, con, scale);
register_new_node(X, pre_ctrl);
// Adjust main-loop last iteration
Node *loop_limit = *main_limit;
loop_limit = (stride_con > 0) // scale < 0
? (Node*)(new (C, 3) MinINode(loop_limit, X))
: (Node*)(new (C, 3) MaxINode(loop_limit, X));
register_new_node(loop_limit, pre_ctrl);
*main_limit = loop_limit;
// The overflow limit: scale*I+offset < upper_limit
// For pre-loop compute
// NOT(scale*I+offset < upper_limit)
// scale*I+offset >= upper_limit
// scale*I+offset+1 > upper_limit
// ( if (scale < 0) /* and stride > 0 */
// I < (upper_limit-(offset+1))/scale
// else /* scale > 0 and stride < 0 */
// I > (upper_limit-(offset+1))/scale
// )
plus_one = new (C, 3) AddINode(offset, one);
register_new_node( plus_one, pre_ctrl );
con = new (C, 3) SubINode(upper_limit, plus_one);
register_new_node(con, pre_ctrl);
scale = _igvn.intcon(scale_con);
set_ctrl(scale, C->root());
X = new (C, 3) DivINode(0, con, scale);
register_new_node(X, pre_ctrl);
// Adjust pre-loop last iteration
loop_limit = *pre_limit;
loop_limit = (stride_con > 0) // scale < 0
? (Node*)(new (C, 3) MaxINode(loop_limit, X))
: (Node*)(new (C, 3) MinINode(loop_limit, X));
register_new_node( loop_limit, pre_ctrl );
*pre_limit = loop_limit;
// [++] Here's the algebra that justifies the pre-loop limit expression:
// NOT( scale_con * I + offset < limit )
// ==
// scale_con * I + offset >= limit
// ==
// SGN(scale_con) * I >= (limit-offset)/|scale_con|
// ==
// (limit-offset)/|scale_con| <= I * SGN(scale_con)
// ==
// (limit-offset)/|scale_con|-1 < I * SGN(scale_con)
// ==
// ( if (scale_con > 0) /*common case*/
// (limit-offset)/scale_con - 1 < I
// else
// (limit-offset)/scale_con + 1 > I
// )
// ( if (scale_con > 0) /*common case*/
// (limit-offset)/scale_con + SGN(-scale_con) < I
// else
// (limit-offset)/scale_con + SGN(-scale_con) > I
@ -1488,7 +1736,7 @@ void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
Node *cmpzm = bolzm->in(1);
assert(cmpzm->is_Cmp(), "");
Node *opqzm = cmpzm->in(2);
// Can not optimize a loop if pre-loop Opaque1 node is optimized
// Can not optimize a loop if zero-trip Opaque1 node is optimized
// away and then another round of loop opts attempted.
if (opqzm->Opcode() != Op_Opaque1)
@ -1523,8 +1771,11 @@ void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
int stride_con = cl->stride_con();
Node *zero = _igvn.intcon(0);
Node *one = _igvn.intcon(1);
// Use symmetrical int range [-max_jint,max_jint]
Node *mini = _igvn.intcon(-max_jint);
set_ctrl(zero, C->root());
set_ctrl(one, C->root());
set_ctrl(mini, C->root());
// Range checks that do not dominate the loop backedge (ie.
// conditionally executed) can lengthen the pre loop limit beyond
@ -1599,7 +1850,12 @@ void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
if( offset_c == ctrl ) {
continue; // Don't rce this check but continue looking for other candidates.
#ifdef ASSERT
if (TraceRangeLimitCheck) {
tty->print_cr("RC bool node%s", flip ? " flipped:" : ":");
// At this point we have the expression as:
// scale_con * trip_counter + offset :: limit
// where scale_con, offset and limit are loop invariant. Trip_counter
@ -1610,17 +1866,16 @@ void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
// Adjust pre and main loop limits to guard the correct iteration set
if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
if( b_test._test == BoolTest::lt ) { // Range checks always use lt
// The overflow limit: scale*I+offset < limit
add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
// The underflow limit: 0 <= scale*I+offset.
// Some math yields: -scale*I-(offset+1) < 0
Node *plus_one = new (C, 3) AddINode( offset, one );
register_new_node( plus_one, pre_ctrl );
Node *neg_offset = new (C, 3) SubINode( zero, plus_one );
register_new_node( neg_offset, pre_ctrl );
add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit );
// The underflow and overflow limits: 0 <= scale*I+offset < limit
add_constraint( stride_con, scale_con, offset, zero, limit, pre_ctrl, &pre_limit, &main_limit );
if (!conditional_rc) {
conditional_rc = !loop->dominates_backedge(iff);
// It is also needed if offset->_lo == min_int since
// (0-min_int) == min_int. It may be fine for stride > 0
// but for stride < 0 pre_limit will be < original_limit.
const TypeInt* offset_t = _igvn.type(offset)->is_int();
conditional_rc |= RangeLimitCheck && (offset_t->_lo == min_jint) &&
(scale_con<0) && (stride_con<0);
} else {
#ifndef PRODUCT
@ -1631,21 +1886,35 @@ void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
} else { // Otherwise work on normal compares
switch( b_test._test ) {
case BoolTest::ge: // Convert X >= Y to -X <= -Y
case BoolTest::gt:
// Fall into GE case
case BoolTest::ge:
// Convert (I*scale+offset) >= Limit to (I*(-scale)+(-offset)) <= -Limit
scale_con = -scale_con;
offset = new (C, 3) SubINode( zero, offset );
register_new_node( offset, pre_ctrl );
limit = new (C, 3) SubINode( zero, limit );
register_new_node( limit, pre_ctrl );
// Fall into LE case
case BoolTest::le: // Convert X <= Y to X < Y+1
limit = new (C, 3) AddINode( limit, one );
register_new_node( limit, pre_ctrl );
case BoolTest::le:
if (b_test._test != BoolTest::gt) {
// Convert X <= Y to X < Y+1
limit = new (C, 3) AddINode( limit, one );
register_new_node( limit, pre_ctrl );
// Fall into LT case
case BoolTest::lt:
add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
// The underflow and overflow limits: MIN_INT <= scale*I+offset < limit
add_constraint( stride_con, scale_con, offset, mini, limit, pre_ctrl, &pre_limit, &main_limit );
if (!conditional_rc) {
conditional_rc = !loop->dominates_backedge(iff);
// It is also needed if scale*pre_limit+offset >= limit
// due to underflow so we need to execute the pre-loop until
// scale*I+offset >= min_int. But (low_limit-offset) will
// underflow when offset > 0 and X will be > original_limit.
const TypeInt* offset_t = _igvn.type(offset)->is_int();
conditional_rc |= RangeLimitCheck && (offset_t->_hi > 0) &&
(scale_con>0) && (stride_con>0);
@ -1696,7 +1965,8 @@ void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
// Note:: we are making the main loop limit no longer precise;
// need to round up based on stride.
if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case
if (!LoopLimitCheck && stride_con != 1 && stride_con != -1) { // Cutout for common case
// "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init
// Hopefully, compiler will optimize for powers of 2.
Node *ctrl = get_ctrl(main_limit);
@ -1876,7 +2146,19 @@ bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
// iteration. Then the CountedLoopEnd will collapse (backedge never
// taken) and all loop-invariant uses of the exit values will be correct.
Node *phi = cl->phi();
Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() );
Node *exact_limit = phase->exact_limit(this);
if (exact_limit != cl->limit()) {
// We also need to replace the original limit to collapse loop exit.
Node* cmp = cl->loopexit()->cmp_node();
assert(cl->limit() == cmp->in(2), "sanity");
phase->_igvn._worklist.push(cmp->in(2)); // put limit on worklist
cmp->set_req(2, exact_limit);
phase->_igvn._worklist.push(cmp); // put cmp on worklist
// Note: the final value after increment should not overflow since
// counted loop has limit check predicate.
Node *final = new (phase->C, 3) SubINode( exact_limit, cl->stride() );
@ -130,6 +130,11 @@ void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {
Node* uniqc = proj_true->unique_ctrl_out();
Node* entry = head->in(LoopNode::EntryControl);
Node* predicate = find_predicate(entry);
if (predicate != NULL && LoopLimitCheck && UseLoopPredicate) {
// We may have two predicates, find first.
entry = find_predicate(entry->in(0)->in(0));
if (entry != NULL) predicate = entry;
if (predicate != NULL) predicate = predicate->in(0);
assert(proj_true->is_IfTrue() &&
(predicate == NULL && uniqc == head ||
@ -217,6 +222,7 @@ void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {
ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
Node_List &old_new) {
LoopNode* head = loop->_head->as_Loop();
bool counted_loop = head->is_CountedLoop();
Node* entry = head->in(LoopNode::EntryControl);
@ -242,14 +248,14 @@ ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
assert(old_new[head->_idx]->is_Loop(), "" );
// Fast (true) control
Node* iffast_pred = clone_loop_predicates(entry, iffast);
Node* iffast_pred = clone_loop_predicates(entry, iffast, !counted_loop);
head->set_req(LoopNode::EntryControl, iffast_pred);
set_idom(head, iffast_pred, dom_depth(head));
// Slow (false) control
Node* ifslow_pred = move_loop_predicates(entry, ifslow);
Node* ifslow_pred = move_loop_predicates(entry, ifslow, !counted_loop);
LoopNode* slow_head = old_new[head->_idx]->as_Loop();
slow_head->set_req(LoopNode::EntryControl, ifslow_pred);
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user