This commit is contained in:
Alejandro Murillo 2015-10-08 14:28:55 -07:00
commit cfc752716c
155 changed files with 7241 additions and 1967 deletions

View File

@ -67,9 +67,6 @@ public class ImmutableOopMapSet extends VMObject {
} }
} }
public void visitValueLocation(Address valueAddr) {
}
public void visitNarrowOopLocation(Address narrowOopAddr) { public void visitNarrowOopLocation(Address narrowOopAddr) {
addressVisitor.visitCompOopAddress(narrowOopAddr); addressVisitor.visitCompOopAddress(narrowOopAddr);
} }
@ -216,9 +213,9 @@ public class ImmutableOopMapSet extends VMObject {
} }
} }
// We want narrow oop, value and oop oop_types // We want narrow oop and oop oop_types
OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[]{ OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[] {
OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.VALUE_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE
}; };
{ {
@ -231,8 +228,6 @@ public class ImmutableOopMapSet extends VMObject {
// to detect in the debugging system // to detect in the debugging system
// assert(Universe::is_heap_or_null(*loc), "found non oop pointer"); // assert(Universe::is_heap_or_null(*loc), "found non oop pointer");
visitor.visitOopLocation(loc); visitor.visitOopLocation(loc);
} else if (omv.getType() == OopMapValue.OopTypes.VALUE_VALUE) {
visitor.visitValueLocation(loc);
} else if (omv.getType() == OopMapValue.OopTypes.NARROWOOP_VALUE) { } else if (omv.getType() == OopMapValue.OopTypes.NARROWOOP_VALUE) {
visitor.visitNarrowOopLocation(loc); visitor.visitNarrowOopLocation(loc);
} }

View File

@ -49,7 +49,6 @@ public class OopMapValue {
// Types of OopValues // Types of OopValues
static int UNUSED_VALUE; static int UNUSED_VALUE;
static int OOP_VALUE; static int OOP_VALUE;
static int VALUE_VALUE;
static int NARROWOOP_VALUE; static int NARROWOOP_VALUE;
static int CALLEE_SAVED_VALUE; static int CALLEE_SAVED_VALUE;
static int DERIVED_OOP_VALUE; static int DERIVED_OOP_VALUE;
@ -73,7 +72,6 @@ public class OopMapValue {
REGISTER_MASK_IN_PLACE = db.lookupIntConstant("OopMapValue::register_mask_in_place").intValue(); REGISTER_MASK_IN_PLACE = db.lookupIntConstant("OopMapValue::register_mask_in_place").intValue();
UNUSED_VALUE = db.lookupIntConstant("OopMapValue::unused_value").intValue(); UNUSED_VALUE = db.lookupIntConstant("OopMapValue::unused_value").intValue();
OOP_VALUE = db.lookupIntConstant("OopMapValue::oop_value").intValue(); OOP_VALUE = db.lookupIntConstant("OopMapValue::oop_value").intValue();
VALUE_VALUE = db.lookupIntConstant("OopMapValue::value_value").intValue();
NARROWOOP_VALUE = db.lookupIntConstant("OopMapValue::narrowoop_value").intValue(); NARROWOOP_VALUE = db.lookupIntConstant("OopMapValue::narrowoop_value").intValue();
CALLEE_SAVED_VALUE = db.lookupIntConstant("OopMapValue::callee_saved_value").intValue(); CALLEE_SAVED_VALUE = db.lookupIntConstant("OopMapValue::callee_saved_value").intValue();
DERIVED_OOP_VALUE = db.lookupIntConstant("OopMapValue::derived_oop_value").intValue(); DERIVED_OOP_VALUE = db.lookupIntConstant("OopMapValue::derived_oop_value").intValue();
@ -82,7 +80,6 @@ public class OopMapValue {
public static abstract class OopTypes { public static abstract class OopTypes {
public static final OopTypes UNUSED_VALUE = new OopTypes() { int getValue() { return OopMapValue.UNUSED_VALUE; }}; public static final OopTypes UNUSED_VALUE = new OopTypes() { int getValue() { return OopMapValue.UNUSED_VALUE; }};
public static final OopTypes OOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.OOP_VALUE; }}; public static final OopTypes OOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.OOP_VALUE; }};
public static final OopTypes VALUE_VALUE = new OopTypes() { int getValue() { return OopMapValue.VALUE_VALUE; }};
public static final OopTypes NARROWOOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.NARROWOOP_VALUE; }}; public static final OopTypes NARROWOOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.NARROWOOP_VALUE; }};
public static final OopTypes CALLEE_SAVED_VALUE = new OopTypes() { int getValue() { return OopMapValue.CALLEE_SAVED_VALUE; }}; public static final OopTypes CALLEE_SAVED_VALUE = new OopTypes() { int getValue() { return OopMapValue.CALLEE_SAVED_VALUE; }};
public static final OopTypes DERIVED_OOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.DERIVED_OOP_VALUE; }}; public static final OopTypes DERIVED_OOP_VALUE = new OopTypes() { int getValue() { return OopMapValue.DERIVED_OOP_VALUE; }};
@ -105,7 +102,6 @@ public class OopMapValue {
// Querying // Querying
public boolean isOop() { return (getValue() & TYPE_MASK_IN_PLACE) == OOP_VALUE; } public boolean isOop() { return (getValue() & TYPE_MASK_IN_PLACE) == OOP_VALUE; }
public boolean isValue() { return (getValue() & TYPE_MASK_IN_PLACE) == VALUE_VALUE; }
public boolean isNarrowOop() { return (getValue() & TYPE_MASK_IN_PLACE) == NARROWOOP_VALUE; } public boolean isNarrowOop() { return (getValue() & TYPE_MASK_IN_PLACE) == NARROWOOP_VALUE; }
public boolean isCalleeSaved() { return (getValue() & TYPE_MASK_IN_PLACE) == CALLEE_SAVED_VALUE; } public boolean isCalleeSaved() { return (getValue() & TYPE_MASK_IN_PLACE) == CALLEE_SAVED_VALUE; }
public boolean isDerivedOop() { return (getValue() & TYPE_MASK_IN_PLACE) == DERIVED_OOP_VALUE; } public boolean isDerivedOop() { return (getValue() & TYPE_MASK_IN_PLACE) == DERIVED_OOP_VALUE; }
@ -117,7 +113,6 @@ public class OopMapValue {
int which = (getValue() & TYPE_MASK_IN_PLACE); int which = (getValue() & TYPE_MASK_IN_PLACE);
if (which == UNUSED_VALUE) return OopTypes.UNUSED_VALUE; if (which == UNUSED_VALUE) return OopTypes.UNUSED_VALUE;
else if (which == OOP_VALUE) return OopTypes.OOP_VALUE; else if (which == OOP_VALUE) return OopTypes.OOP_VALUE;
else if (which == VALUE_VALUE) return OopTypes.VALUE_VALUE;
else if (which == NARROWOOP_VALUE) return OopTypes.NARROWOOP_VALUE; else if (which == NARROWOOP_VALUE) return OopTypes.NARROWOOP_VALUE;
else if (which == CALLEE_SAVED_VALUE) return OopTypes.CALLEE_SAVED_VALUE; else if (which == CALLEE_SAVED_VALUE) return OopTypes.CALLEE_SAVED_VALUE;
else if (which == DERIVED_OOP_VALUE) return OopTypes.DERIVED_OOP_VALUE; else if (which == DERIVED_OOP_VALUE) return OopTypes.DERIVED_OOP_VALUE;

View File

@ -31,6 +31,5 @@ import sun.jvm.hotspot.debugger.*;
public interface OopMapVisitor { public interface OopMapVisitor {
public void visitOopLocation(Address oopAddr); public void visitOopLocation(Address oopAddr);
public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr); public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr);
public void visitValueLocation(Address valueAddr);
public void visitNarrowOopLocation(Address narrowOopAddr); public void visitNarrowOopLocation(Address narrowOopAddr);
} }

View File

@ -536,9 +536,6 @@ public abstract class Frame implements Cloneable {
} }
} }
public void visitValueLocation(Address valueAddr) {
}
public void visitNarrowOopLocation(Address compOopAddr) { public void visitNarrowOopLocation(Address compOopAddr) {
addressVisitor.visitCompOopAddress(compOopAddr); addressVisitor.visitCompOopAddress(compOopAddr);
} }

View File

@ -1220,9 +1220,6 @@ public class HTMLGenerator implements /* imports */ ClassConstants {
oms = new OopMapStream(map, OopMapValue.OopTypes.NARROWOOP_VALUE); oms = new OopMapStream(map, OopMapValue.OopTypes.NARROWOOP_VALUE);
buf.append(omvIterator.iterate(oms, "NarrowOops:", false)); buf.append(omvIterator.iterate(oms, "NarrowOops:", false));
oms = new OopMapStream(map, OopMapValue.OopTypes.VALUE_VALUE);
buf.append(omvIterator.iterate(oms, "Values:", false));
oms = new OopMapStream(map, OopMapValue.OopTypes.CALLEE_SAVED_VALUE); oms = new OopMapStream(map, OopMapValue.OopTypes.CALLEE_SAVED_VALUE);
buf.append(omvIterator.iterate(oms, "Callee saved:", true)); buf.append(omvIterator.iterate(oms, "Callee saved:", true));

File diff suppressed because it is too large Load Diff

View File

@ -2311,6 +2311,12 @@ public:
#define MSG "invalid arrangement" #define MSG "invalid arrangement"
#define ASSERTION (T == T2S || T == T4S || T == T2D)
INSN(fsqrt, 1, 0b11111);
INSN(fabs, 0, 0b01111);
INSN(fneg, 1, 0b01111);
#undef ASSERTION
#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S) #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
INSN(rev64, 0, 0b00000); INSN(rev64, 0, 0b00000);
#undef ASSERTION #undef ASSERTION

View File

@ -72,6 +72,7 @@ define_pd_global(bool, OptoPeephole, true);
define_pd_global(bool, UseCISCSpill, true); define_pd_global(bool, UseCISCSpill, true);
define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoScheduling, false);
define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoRegScheduling, false);
define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, ReservedCodeCacheSize, 48*M);
define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);

View File

@ -42,6 +42,11 @@
// Implementation of InterpreterMacroAssembler // Implementation of InterpreterMacroAssembler
// Emit a branch to an interpreter entry point that was generated earlier.
//   entry - target code address; asserted to be non-NULL, since a NULL
//           value means the entry has not been generated yet.
void InterpreterMacroAssembler::jump_to_entry(address entry) {
assert(entry, "Entry must have been generated by now");
b(entry); // branch to the target address
}
#ifndef CC_INTERP #ifndef CC_INTERP
void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {

View File

@ -66,6 +66,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
void load_earlyret_value(TosState state); void load_earlyret_value(TosState state);
void jump_to_entry(address entry);
#ifdef CC_INTERP #ifdef CC_INTERP
void save_bcp() { /* not needed in c++ interpreter and harmless */ } void save_bcp() { /* not needed in c++ interpreter and harmless */ }
void restore_bcp() { /* not needed in c++ interpreter and harmless */ } void restore_bcp() { /* not needed in c++ interpreter and harmless */ }

View File

@ -41,13 +41,13 @@ private:
address generate_native_entry(bool synchronized); address generate_native_entry(bool synchronized);
address generate_abstract_entry(void); address generate_abstract_entry(void);
address generate_math_entry(AbstractInterpreter::MethodKind kind); address generate_math_entry(AbstractInterpreter::MethodKind kind);
address generate_jump_to_normal_entry(void); address generate_accessor_entry(void) { return NULL; }
address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); } address generate_empty_entry(void) { return NULL; }
address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs);
address generate_Reference_get_entry(); address generate_Reference_get_entry();
address generate_CRC32_update_entry(); address generate_CRC32_update_entry();
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
void lock_method(void); void lock_method(void);
void generate_stack_overflow_check(void); void generate_stack_overflow_check(void);

View File

@ -236,17 +236,6 @@ void InterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::Me
__ blrt(rscratch1, gpargs, fpargs, rtype); __ blrt(rscratch1, gpargs, fpargs, rtype);
} }
// Jump into normal path for accessor and empty entry to jump to normal entry
// The "fast" optimization don't update compilation count therefore can disable inlining
// for these functions that should be inlined.
address InterpreterGenerator::generate_jump_to_normal_entry(void) {
address entry_point = __ pc();
assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated");
__ b(Interpreter::entry_for_kind(Interpreter::zerolocals));
return entry_point;
}
// Abstract method entry // Abstract method entry
// Attempt to execute abstract method. Throw exception // Attempt to execute abstract method. Throw exception
address InterpreterGenerator::generate_abstract_entry(void) { address InterpreterGenerator::generate_abstract_entry(void) {

View File

@ -2286,18 +2286,30 @@ void MacroAssembler::c_stub_prolog(int gp_arg_count, int fp_arg_count, int ret_t
} }
#endif #endif
// Save CPU state on the stack: first the integer registers selected by the
// 0x3fffffff mask, then the 32 floating-point registers in pairs, walking
// from registers 30/31 down to 0/1.
//   save_vectors - false: each pair is stored with stpd, pre-decrementing
//                  sp by 2 words per pair;
//                  true: each pair is stored with stpq, pre-decrementing
//                  sp by 4 words per pair (twice the space per register,
//                  presumably the full-width vector register).
// pop_CPU_state(restore_vectors) reloads with matching sizes, so the two
// flags need to agree for the stack to stay balanced.
void MacroAssembler::push_CPU_state() { void MacroAssembler::push_CPU_state(bool save_vectors) {
push(0x3fffffff, sp); // integer registers except lr & sp push(0x3fffffff, sp); // integer registers except lr & sp
if (!save_vectors) {
for (int i = 30; i >= 0; i -= 2) for (int i = 30; i >= 0; i -= 2)
stpd(as_FloatRegister(i), as_FloatRegister(i+1), stpd(as_FloatRegister(i), as_FloatRegister(i+1),
Address(pre(sp, -2 * wordSize))); Address(pre(sp, -2 * wordSize)));
} else {
for (int i = 30; i >= 0; i -= 2)
stpq(as_FloatRegister(i), as_FloatRegister(i+1),
Address(pre(sp, -4 * wordSize)));
}
} }
// Restore CPU state saved by push_CPU_state, in the reverse order: first
// the 32 floating-point registers in pairs, walking from registers 0/1 up
// to 30/31, then the integer registers selected by the 0x3fffffff mask.
//   restore_vectors - false: each pair is loaded with ldpd, post-incrementing
//                     sp by 2 words per pair;
//                     true: each pair is loaded with ldpq, post-incrementing
//                     sp by 4 words per pair.
// The flag needs to match the save_vectors value used for the push, since
// the per-pair stack footprint differs between the two modes.
void MacroAssembler::pop_CPU_state() { void MacroAssembler::pop_CPU_state(bool restore_vectors) {
if (!restore_vectors) {
for (int i = 0; i < 32; i += 2) for (int i = 0; i < 32; i += 2)
ldpd(as_FloatRegister(i), as_FloatRegister(i+1), ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
Address(post(sp, 2 * wordSize))); Address(post(sp, 2 * wordSize)));
} else {
for (int i = 0; i < 32; i += 2)
ldpq(as_FloatRegister(i), as_FloatRegister(i+1),
Address(post(sp, 4 * wordSize)));
}
pop(0x3fffffff, sp); // integer registers except lr & sp pop(0x3fffffff, sp); // integer registers except lr & sp
} }

View File

@ -777,8 +777,8 @@ public:
DEBUG_ONLY(void verify_heapbase(const char* msg);) DEBUG_ONLY(void verify_heapbase(const char* msg);)
void push_CPU_state(); void push_CPU_state(bool save_vectors = false);
void pop_CPU_state() ; void pop_CPU_state(bool restore_vectors = false) ;
// Round up to a power of two // Round up to a power of two
void round_to(Register reg, int modulus); void round_to(Register reg, int modulus);

View File

@ -75,8 +75,8 @@ class SimpleRuntimeFrame {
// FIXME -- this is used by C1 // FIXME -- this is used by C1
class RegisterSaver { class RegisterSaver {
public: public:
static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
static void restore_live_registers(MacroAssembler* masm); static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
// Offsets into the register save area // Offsets into the register save area
// Used by deoptimization when it is managing result register // Used by deoptimization when it is managing result register
@ -108,7 +108,17 @@ class RegisterSaver {
}; };
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
#ifdef COMPILER2
if (save_vectors) {
// Save upper half of vector registers
int vect_words = 32 * 8 / wordSize;
additional_frame_words += vect_words;
}
#else
assert(!save_vectors, "vectors are generated only by C2");
#endif
int frame_size_in_bytes = round_to(additional_frame_words*wordSize + int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
reg_save_size*BytesPerInt, 16); reg_save_size*BytesPerInt, 16);
// OopMap frame size is in compiler stack slots (jint's) not bytes or words // OopMap frame size is in compiler stack slots (jint's) not bytes or words
@ -122,7 +132,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// Save registers, fpu state, and flags. // Save registers, fpu state, and flags.
__ enter(); __ enter();
__ push_CPU_state(); __ push_CPU_state(save_vectors);
// Set an oopmap for the call site. This oopmap will map all // Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This // oop-registers and debug-info registers as callee-saved. This
@ -139,14 +149,14 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// register slots are 8 bytes // register slots are 8 bytes
// wide, 32 floating-point // wide, 32 floating-point
// registers // registers
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
r->as_VMReg()); r->as_VMReg());
} }
} }
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
FloatRegister r = as_FloatRegister(i); FloatRegister r = as_FloatRegister(i);
int sp_offset = 2 * i; int sp_offset = save_vectors ? (4 * i) : (2 * i);
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
r->as_VMReg()); r->as_VMReg());
} }
@ -154,8 +164,11 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
return oop_map; return oop_map;
} }
void RegisterSaver::restore_live_registers(MacroAssembler* masm) { void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
__ pop_CPU_state(); #ifndef COMPILER2
assert(!restore_vectors, "vectors are generated only by C2");
#endif
__ pop_CPU_state(restore_vectors);
__ leave(); __ leave();
} }
@ -177,9 +190,9 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
} }
// Is vector's size (in bytes) bigger than a size saved by default? // Is vector's size (in bytes) bigger than a size saved by default?
// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. // 8 bytes vector registers are saved by default on AArch64.
bool SharedRuntime::is_wide_vector(int size) { bool SharedRuntime::is_wide_vector(int size) {
return size > 16; return size > 8;
} }
// The java_calling_convention describes stack locations as ideal slots on // The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions // a frame with no abi restrictions. Since we must observe abi restrictions
@ -1146,7 +1159,7 @@ static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs,
assert((unsigned)gpargs < 256, "eek!"); assert((unsigned)gpargs < 256, "eek!");
assert((unsigned)fpargs < 32, "eek!"); assert((unsigned)fpargs < 32, "eek!");
__ lea(rscratch1, RuntimeAddress(dest)); __ lea(rscratch1, RuntimeAddress(dest));
__ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type); if (UseBuiltinSim) __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
__ blrt(rscratch1, rscratch2); __ blrt(rscratch1, rscratch2);
__ maybe_isb(); __ maybe_isb();
} }
@ -1521,14 +1534,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
int vep_offset = ((intptr_t)__ pc()) - start; int vep_offset = ((intptr_t)__ pc()) - start;
// Generate stack overflow check
// If we have to make this method not-entrant we'll overwrite its // If we have to make this method not-entrant we'll overwrite its
// first instruction with a jump. For this action to be legal we // first instruction with a jump. For this action to be legal we
// must ensure that this first instruction is a B, BL, NOP, BKPT, // must ensure that this first instruction is a B, BL, NOP, BKPT,
// SVC, HVC, or SMC. Make it a NOP. // SVC, HVC, or SMC. Make it a NOP.
__ nop(); __ nop();
// Generate stack overflow check
if (UseStackBanging) { if (UseStackBanging) {
__ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
} else { } else {
@ -1709,23 +1721,20 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// need to spill before we call out // need to spill before we call out
int c_arg = total_c_args - total_in_args; int c_arg = total_c_args - total_in_args;
// Pre-load a static method's oop into r20. Used both by locking code and // Pre-load a static method's oop into c_rarg1.
// the normal JNI call code.
if (method->is_static() && !is_critical_native) { if (method->is_static() && !is_critical_native) {
// load oop into a register // load oop into a register
__ movoop(oop_handle_reg, __ movoop(c_rarg1,
JNIHandles::make_local(method->method_holder()->java_mirror()), JNIHandles::make_local(method->method_holder()->java_mirror()),
/*immediate*/true); /*immediate*/true);
// Now handlize the static class mirror it's known not-null. // Now handlize the static class mirror it's known not-null.
__ str(oop_handle_reg, Address(sp, klass_offset)); __ str(c_rarg1, Address(sp, klass_offset));
map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
// Now get the handle // Now get the handle
__ lea(oop_handle_reg, Address(sp, klass_offset)); __ lea(c_rarg1, Address(sp, klass_offset));
// store the klass handle as second argument
__ mov(c_rarg1, oop_handle_reg);
// and protect the arg if we must spill // and protect the arg if we must spill
c_arg--; c_arg--;
} }
@ -1740,19 +1749,13 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ set_last_Java_frame(sp, noreg, (address)the_pc, rscratch1); __ set_last_Java_frame(sp, noreg, (address)the_pc, rscratch1);
Label dtrace_method_entry, dtrace_method_entry_done;
// We have all of the arguments setup at this point. We must not touch any register
// argument registers at this point (what if we save/restore them there are no oop?
{ {
SkipIfEqual skip(masm, &DTraceMethodProbes, false); unsigned long offset;
// protect the args we've loaded __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset);
save_args(masm, total_c_args, c_arg, out_regs); __ ldrb(rscratch1, Address(rscratch1, offset));
__ mov_metadata(c_rarg1, method()); __ cbnzw(rscratch1, dtrace_method_entry);
__ call_VM_leaf( __ bind(dtrace_method_entry_done);
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
rthread, c_rarg1);
restore_args(masm, total_c_args, c_arg, out_regs);
} }
// RedefineClasses() tracing support for obsolete method entry // RedefineClasses() tracing support for obsolete method entry
@ -1782,7 +1785,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
if (method->is_synchronized()) { if (method->is_synchronized()) {
assert(!is_critical_native, "unhandled"); assert(!is_critical_native, "unhandled");
const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
// Get the handle (the 2nd argument) // Get the handle (the 2nd argument)
@ -1838,7 +1840,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Finally just about ready to make the JNI call // Finally just about ready to make the JNI call
// get JNIEnv* which is first argument to native // get JNIEnv* which is first argument to native
if (!is_critical_native) { if (!is_critical_native) {
__ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
@ -1904,14 +1905,17 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Thread A is resumed to finish this native method, but doesn't block here since it // Thread A is resumed to finish this native method, but doesn't block here since it
// didn't see any synchronization in progress, and escapes. // didn't see any synchronization in progress, and escapes.
__ mov(rscratch1, _thread_in_native_trans); __ mov(rscratch1, _thread_in_native_trans);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
if(os::is_MP()) { if(os::is_MP()) {
if (UseMembar) { if (UseMembar) {
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
// Force this write out before the read below // Force this write out before the read below
__ dmb(Assembler::SY); __ dmb(Assembler::SY);
} else { } else {
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
// Write serialization page so VM thread can do a pseudo remote membar. // Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific // We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic // offset to write to within the page. This minimizes bus traffic
@ -1920,54 +1924,23 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
} }
} }
Label after_transition;
// check for safepoint operation in progress and/or pending suspend requests // check for safepoint operation in progress and/or pending suspend requests
Label safepoint_in_progress, safepoint_in_progress_done;
{ {
Label Continue; assert(SafepointSynchronize::_not_synchronized == 0, "fix this code");
unsigned long offset;
{ unsigned long offset;
__ adrp(rscratch1, __ adrp(rscratch1,
ExternalAddress((address)SafepointSynchronize::address_of_state()), ExternalAddress((address)SafepointSynchronize::address_of_state()),
offset); offset);
__ ldrw(rscratch1, Address(rscratch1, offset)); __ ldrw(rscratch1, Address(rscratch1, offset));
} __ cbnzw(rscratch1, safepoint_in_progress);
__ cmpw(rscratch1, SafepointSynchronize::_not_synchronized);
Label L;
__ br(Assembler::NE, L);
__ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbz(rscratch1, Continue); __ cbnzw(rscratch1, safepoint_in_progress);
__ bind(L); __ bind(safepoint_in_progress_done);
// Don't use call_VM as it will see a possible pending exception and forward it
// and never return here preventing us from clearing _last_native_pc down below.
//
save_native_result(masm, ret_type, stack_slots);
__ mov(c_rarg0, rthread);
#ifndef PRODUCT
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
if (!is_critical_native) {
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
} else {
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
}
__ blrt(rscratch1, 1, 0, 1);
__ maybe_isb();
// Restore any method result value
restore_native_result(masm, ret_type, stack_slots);
if (is_critical_native) {
// The call above performed the transition to thread_in_Java so
// skip the transition logic below.
__ b(after_transition);
}
__ bind(Continue);
} }
// change thread state // change thread state
Label after_transition;
__ mov(rscratch1, _thread_in_Java); __ mov(rscratch1, _thread_in_Java);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2); __ stlrw(rscratch1, rscratch2);
@ -2024,16 +1997,15 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
} }
__ bind(done); __ bind(done);
} }
Label dtrace_method_exit, dtrace_method_exit_done;
{ {
SkipIfEqual skip(masm, &DTraceMethodProbes, false); unsigned long offset;
save_native_result(masm, ret_type, stack_slots); __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset);
__ mov_metadata(c_rarg1, method()); __ ldrb(rscratch1, Address(rscratch1, offset));
__ call_VM_leaf( __ cbnzw(rscratch1, dtrace_method_exit);
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), __ bind(dtrace_method_exit_done);
rthread, c_rarg1);
restore_native_result(masm, ret_type, stack_slots);
} }
__ reset_last_Java_frame(false, true); __ reset_last_Java_frame(false, true);
@ -2082,7 +2054,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Slow path locking & unlocking // Slow path locking & unlocking
if (method->is_synchronized()) { if (method->is_synchronized()) {
// BEGIN Slow path lock __ block_comment("Slow path lock {");
__ bind(slow_path_lock); __ bind(slow_path_lock);
// has last_Java_frame setup. No exceptions so do vanilla call not call_VM // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
@ -2109,9 +2081,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
#endif #endif
__ b(lock_done); __ b(lock_done);
// END Slow path lock __ block_comment("} Slow path lock");
// BEGIN Slow path unlock __ block_comment("Slow path unlock {");
__ bind(slow_path_unlock); __ bind(slow_path_unlock);
// If we haven't already saved the native result we must save it now as xmm registers // If we haven't already saved the native result we must save it now as xmm registers
@ -2149,7 +2121,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
} }
__ b(unlock_done); __ b(unlock_done);
// END Slow path unlock __ block_comment("} Slow path unlock");
} // synchronized } // synchronized
@ -2162,6 +2134,69 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// and continue // and continue
__ b(reguard_done); __ b(reguard_done);
// SLOW PATH safepoint
{
__ block_comment("safepoint {");
__ bind(safepoint_in_progress);
// Don't use call_VM as it will see a possible pending exception and forward it
// and never return here preventing us from clearing _last_native_pc down below.
//
save_native_result(masm, ret_type, stack_slots);
__ mov(c_rarg0, rthread);
#ifndef PRODUCT
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
if (!is_critical_native) {
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
} else {
__ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
}
__ blrt(rscratch1, 1, 0, 1);
__ maybe_isb();
// Restore any method result value
restore_native_result(masm, ret_type, stack_slots);
if (is_critical_native) {
// The call above performed the transition to thread_in_Java so
// skip the transition logic above.
__ b(after_transition);
}
__ b(safepoint_in_progress_done);
__ block_comment("} safepoint");
}
// SLOW PATH dtrace support
{
__ block_comment("dtrace entry {");
__ bind(dtrace_method_entry);
// We have all of the arguments setup at this point. We must not touch any register
// argument registers at this point (what if we save/restore them there are no oop?
save_args(masm, total_c_args, c_arg, out_regs);
__ mov_metadata(c_rarg1, method());
__ call_VM_leaf(
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
rthread, c_rarg1);
restore_args(masm, total_c_args, c_arg, out_regs);
__ b(dtrace_method_entry_done);
__ block_comment("} dtrace entry");
}
{
__ block_comment("dtrace exit {");
__ bind(dtrace_method_exit);
save_native_result(masm, ret_type, stack_slots);
__ mov_metadata(c_rarg1, method());
__ call_VM_leaf(
CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
rthread, c_rarg1);
restore_native_result(masm, ret_type, stack_slots);
__ b(dtrace_method_exit_done);
__ block_comment("} dtrace exit");
}
__ flush(); __ flush();
@ -2742,7 +2777,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
// Save registers, fpu state, and flags // Save registers, fpu state, and flags
map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
// The following is basically a call_VM. However, we need the precise // The following is basically a call_VM. However, we need the precise
// address of the call in order to generate an oopmap. Hence, we do all the // address of the call in order to generate an oopmap. Hence, we do all the
@ -2793,7 +2828,7 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
__ bind(noException); __ bind(noException);
// Normal exit, restore registers and exit. // Normal exit, restore registers and exit.
RegisterSaver::restore_live_registers(masm); RegisterSaver::restore_live_registers(masm, save_vectors);
__ ret(lr); __ ret(lr);

View File

@ -721,8 +721,7 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// generate a vanilla interpreter entry as the slow path // generate a vanilla interpreter entry as the slow path
__ bind(slow_path); __ bind(slow_path);
(void) generate_normal_entry(false); __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
return entry; return entry;
} }
#endif // INCLUDE_ALL_GCS #endif // INCLUDE_ALL_GCS
@ -779,12 +778,10 @@ address InterpreterGenerator::generate_CRC32_update_entry() {
// generate a vanilla native entry as the slow path // generate a vanilla native entry as the slow path
__ bind(slow_path); __ bind(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
(void) generate_native_entry(false);
return entry; return entry;
} }
return generate_native_entry(false); return NULL;
} }
/** /**
@ -841,12 +838,10 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
// generate a vanilla native entry as the slow path // generate a vanilla native entry as the slow path
__ bind(slow_path); __ bind(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
(void) generate_native_entry(false);
return entry; return entry;
} }
return generate_native_entry(false); return NULL;
} }
void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) { void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) {

View File

@ -60,6 +60,7 @@ define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(bool, OptoPeephole, false); define_pd_global(bool, OptoPeephole, false);
define_pd_global(bool, UseCISCSpill, false); define_pd_global(bool, UseCISCSpill, false);
define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoRegScheduling, false);
// GL: // GL:
// Detected a problem with unscaled compressed oops and // Detected a problem with unscaled compressed oops and
// narrow_oop_use_complex_address() == false. // narrow_oop_use_complex_address() == false.

View File

@ -46,7 +46,7 @@ void InterpreterMacroAssembler::null_check_throw(Register a, int offset, Registe
MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry); MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry);
} }
void InterpreterMacroAssembler::branch_to_entry(address entry, Register Rscratch) { void InterpreterMacroAssembler::jump_to_entry(address entry, Register Rscratch) {
assert(entry, "Entry must have been generated by now"); assert(entry, "Entry must have been generated by now");
if (is_within_range_of_b(entry, pc())) { if (is_within_range_of_b(entry, pc())) {
b(entry); b(entry);

View File

@ -39,7 +39,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void null_check_throw(Register a, int offset, Register temp_reg); void null_check_throw(Register a, int offset, Register temp_reg);
void branch_to_entry(address entry, Register Rscratch); void jump_to_entry(address entry, Register Rscratch);
// Handy address generation macros. // Handy address generation macros.
#define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread #define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread

View File

@ -31,12 +31,12 @@
private: private:
address generate_abstract_entry(void); address generate_abstract_entry(void);
address generate_jump_to_normal_entry(void); address generate_accessor_entry(void) { return NULL; }
address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); } address generate_empty_entry(void) { return NULL; }
address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
address generate_Reference_get_entry(void); address generate_Reference_get_entry(void);
address generate_CRC32_update_entry(); address generate_CRC32_update_entry();
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
#endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP #endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP

View File

@ -427,18 +427,6 @@ address AbstractInterpreterGenerator::generate_result_handler_for(BasicType type
return entry; return entry;
} }
// Call an accessor method (assuming it is resolved, otherwise drop into
// vanilla (slow path) entry.
address InterpreterGenerator::generate_jump_to_normal_entry(void) {
address entry = __ pc();
address normal_entry = Interpreter::entry_for_kind(Interpreter::zerolocals);
assert(normal_entry != NULL, "should already be generated.");
__ branch_to_entry(normal_entry, R11_scratch1);
__ flush();
return entry;
}
// Abstract method entry. // Abstract method entry.
// //
address InterpreterGenerator::generate_abstract_entry(void) { address InterpreterGenerator::generate_abstract_entry(void) {
@ -529,12 +517,12 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// regular method entry code to generate the NPE. // regular method entry code to generate the NPE.
// //
if (UseG1GC) {
address entry = __ pc(); address entry = __ pc();
const int referent_offset = java_lang_ref_Reference::referent_offset; const int referent_offset = java_lang_ref_Reference::referent_offset;
guarantee(referent_offset > 0, "referent offset not initialized"); guarantee(referent_offset > 0, "referent offset not initialized");
if (UseG1GC) {
Label slow_path; Label slow_path;
// Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH); // Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH);
@ -577,13 +565,11 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// Generate regular method entry. // Generate regular method entry.
__ bind(slow_path); __ bind(slow_path);
__ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1); __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1);
__ flush();
return entry; return entry;
} else {
return generate_jump_to_normal_entry();
} }
return NULL;
} }
void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {

View File

@ -2064,6 +2064,10 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported. return true; // Per default match rules are supported.
} }
// Returns default_pressure_threshold unchanged, i.e. this implementation
// applies no adjustment to the default value it is handed.
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
int Matcher::regnum_to_fpu_offset(int regnum) { int Matcher::regnum_to_fpu_offset(int regnum) {
// No user for this method? // No user for this method?
Unimplemented(); Unimplemented();

View File

@ -620,7 +620,7 @@ inline bool math_entry_available(AbstractInterpreter::MethodKind kind) {
address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
if (!math_entry_available(kind)) { if (!math_entry_available(kind)) {
NOT_PRODUCT(__ should_not_reach_here();) NOT_PRODUCT(__ should_not_reach_here();)
return Interpreter::entry_for_kind(Interpreter::zerolocals); return NULL;
} }
address entry = __ pc(); address entry = __ pc();
@ -1126,14 +1126,6 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
generate_fixed_frame(false, Rsize_of_parameters, Rsize_of_locals); generate_fixed_frame(false, Rsize_of_parameters, Rsize_of_locals);
#ifdef FAST_DISPATCH
__ unimplemented("Fast dispatch in generate_normal_entry");
#if 0
__ set((intptr_t)Interpreter::dispatch_table(), IdispatchTables);
// Set bytecode dispatch table base.
#endif
#endif
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
// Zero out non-parameter locals. // Zero out non-parameter locals.
// Note: *Always* zero out non-parameter locals as Sparc does. It's not // Note: *Always* zero out non-parameter locals as Sparc does. It's not
@ -1266,9 +1258,8 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
* int java.util.zip.CRC32.update(int crc, int b) * int java.util.zip.CRC32.update(int crc, int b)
*/ */
address InterpreterGenerator::generate_CRC32_update_entry() { address InterpreterGenerator::generate_CRC32_update_entry() {
address start = __ pc(); // Remember stub start address (is rtn value).
if (UseCRC32Intrinsics) { if (UseCRC32Intrinsics) {
address start = __ pc(); // Remember stub start address (is rtn value).
Label slow_path; Label slow_path;
// Safepoint check // Safepoint check
@ -1313,11 +1304,11 @@ address InterpreterGenerator::generate_CRC32_update_entry() {
// Generate a vanilla native entry as the slow path. // Generate a vanilla native entry as the slow path.
BLOCK_COMMENT("} CRC32_update"); BLOCK_COMMENT("} CRC32_update");
BIND(slow_path); BIND(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1);
return start;
} }
(void) generate_native_entry(false); return NULL;
return start;
} }
// CRC32 Intrinsics. // CRC32 Intrinsics.
@ -1327,9 +1318,8 @@ address InterpreterGenerator::generate_CRC32_update_entry() {
* int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len) * int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len)
*/ */
address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
address start = __ pc(); // Remember stub start address (is rtn value).
if (UseCRC32Intrinsics) { if (UseCRC32Intrinsics) {
address start = __ pc(); // Remember stub start address (is rtn value).
Label slow_path; Label slow_path;
// Safepoint check // Safepoint check
@ -1406,11 +1396,11 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
// Generate a vanilla native entry as the slow path. // Generate a vanilla native entry as the slow path.
BLOCK_COMMENT("} CRC32_updateBytes(Buffer)"); BLOCK_COMMENT("} CRC32_updateBytes(Buffer)");
BIND(slow_path); BIND(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1);
return start;
} }
(void) generate_native_entry(false); return NULL;
return start;
} }
// These should never be compiled since the interpreter will prefer // These should never be compiled since the interpreter will prefer

View File

@ -64,6 +64,7 @@ define_pd_global(bool, OptoPeephole, false);
define_pd_global(bool, UseCISCSpill, false); define_pd_global(bool, UseCISCSpill, false);
define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoScheduling, true); define_pd_global(bool, OptoScheduling, true);
define_pd_global(bool, OptoRegScheduling, false);
#ifdef _LP64 #ifdef _LP64
// We need to make sure that all generated code is within // We need to make sure that all generated code is within

View File

@ -468,7 +468,7 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// If G1 is not enabled then attempt to go through the accessor entry point // If G1 is not enabled then attempt to go through the accessor entry point
// Reference.get is an accessor // Reference.get is an accessor
return generate_jump_to_normal_entry(); return NULL;
} }
// //

View File

@ -59,6 +59,13 @@ const Address InterpreterMacroAssembler::d_tmp(FP, (frame::interpreter_frame_d_s
#endif // CC_INTERP #endif // CC_INTERP
// Emits a jump to the given entry address.
//
// entry: target address; must be non-NULL (checked by the assert below).
//
// The address is wrapped in an AddressLiteral and handed to jump_to() with
// G3_scratch as the register argument; a nop is then emitted via delayed().
void InterpreterMacroAssembler::jump_to_entry(address entry) {
assert(entry, "Entry must have been generated by now");
AddressLiteral al(entry);
jump_to(al, G3_scratch);
// presumably fills the delay slot of the jump emitted above -- TODO confirm
delayed()->nop();
}
void InterpreterMacroAssembler::compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta) { void InterpreterMacroAssembler::compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta) {
// Note: this algorithm is also used by C1's OSR entry sequence. // Note: this algorithm is also used by C1's OSR entry sequence.
// Any changes should also be applied to CodeEmitter::emit_osr_entry(). // Any changes should also be applied to CodeEmitter::emit_osr_entry().

View File

@ -80,6 +80,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
InterpreterMacroAssembler(CodeBuffer* c) InterpreterMacroAssembler(CodeBuffer* c)
: MacroAssembler(c) {} : MacroAssembler(c) {}
void jump_to_entry(address entry);
#ifndef CC_INTERP #ifndef CC_INTERP
virtual void load_earlyret_value(TosState state); virtual void load_earlyret_value(TosState state);

View File

@ -34,9 +34,8 @@
address generate_abstract_entry(void); address generate_abstract_entry(void);
// there are no math intrinsics on sparc // there are no math intrinsics on sparc
address generate_math_entry(AbstractInterpreter::MethodKind kind) { return NULL; } address generate_math_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
address generate_jump_to_normal_entry(void); address generate_accessor_entry(void) { return NULL; }
address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); } address generate_empty_entry(void) { return NULL; }
address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
address generate_Reference_get_entry(void); address generate_Reference_get_entry(void);
void lock_method(void); void lock_method(void);
void save_native_result(void); void save_native_result(void);
@ -48,4 +47,5 @@
// Not supported // Not supported
address generate_CRC32_update_entry() { return NULL; } address generate_CRC32_update_entry() { return NULL; }
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
#endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP #endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP

View File

@ -241,15 +241,6 @@ void InterpreterGenerator::generate_counter_overflow(Label& Lcontinue) {
// Various method entries // Various method entries
address InterpreterGenerator::generate_jump_to_normal_entry(void) {
address entry = __ pc();
assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated");
AddressLiteral al(Interpreter::entry_for_kind(Interpreter::zerolocals));
__ jump_to(al, G3_scratch);
__ delayed()->nop();
return entry;
}
// Abstract method entry // Abstract method entry
// Attempt to execute abstract method. Throw exception // Attempt to execute abstract method. Throw exception
// //

View File

@ -1860,6 +1860,10 @@ const bool Matcher::match_rule_supported(int opcode) {
return true; // Per default match rules are supported. return true; // Per default match rules are supported.
} }
// Returns default_pressure_threshold unchanged, i.e. this implementation
// applies no adjustment to the default value it is handed.
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
int Matcher::regnum_to_fpu_offset(int regnum) { int Matcher::regnum_to_fpu_offset(int regnum) {
return regnum - 32; // The FP registers are in the second chunk return regnum - 32; // The FP registers are in the second chunk
} }

View File

@ -779,14 +779,14 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// Generate regular method entry // Generate regular method entry
__ bind(slow_path); __ bind(slow_path);
(void) generate_normal_entry(false); __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
return entry; return entry;
} }
#endif // INCLUDE_ALL_GCS #endif // INCLUDE_ALL_GCS
// If G1 is not enabled then attempt to go through the accessor entry point // If G1 is not enabled then attempt to go through the accessor entry point
// Reference.get is an accessor // Reference.get is an accessor
return generate_jump_to_normal_entry(); return NULL;
} }
// //

View File

@ -770,6 +770,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case 0x55: // andnps case 0x55: // andnps
case 0x56: // orps case 0x56: // orps
case 0x57: // xorps case 0x57: // xorps
case 0x59: //mulpd
case 0x6E: // movd case 0x6E: // movd
case 0x7E: // movd case 0x7E: // movd
case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
@ -1604,6 +1605,85 @@ void Assembler::cpuid() {
emit_int8((unsigned char)0xA2); emit_int8((unsigned char)0xA2);
} }
// Opcode / Instruction Op / En 64 - Bit Mode Compat / Leg Mode Description Implemented
// F2 0F 38 F0 / r CRC32 r32, r / m8 RM Valid Valid Accumulate CRC32 on r / m8. v
// F2 REX 0F 38 F0 / r CRC32 r32, r / m8* RM Valid N.E. Accumulate CRC32 on r / m8. -
// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E. Accumulate CRC32 on r / m8. -
//
// F2 0F 38 F1 / r CRC32 r32, r / m16 RM Valid Valid Accumulate CRC32 on r / m16. v
//
// F2 0F 38 F1 / r CRC32 r32, r / m32 RM Valid Valid Accumulate CRC32 on r / m32. v
//
// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E. Accumulate CRC32 on r / m64. v
// Emits the register-to-register form of CRC32.
//
// crc:         register encoded into the ModRM reg field.
// v:           register encoded into the ModRM r/m field.
// sizeInBytes: width selector; 1, 2 and 4 are accepted, 8 only where
//              LP64_ONLY expands. Any other value trips the assert in the
//              default case.
//
// Byte sequence produced: 0xF2, an optional prefix (only where LP64_ONLY
// expands), 0x0F, 0x38, 0xF0|w, then a ModRM byte with the top two bits set.
void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
assert(VM_Version::supports_sse4_2(), "");
// w becomes the low bit of the last opcode byte: 0xF0 when w == 0, 0xF1 otherwise.
int8_t w = 0x01;
Prefix p = Prefix_EMPTY;
emit_int8((int8_t)0xF2);
switch (sizeInBytes) {
case 1:
w = 0;
break;
// NOTE(review): sizes 2 and 4 produce identical bytes here. The r/m16 form
// presumably needs an operand-size prefix that is not emitted -- confirm
// before relying on sizeInBytes == 2.
case 2:
case 4:
break;
LP64_ONLY(case 8:)
// This instruction is not valid in 32 bits
// Note:
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
//
// Page B - 72 Vol. 2C says
// qwreg2 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
// mem64 to qwreg 1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
// F0!!!
// while 3 - 208 Vol. 2A
// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64 RM Valid N.E.Accumulate CRC32 on r / m64.
//
// the 0 on a last bit is reserved for a different flavor of this instruction :
// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8 RM Valid N.E.Accumulate CRC32 on r / m8.
p = REX_W;
break;
default:
assert(0, "Unsupported value for a sizeInBytes argument");
break;
}
// The prefix (if any) is emitted after 0xF2 and before the 0x0F opcode bytes.
LP64_ONLY(prefix(crc, v, p);)
emit_int8((int8_t)0x0F);
emit_int8(0x38);
emit_int8((int8_t)(0xF0 | w));
// ModRM: 0xC0 sets the top two bits; only the low three bits of each
// register encoding are placed here.
emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
}
// Emits the memory-operand form of CRC32.
//
// crc:         register passed to emit_operand() together with adr.
// adr:         memory operand.
// sizeInBytes: width selector; 1, 2 and 4 are accepted, 8 only where
//              LP64_ONLY expands. Any other value trips the assert in the
//              default case.
//
// Byte sequence produced: 0xF2, an optional prefix (only where LP64_ONLY
// expands), 0x0F, 0x38, 0xF0|w, then whatever emit_operand(crc, adr) emits.
void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
assert(VM_Version::supports_sse4_2(), "");
InstructionMark im(this);
// w becomes the low bit of the last opcode byte: 0xF0 when w == 0, 0xF1 otherwise.
int8_t w = 0x01;
Prefix p = Prefix_EMPTY;
emit_int8((int8_t)0xF2);
switch (sizeInBytes) {
case 1:
w = 0;
break;
// NOTE(review): sizes 2 and 4 produce identical bytes here. The m16 form
// presumably needs an operand-size prefix that is not emitted -- confirm
// before relying on sizeInBytes == 2.
case 2:
case 4:
break;
LP64_ONLY(case 8:)
// This instruction is not valid in 32 bits
p = REX_W;
break;
default:
assert(0, "Unsupported value for a sizeInBytes argument");
break;
}
// The prefix (if any) is emitted after 0xF2 and before the 0x0F opcode bytes.
LP64_ONLY(prefix(crc, adr, p);)
emit_int8((int8_t)0x0F);
emit_int8(0x38);
emit_int8((int8_t)(0xF0 | w));
emit_operand(crc, adr);
}
void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true); emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
@ -2951,6 +3031,15 @@ void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
emit_int8(imm8); emit_int8(imm8);
} }
// Extract Word, SSE2 form: 66 0F C5 /r ib  (PEXTRW r32, xmm, imm8).
//
// dst:  general-purpose destination register.
// src:  XMM source register.
// imm8: immediate selecting which word of src is extracted.
//
// The operands are handed to simd_prefix_and_encode() with dst first and src
// last, the same order pinsrw uses for its ModRM reg / r/m pair. That layout
// (reg = general register destination, r/m = XMM source) is what the 0F C5
// encoding expects. The 0F 3A 15 encoding requires SSE4.1 and has the two
// roles swapped, so emitting it here would contradict the SSE2 assert and
// only produce the right instruction when both register numbers coincide.
void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
  emit_int8((unsigned char)0xC5);
  emit_int8((unsigned char)(0xC0 | encode));
  emit_int8(imm8);
}
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) { void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), ""); assert(VM_Version::supports_sse4_1(), "");
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true, int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
@ -2969,6 +3058,15 @@ void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
emit_int8(imm8); emit_int8(imm8);
} }
// Insert Word: emits opcode 0xC4 (VEX_OPCODE_0F map, VEX_SIMD_66 prefix)
// followed by a register-direct ModRM byte and the immediate.
//
// dst:  XMM register receiving the word (also passed as the nds operand).
// src:  general-purpose source register.
// imm8: immediate byte appended after the ModRM byte.
void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
  assert(VM_Version::supports_sse2(), "");
  // The prefix helper takes XMM-typed operands, so the general register is
  // re-expressed through its encoding number.
  XMMRegister src_as_xmm = as_XMMRegister(src->encoding());
  int reg_bits = simd_prefix_and_encode(dst, dst, src_as_xmm, VEX_SIMD_66, /* no_mask_reg */ true,
                                        VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
  emit_int8((unsigned char)0xC4);
  emit_int8((unsigned char)(0xC0 | reg_bits));
  emit_int8(imm8);
}
void Assembler::pmovzxbw(XMMRegister dst, Address src) { void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), ""); assert(VM_Version::supports_sse4_1(), "");
if (VM_Version::supports_evex()) { if (VM_Version::supports_evex()) {
@ -3984,6 +4082,16 @@ void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
} }
} }
// mulpd with a memory source: opcode 0x59 with the VEX_SIMD_66 prefix selector.
// Marks the instruction as using vector length, then picks the emitter
// according to EVEX availability.
void Assembler::mulpd(XMMRegister dst, Address src) {
  const int opcode = 0x59;
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (!VM_Version::supports_evex()) {
    // No EVEX: plain SIMD arithmetic emitter.
    emit_simd_arith(opcode, dst, src, VEX_SIMD_66);
    return;
  }
  // EVEX available: use the "_q" emitter variant.
  emit_simd_arith_q(opcode, dst, src, VEX_SIMD_66);
}
void Assembler::mulps(XMMRegister dst, XMMRegister src) { void Assembler::mulps(XMMRegister dst, XMMRegister src) {
_instruction_uses_vl = true; _instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@ -4172,6 +4280,26 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector
emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq); emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
} }
// unpckhpd, register form: opcode 0x15 with the VEX_SIMD_66 prefix selector.
// Marks the instruction as using vector length, then picks the emitter
// according to EVEX availability.
void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
  const int opcode = 0x15;
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (!VM_Version::supports_evex()) {
    // No EVEX: plain SIMD arithmetic emitter.
    emit_simd_arith(opcode, dst, src, VEX_SIMD_66);
    return;
  }
  // EVEX available: use the "_q" emitter variant.
  emit_simd_arith_q(opcode, dst, src, VEX_SIMD_66);
}
// unpcklpd, register form: opcode 0x14 with the VEX_SIMD_66 prefix selector.
// Marks the instruction as using vector length, then picks the emitter
// according to EVEX availability.
void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
  const int opcode = 0x14;
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (!VM_Version::supports_evex()) {
    // No EVEX: plain SIMD arithmetic emitter.
    emit_simd_arith(opcode, dst, src, VEX_SIMD_66);
    return;
  }
  // EVEX available: use the "_q" emitter variant.
  emit_simd_arith_q(opcode, dst, src, VEX_SIMD_66);
}
void Assembler::xorpd(XMMRegister dst, XMMRegister src) { void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), ""));
if (VM_Version::supports_avx512dq()) { if (VM_Version::supports_avx512dq()) {
@ -4792,8 +4920,9 @@ void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int
} }
// AND packed integers // logical operations packed integers
void Assembler::pand(XMMRegister dst, XMMRegister src) { void Assembler::pand(XMMRegister dst, XMMRegister src) {
_instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), ""));
emit_simd_arith(0xDB, dst, src, VEX_SIMD_66); emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
} }
@ -4814,6 +4943,17 @@ void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_
emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len); emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
} }
// pandn, register form: opcode 0xDF with the VEX_SIMD_66 prefix selector.
// Marks the instruction as using vector length, then picks the emitter
// according to EVEX availability.
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
  const int opcode = 0xDF;
  _instruction_uses_vl = true;
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
  if (!VM_Version::supports_evex()) {
    // No EVEX: plain SIMD arithmetic emitter.
    emit_simd_arith(opcode, dst, src, VEX_SIMD_66);
    return;
  }
  // EVEX available: use the "_q" emitter variant.
  emit_simd_arith_q(opcode, dst, src, VEX_SIMD_66);
}
void Assembler::por(XMMRegister dst, XMMRegister src) { void Assembler::por(XMMRegister dst, XMMRegister src) {
_instruction_uses_vl = true; _instruction_uses_vl = true;
NOT_LP64(assert(VM_Version::supports_sse2(), "")); NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@ -6223,6 +6363,14 @@ void Assembler::shldl(Register dst, Register src) {
emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding())); emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
} }
// 0F A4 / r ib
// shld with an immediate count: emits 0x0F 0xA4, a register-direct ModRM byte
// (src in bits 5..3, dst in bits 2..0) and then the immediate.
// No prefix byte is emitted and the register encodings are used unmasked.
void Assembler::shldl(Register dst, Register src, int8_t imm8) {
  const int modrm = 0xC0 | (src->encoding() << 3) | dst->encoding();
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA4);
  emit_int8((unsigned char)modrm);
  emit_int8(imm8);
}
void Assembler::shrdl(Register dst, Register src) { void Assembler::shrdl(Register dst, Register src) {
emit_int8(0x0F); emit_int8(0x0F);
emit_int8((unsigned char)0xAD); emit_int8((unsigned char)0xAD);
@ -6408,6 +6556,40 @@ void Assembler::prefix(Register reg) {
} }
} }
// Emits at most one prefix byte for a register/register instruction.
//
// dst: register whose encoding >= 8 adds REX_R.
// src: register whose encoding >= 8 adds REX_B.
// p:   prefix bits requested by the caller (Prefix_EMPTY for none).
//
// Nothing is emitted when the combined value is still Prefix_EMPTY.
void Assembler::prefix(Register dst, Register src, Prefix p) {
  int rex_bits = p;
  if (src->encoding() >= 8) {
    rex_bits |= REX_B;
  }
  if (dst->encoding() >= 8) {
    rex_bits |= REX_R;
  }
  if (rex_bits == Prefix_EMPTY) {
    // do not generate an empty prefix
    return;
  }
  prefix((Prefix)rex_bits);
}
// Emits at most one prefix byte for a register/memory instruction.
//
// dst: register whose encoding >= 8 adds REX_R.
// adr: memory operand; a base register that needs REX adds REX_B. An index
//      register that needs REX is not supported and trips the assert.
// p:   prefix bits requested by the caller (Prefix_EMPTY for none).
//
// All required bits are merged into a single value before emitting. Emitting
// REX_B as its own byte and then a second byte for p / REX_R would put two
// REX prefixes in front of the opcode, which is not a valid encoding and
// loses the B bit for the base register.
void Assembler::prefix(Register dst, Address adr, Prefix p) {
  const bool index_needs_rex = adr.index_needs_rex();
  assert(!index_needs_rex, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
  // As before, REX_B is only added when the index needs no REX bit.
  if (adr.base_needs_rex() && !index_needs_rex) {
    p = (Prefix)(p | REX_B);
  }
  if (dst->encoding() >= 8) {
    p = (Prefix)(p | REX_R);
  }
  if (p != Prefix_EMPTY) {
    // do not generate an empty prefix
    prefix(p);
  }
}
void Assembler::prefix(Address adr) { void Assembler::prefix(Address adr) {
if (adr.base_needs_rex()) { if (adr.base_needs_rex()) {
if (adr.index_needs_rex()) { if (adr.index_needs_rex()) {

View File

@ -506,7 +506,8 @@ class Assembler : public AbstractAssembler {
VEX_3bytes = 0xC4, VEX_3bytes = 0xC4,
VEX_2bytes = 0xC5, VEX_2bytes = 0xC5,
EVEX_4bytes = 0x62 EVEX_4bytes = 0x62,
Prefix_EMPTY = 0x0
}; };
enum VexPrefix { enum VexPrefix {
@ -615,6 +616,8 @@ private:
int prefixq_and_encode(int dst_enc, int src_enc); int prefixq_and_encode(int dst_enc, int src_enc);
void prefix(Register reg); void prefix(Register reg);
void prefix(Register dst, Register src, Prefix p);
void prefix(Register dst, Address adr, Prefix p);
void prefix(Address adr); void prefix(Address adr);
void prefixq(Address adr); void prefixq(Address adr);
@ -1177,6 +1180,10 @@ private:
// Identify processor type and features // Identify processor type and features
void cpuid(); void cpuid();
// CRC32C
void crc32(Register crc, Register v, int8_t sizeInBytes);
void crc32(Register crc, Address adr, int8_t sizeInBytes);
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
void cvtsd2ss(XMMRegister dst, XMMRegister src); void cvtsd2ss(XMMRegister dst, XMMRegister src);
void cvtsd2ss(XMMRegister dst, Address src); void cvtsd2ss(XMMRegister dst, Address src);
@ -1672,10 +1679,14 @@ private:
// SSE 4.1 extract // SSE 4.1 extract
void pextrd(Register dst, XMMRegister src, int imm8); void pextrd(Register dst, XMMRegister src, int imm8);
void pextrq(Register dst, XMMRegister src, int imm8); void pextrq(Register dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
// SSE 4.1 insert // SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8); void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8); void pinsrq(XMMRegister dst, Register src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
// SSE4.1 packed move // SSE4.1 packed move
void pmovzxbw(XMMRegister dst, XMMRegister src); void pmovzxbw(XMMRegister dst, XMMRegister src);
@ -1783,6 +1794,7 @@ private:
void setb(Condition cc, Register dst); void setb(Condition cc, Register dst);
void shldl(Register dst, Register src); void shldl(Register dst, Register src);
void shldl(Register dst, Register src, int8_t imm8);
void shll(Register dst, int imm8); void shll(Register dst, int imm8);
void shll(Register dst); void shll(Register dst);
@ -1925,6 +1937,7 @@ private:
// Multiply Packed Floating-Point Values // Multiply Packed Floating-Point Values
void mulpd(XMMRegister dst, XMMRegister src); void mulpd(XMMRegister dst, XMMRegister src);
void mulpd(XMMRegister dst, Address src);
void mulps(XMMRegister dst, XMMRegister src); void mulps(XMMRegister dst, XMMRegister src);
void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@ -1951,6 +1964,9 @@ private:
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void unpckhpd(XMMRegister dst, XMMRegister src);
void unpcklpd(XMMRegister dst, XMMRegister src);
// Bitwise Logical XOR of Packed Floating-Point Values // Bitwise Logical XOR of Packed Floating-Point Values
void xorpd(XMMRegister dst, XMMRegister src); void xorpd(XMMRegister dst, XMMRegister src);
void xorps(XMMRegister dst, XMMRegister src); void xorps(XMMRegister dst, XMMRegister src);
@ -2046,6 +2062,9 @@ private:
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
// Andn packed integers
void pandn(XMMRegister dst, XMMRegister src);
// Or packed integers // Or packed integers
void por(XMMRegister dst, XMMRegister src); void por(XMMRegister dst, XMMRegister src);
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

View File

@ -37,6 +37,8 @@ inline int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst)
inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { return dst_enc << 3 | src_enc; } inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { return dst_enc << 3 | src_enc; }
inline void Assembler::prefix(Register reg) {} inline void Assembler::prefix(Register reg) {}
// Empty variants of the two three-argument prefix overloads: like the
// neighbouring prefix()/prefixq() definitions here, they emit no bytes.
inline void Assembler::prefix(Register dst, Register src, Prefix p) {}
inline void Assembler::prefix(Register dst, Address adr, Prefix p) {}
inline void Assembler::prefix(Address adr) {} inline void Assembler::prefix(Address adr) {}
inline void Assembler::prefixq(Address adr) {} inline void Assembler::prefixq(Address adr) {}

View File

@ -2457,9 +2457,6 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
// Should consider not saving rbx, if not necessary // Should consider not saving rbx, if not necessary
__ trigfunc('t', op->as_Op2()->fpu_stack_size()); __ trigfunc('t', op->as_Op2()->fpu_stack_size());
break; break;
case lir_exp :
__ exp_with_fallback(op->as_Op2()->fpu_stack_size());
break;
case lir_pow : case lir_pow :
__ pow_with_fallback(op->as_Op2()->fpu_stack_size()); __ pow_with_fallback(op->as_Op2()->fpu_stack_size());
break; break;

View File

@ -808,6 +808,12 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type"); assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
if (x->id() == vmIntrinsics::_dexp) {
do_ExpIntrinsic(x);
return;
}
LIRItem value(x->argument_at(0), this); LIRItem value(x->argument_at(0), this);
bool use_fpu = false; bool use_fpu = false;
@ -818,7 +824,6 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dtan: case vmIntrinsics::_dtan:
case vmIntrinsics::_dlog: case vmIntrinsics::_dlog:
case vmIntrinsics::_dlog10: case vmIntrinsics::_dlog10:
case vmIntrinsics::_dexp:
case vmIntrinsics::_dpow: case vmIntrinsics::_dpow:
use_fpu = true; use_fpu = true;
} }
@ -870,7 +875,6 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break; case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
case vmIntrinsics::_dlog: __ log (calc_input, calc_result, tmp1); break; case vmIntrinsics::_dlog: __ log (calc_input, calc_result, tmp1); break;
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break; case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
case vmIntrinsics::_dexp: __ exp (calc_input, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break; case vmIntrinsics::_dpow: __ pow (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
default: ShouldNotReachHere(); default: ShouldNotReachHere();
} }
@ -880,6 +884,32 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
} }
} }
// Generates LIR for the _dexp intrinsic as a leaf runtime call.
//
// The single double argument is forced into the location dictated by the C
// calling convention, the exp routine is called, and the value left in the
// result register is moved into the intrinsic's result operand.
//
// Entry point selection:
//   - 64-bit:            StubRoutines::dexp()
//   - 32-bit with SSE2:  StubRoutines::dexp()
//   - 32-bit otherwise:  SharedRuntime::dexp
// On 32-bit the result is always taken from FrameMap::fpu0_double_opr.
void LIRGenerator::do_ExpIntrinsic(Intrinsic* x) {
  LIRItem value(x->argument_at(0), this);
  value.set_destroys_register();

  LIR_Opr calc_result = rlock_result(x);
  LIR_Opr result_reg = result_register_for(x->type());

  // Describe a C function taking one double and place the argument accordingly.
  BasicTypeList signature(1);
  signature.append(T_DOUBLE);
  CallingConvention* cc = frame_map()->c_calling_convention(&signature);
  value.load_item_force(cc->at(0));

#ifdef _LP64
  address exp_entry = StubRoutines::dexp();
#else
  // 32-bit: the callee's result is read from the x87 double operand.
  result_reg = FrameMap::fpu0_double_opr;
  address exp_entry = VM_Version::supports_sse2()
      ? StubRoutines::dexp()
      : CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
#endif

  __ call_runtime_leaf(exp_entry, getThreadTemp(), result_reg, cc->args());
  __ move(result_reg, calc_result);
}
void LIRGenerator::do_ArrayCopy(Intrinsic* x) { void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
assert(x->number_of_arguments() == 5, "wrong type"); assert(x->number_of_arguments() == 5, "wrong type");

View File

@ -814,8 +814,7 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
case lir_tan: case lir_tan:
case lir_sin: case lir_sin:
case lir_cos: case lir_cos: {
case lir_exp: {
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary // sin, cos and exp need two temporary fpu stack slots, so there are two temporary
// registers (stored in right and temp of the operation). // registers (stored in right and temp of the operation).
// the stack allocator must guarantee that the stack slots are really free, // the stack allocator must guarantee that the stack slots are really free,

View File

@ -48,11 +48,11 @@ define_pd_global(intx, CompileThreshold, 10000);
define_pd_global(intx, OnStackReplacePercentage, 140); define_pd_global(intx, OnStackReplacePercentage, 140);
define_pd_global(intx, ConditionalMoveLimit, 3); define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FLOATPRESSURE, 6);
define_pd_global(intx, FreqInlineSize, 325); define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, MinJumpTableSize, 10); define_pd_global(intx, MinJumpTableSize, 10);
#ifdef AMD64 #ifdef AMD64
define_pd_global(intx, INTPRESSURE, 13); define_pd_global(intx, INTPRESSURE, 13);
define_pd_global(intx, FLOATPRESSURE, 14);
define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, LoopUnrollLimit, 60); define_pd_global(intx, LoopUnrollLimit, 60);
@ -64,6 +64,7 @@ define_pd_global(intx, CodeCacheExpansionSize, 64*K);
define_pd_global(uint64_t, MaxRAM, 128ULL*G); define_pd_global(uint64_t, MaxRAM, 128ULL*G);
#else #else
define_pd_global(intx, INTPRESSURE, 6); define_pd_global(intx, INTPRESSURE, 6);
define_pd_global(intx, FLOATPRESSURE, 6);
define_pd_global(intx, InteriorEntryAlignment, 4); define_pd_global(intx, InteriorEntryAlignment, 4);
define_pd_global(size_t, NewSizeThreadIncrease, 4*K); define_pd_global(size_t, NewSizeThreadIncrease, 4*K);
define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1 define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1
@ -82,6 +83,7 @@ define_pd_global(bool, OptoPeephole, true);
define_pd_global(bool, UseCISCSpill, true); define_pd_global(bool, UseCISCSpill, true);
define_pd_global(bool, OptoScheduling, false); define_pd_global(bool, OptoScheduling, false);
define_pd_global(bool, OptoBundling, false); define_pd_global(bool, OptoBundling, false);
define_pd_global(bool, OptoRegScheduling, true);
define_pd_global(intx, ReservedCodeCacheSize, 48*M); define_pd_global(intx, ReservedCodeCacheSize, 48*M);
define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); define_pd_global(intx, NonProfiledCodeHeapSize, 21*M);

View File

@ -807,7 +807,7 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// If G1 is not enabled then attempt to go through the accessor entry point // If G1 is not enabled then attempt to go through the accessor entry point
// Reference.get is an accessor // Reference.get is an accessor
return generate_jump_to_normal_entry(); return NULL;
} }
// //

View File

@ -0,0 +1,66 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
// Tuning constants for the CRC32C code in macroAssembler_x86.cpp.
// CRC32C_HIGH / CRC32C_MIDDLE / CRC32C_LOW are the three partition sizes that
// crc32c_ipl_alg2_alt2 passes to crc32c_proc_chunk as its `size` argument
// (used there as a byte offset and compared against the remaining length).
enum {
// S. Gueron / Information Processing Letters 112 (2012) 184
// shows that anything above 6K and below 32K is a good choice
// 32K does not deliver any further performance gains
// 6K=8*256 (*3 as we compute 3 blocks together)
//
// Thus selecting the smallest value so it could apply to the largest number
// of buffer sizes.
CRC32C_HIGH = 8 * 256,
// empirical
// based on ubench study using methodology described in
// V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8
//
// arbitrary value between 27 and 256
CRC32C_MIDDLE = 8 * 86,
// V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9
// shows that 240 and 1024 are equally good choices as the 216==8*27
//
// Selecting the smallest value which resulted in a significant performance improvement over
// sequential version
CRC32C_LOW = 8 * 27,
// Number of partition ("chunk") sizes defined above (HIGH, MIDDLE, LOW).
// NOTE(review): despite the "InBytes" suffix this is a count, not a byte size.
CRC32C_NUM_ChunkSizeInBytes = 3,
// We need to compute powers of 64N and 128N for each "chunk" size
// (i.e. two precomputed constants per partition size).
CRC32C_NUM_PRECOMPUTED_CONSTANTS = ( 2 * CRC32C_NUM_ChunkSizeInBytes )
};
// Notes:
// 1. Why do we need a "chunk" approach?
// The overhead of computing the required powers for an arbitrary buffer of size N is significant
// (the implementation then only approaches library performance).
// 2. Why only 3 "chunks"?
// Performance experiment results showed that HIGH+LOW alone did not deliver a stable speedup
// curve.
//
// Disclaimer:
// If you ever decide to increase/decrease number of "chunks" be sure to modify
// a) constants table generation (hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp)
// b) constant fetch from that table (macroAssembler_x86.cpp)
// c) unrolled for loop (macroAssembler_x86.cpp)

View File

@ -40,6 +40,11 @@
// Implementation of InterpreterMacroAssembler // Implementation of InterpreterMacroAssembler
// Emits a jump to the given, already generated, code address.
// `entry` must be non-NULL; this is asserted.
void InterpreterMacroAssembler::jump_to_entry(address entry) {
  assert(entry != NULL, "Entry must have been generated by now");
  RuntimeAddress target(entry);
  jump(target);
}
#ifndef CC_INTERP #ifndef CC_INTERP
void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) {
Label update, next, none; Label update, next, none;

View File

@ -60,6 +60,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
_locals_register(LP64_ONLY(r14) NOT_LP64(rdi)), _locals_register(LP64_ONLY(r14) NOT_LP64(rdi)),
_bcp_register(LP64_ONLY(r13) NOT_LP64(rsi)) {} _bcp_register(LP64_ONLY(r13) NOT_LP64(rsi)) {}
void jump_to_entry(address entry);
void load_earlyret_value(TosState state); void load_earlyret_value(TosState state);
#ifdef CC_INTERP #ifdef CC_INTERP

View File

@ -31,17 +31,6 @@
#define __ _masm-> #define __ _masm->
// Entry used for accessor and empty methods: it simply jumps to the normal
// (zerolocals) interpreter entry.
// The "fast" variants of these entries do not update the compilation count,
// which can prevent inlining of methods that should be inlined.
//
// Returns the address at which the generated jump starts. The zerolocals
// entry must already have been generated (asserted).
address InterpreterGenerator::generate_jump_to_normal_entry(void) {
  address entry_point = __ pc();
  address normal_entry = Interpreter::entry_for_kind(Interpreter::zerolocals);
  assert(normal_entry != NULL, "should already be generated");
  __ jump(RuntimeAddress(normal_entry));
  return entry_point;
}
// Abstract method entry // Abstract method entry
// Attempt to execute abstract method. Throw exception // Attempt to execute abstract method. Throw exception
address InterpreterGenerator::generate_abstract_entry(void) { address InterpreterGenerator::generate_abstract_entry(void) {

View File

@ -36,12 +36,12 @@
address generate_native_entry(bool synchronized); address generate_native_entry(bool synchronized);
address generate_abstract_entry(void); address generate_abstract_entry(void);
address generate_math_entry(AbstractInterpreter::MethodKind kind); address generate_math_entry(AbstractInterpreter::MethodKind kind);
address generate_jump_to_normal_entry(void); address generate_accessor_entry(void) { return NULL; }
address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); } address generate_empty_entry(void) { return NULL; }
address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
address generate_Reference_get_entry(); address generate_Reference_get_entry();
address generate_CRC32_update_entry(); address generate_CRC32_update_entry();
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind);
#ifndef _LP64 #ifndef _LP64
address generate_Float_intBitsToFloat_entry(); address generate_Float_intBitsToFloat_entry();
address generate_Float_floatToRawIntBits_entry(); address generate_Float_floatToRawIntBits_entry();

View File

@ -151,10 +151,14 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
__ pop_fTOS(); __ pop_fTOS();
break; break;
case Interpreter::java_lang_math_exp: case Interpreter::java_lang_math_exp:
__ exp_with_fallback(0); __ subptr(rsp, 2*wordSize);
// Store to stack to convert 80bit precision back to 64bits __ fstp_d(Address(rsp, 0));
__ push_fTOS(); if (VM_Version::supports_sse2()) {
__ pop_fTOS(); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
}
__ addptr(rsp, 2*wordSize);
break; break;
default : default :
ShouldNotReachHere(); ShouldNotReachHere();

View File

@ -252,6 +252,9 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
if (kind == Interpreter::java_lang_math_sqrt) { if (kind == Interpreter::java_lang_math_sqrt) {
__ sqrtsd(xmm0, Address(rsp, wordSize)); __ sqrtsd(xmm0, Address(rsp, wordSize));
} else if (kind == Interpreter::java_lang_math_exp) {
__ movdbl(xmm0, Address(rsp, wordSize));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else { } else {
__ fld_d(Address(rsp, wordSize)); __ fld_d(Address(rsp, wordSize));
switch (kind) { switch (kind) {
@ -278,9 +281,6 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin
// empty stack slot) // empty stack slot)
__ pow_with_fallback(0); __ pow_with_fallback(0);
break; break;
case Interpreter::java_lang_math_exp:
__ exp_with_fallback(0);
break;
default : default :
ShouldNotReachHere(); ShouldNotReachHere();
} }

View File

@ -45,6 +45,7 @@
#include "gc/g1/g1SATBCardTableModRefBS.hpp" #include "gc/g1/g1SATBCardTableModRefBS.hpp"
#include "gc/g1/heapRegion.hpp" #include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS #endif // INCLUDE_ALL_GCS
#include "crc32c.h"
#ifdef PRODUCT #ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */ #define BLOCK_COMMENT(str) /* nothing */
@ -3032,6 +3033,15 @@ void MacroAssembler::fldcw(AddressLiteral src) {
Assembler::fldcw(as_Address(src)); Assembler::fldcw(as_Address(src));
} }
// mulpd with an AddressLiteral source operand.
// If the literal is not reachable as a direct operand, its address is first
// loaded into rscratch1 (which is clobbered) and the multiply uses
// [rscratch1 + 0]; otherwise the literal is used directly.
void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
  if (!reachable(src)) {
    lea(rscratch1, src);
    Assembler::mulpd(dst, Address(rscratch1, 0));
    return;
  }
  Assembler::mulpd(dst, as_Address(src));
}
void MacroAssembler::pow_exp_core_encoding() { void MacroAssembler::pow_exp_core_encoding() {
// kills rax, rcx, rdx // kills rax, rcx, rdx
subptr(rsp,sizeof(jdouble)); subptr(rsp,sizeof(jdouble));
@ -3104,19 +3114,7 @@ void MacroAssembler::fast_pow() {
BLOCK_COMMENT("} fast_pow"); BLOCK_COMMENT("} fast_pow");
} }
void MacroAssembler::fast_exp() { void MacroAssembler::pow_or_exp(int num_fpu_regs_in_use) {
// computes exp(X) = 2^(X * log2(e))
// if fast computation is not possible, result is NaN. Requires
// fallback from user of this macro.
// increase precision for intermediate steps of the computation
increase_precision();
fldl2e(); // Stack: log2(e) X ...
fmulp(1); // Stack: (X*log2(e)) ...
pow_exp_core_encoding(); // Stack: exp(X) ...
restore_precision();
}
void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
// kills rax, rcx, rdx // kills rax, rcx, rdx
// pow and exp needs 2 extra registers on the fpu stack. // pow and exp needs 2 extra registers on the fpu stack.
Label slow_case, done; Label slow_case, done;
@ -3128,22 +3126,6 @@ void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
Register tmp2 = rax; Register tmp2 = rax;
Register tmp3 = rcx; Register tmp3 = rcx;
if (is_exp) {
// Stack: X
fld_s(0); // duplicate argument for runtime call. Stack: X X
fast_exp(); // Stack: exp(X) X
fcmp(tmp, 0, false, false); // Stack: exp(X) X
// exp(X) not equal to itself: exp(X) is NaN go to slow case.
jcc(Assembler::parity, slow_case);
// get rid of duplicate argument. Stack: exp(X)
if (num_fpu_regs_in_use > 0) {
fxch();
fpop();
} else {
ffree(1);
}
jmp(done);
} else {
// Stack: X Y // Stack: X Y
Label x_negative, y_not_2; Label x_negative, y_not_2;
@ -3295,15 +3277,13 @@ void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
fchs(); // Stack: -abs(X)^Y Y fchs(); // Stack: -abs(X)^Y Y
jmp(done); jmp(done);
}
// slow case: runtime call // slow case: runtime call
bind(slow_case); bind(slow_case);
fpop(); // pop incorrect result or int(Y) fpop(); // pop incorrect result or int(Y)
fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow), fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2, num_fpu_regs_in_use);
is_exp ? 1 : 2, num_fpu_regs_in_use);
// Come here with result in F-TOS // Come here with result in F-TOS
bind(done); bind(done);
@ -8636,6 +8616,471 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
notl(crc); // ~c notl(crc); // ~c
} }
#ifdef _LP64
// S. Gueron / Information Processing Letters 112 (2012) 184
// Algorithm 4: Computing carry-less multiplication using a precomputed lookup table.
// Input: A 32 bit value B = [byte3, byte2, byte1, byte0].
// Output: the 64-bit carry-less product of B * CONST
//
// Parameters:
//   in   - holds B on entry and is overwritten with the result.
//   n    - selects the sub-table: the base obtained from
//          StubRoutines::crc32c_table_addr() is advanced by n * 256 * 8 bytes.
//   tmp1, tmp2, tmp3 - scratch registers, all clobbered. tmp3 holds the
//          (sub-)table base for the whole sequence.
//
// Each lookup scales a byte of B by 8 (shll 3), adds the table base and
// loads one 64-bit entry; the four entries are shifted left by 0, 8, 16 and
// 24 bits respectively and XORed together.
void MacroAssembler::crc32c_ipl_alg4(Register in, uint32_t n,
Register tmp1, Register tmp2, Register tmp3) {
lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
if (n > 0) {
addq(tmp3, n * 256 * 8);
}
// Q1 = TABLEExt[n][B & 0xFF];
movl(tmp1, in);
andl(tmp1, 0x000000FF);
shll(tmp1, 3);
addq(tmp1, tmp3);
movq(tmp1, Address(tmp1, 0));
// Q2 = TABLEExt[n][B >> 8 & 0xFF];
movl(tmp2, in);
shrl(tmp2, 8);
andl(tmp2, 0x000000FF);
shll(tmp2, 3);
addq(tmp2, tmp3);
movq(tmp2, Address(tmp2, 0));
shlq(tmp2, 8);
xorq(tmp1, tmp2);
// Q3 = TABLEExt[n][B >> 16 & 0xFF];
movl(tmp2, in);
shrl(tmp2, 16);
andl(tmp2, 0x000000FF);
shll(tmp2, 3);
addq(tmp2, tmp3);
movq(tmp2, Address(tmp2, 0));
shlq(tmp2, 16);
xorq(tmp1, tmp2);
// Q4 = TABLEExt[n][B >> 24 & 0xFF];
// The last lookup is done in place in `in`, since B is no longer needed.
shrl(in, 24);
andl(in, 0x000000FF);
shll(in, 3);
addq(in, tmp3);
movq(in, Address(in, 0));
shlq(in, 24);
// Fold the accumulated Q1 ^ Q2 << 8 ^ Q3 << 16 (in tmp1) into the result.
xorq(in, tmp1);
// return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
}
// Multiplies in_out by a constant, leaving the product in in_out.
//
// Two code shapes are emitted depending on is_pclmulqdq_supported:
//   - true:  const_or_pre_comp_const_index is used as an immediate constant.
//            in_out and the constant are moved into w_xtmp1 / w_xtmp2,
//            multiplied with pclmulqdq (imm 0), and the result is moved back
//            into in_out. tmp1 is clobbered to stage the constant.
//   - false: const_or_pre_comp_const_index is used as a table index and the
//            work is delegated to crc32c_ipl_alg4, which uses tmp1, n_tmp2
//            and n_tmp3 as scratch.
// n_tmp2 and n_tmp3 are only used on the table-lookup path; the XMM
// registers are only used on the pclmulqdq path.
void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
Register in_out,
uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
XMMRegister w_xtmp2,
Register tmp1,
Register n_tmp2, Register n_tmp3) {
if (is_pclmulqdq_supported) {
movdl(w_xtmp1, in_out); // modified blindly
movl(tmp1, const_or_pre_comp_const_index);
movdl(w_xtmp2, tmp1);
pclmulqdq(w_xtmp1, w_xtmp2, 0);
movdq(in_out, w_xtmp1);
} else {
crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3);
}
}
// Recombination Alternative 2: No bit-reflections
// T1 = (CRC_A * U1) << 1
// T2 = (CRC_B * U2) << 1
// C1 = T1 >> 32
// C2 = T2 >> 32
// T1 = T1 & 0xFFFFFFFF
// T2 = T2 & 0xFFFFFFFF
// T1 = CRC32(0, T1)
// T2 = CRC32(0, T2)
// C1 = C1 ^ T1
// C2 = C2 ^ T2
// CRC = C1 ^ C2 ^ CRC_C
//
// Register roles in this implementation:
//   in_out - CRC_A on entry, the recombined CRC on exit.
//   in1    - CRC_B on entry; clobbered.
//   in2    - CRC_C; only read.
//   tmp1, tmp2 - scratch (tmp1 holds the low 32 bits of a product, tmp2 the
//                CRC32(0, .) of it); also handed to crc32c_pclmulqdq.
//   w_xtmp1..3, n_tmp3 - forwarded to crc32c_pclmulqdq as its scratch.
//   const_or_pre_comp_const_index_u1/u2 - U1 / U2, either as constants or as
//                table indices, depending on is_pclmulqdq_supported.
void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp1, Register tmp2,
Register n_tmp3) {
// in_out = CRC_A * U1, in1 = CRC_B * U2
crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
// T1 path: split the shifted product into low half (tmp1) and high half (in_out).
shlq(in_out, 1);
movl(tmp1, in_out);
shrq(in_out, 32);
xorl(tmp2, tmp2);
crc32(tmp2, tmp1, 4);
xorl(in_out, tmp2); // we don't care about upper 32 bit contents here
// T2 path: same steps for the second product, held in in1.
shlq(in1, 1);
movl(tmp1, in1);
shrq(in1, 32);
xorl(tmp2, tmp2);
crc32(tmp2, tmp1, 4);
xorl(in1, tmp2);
// CRC = C1 ^ C2 ^ CRC_C
xorl(in_out, in1);
xorl(in_out, in2);
}
// Set N to predefined value
// Subtract from a length of a buffer
// execute in a loop:
// CRC_A = 0xFFFFFFFF, CRC_B = 0, CRC_C = 0
// for i = 1 to N do
// CRC_A = CRC32(CRC_A, A[i])
// CRC_B = CRC32(CRC_B, B[i])
// CRC_C = CRC32(CRC_C, C[i])
// end for
// Recombine
//
// Register roles in this implementation:
//   size     - partition size; used as a byte offset from the buffer pointer.
//   in_out1  - remaining length; decremented by 3 * size per outer iteration.
//   in_out2  - buffer pointer; advanced by 3 * size per outer iteration.
//   in_out3  - running CRC (accumulator for the first partition and the
//              destination of the recombination).
//   tmp1, tmp2 - accumulators for the second and third partition, zeroed at
//              the start of every outer iteration.
//   tmp3     - end address of the first partition (in_out2 + size).
//   w_xtmp1..3, tmp4, tmp5, n_tmp6 - forwarded to crc32c_rec_alt2 as scratch.
// The outer loop exits as soon as fewer than 3 * size bytes remain.
void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
Register in_out1, Register in_out2, Register in_out3,
Register tmp1, Register tmp2, Register tmp3,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp4, Register tmp5,
Register n_tmp6) {
Label L_processPartitions;
Label L_processPartition;
Label L_exit;
bind(L_processPartitions);
cmpl(in_out1, 3 * size);
jcc(Assembler::less, L_exit);
xorl(tmp1, tmp1);
xorl(tmp2, tmp2);
movq(tmp3, in_out2);
addq(tmp3, size);
// Inner loop: consume 8 bytes from each of the three partitions per iteration.
bind(L_processPartition);
crc32(in_out3, Address(in_out2, 0), 8);
crc32(tmp1, Address(in_out2, size), 8);
crc32(tmp2, Address(in_out2, size * 2), 8);
addq(in_out2, 8);
cmpq(in_out2, tmp3);
jcc(Assembler::less, L_processPartition);
// Merge the three partial CRCs into in_out3.
crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
n_tmp6);
// in_out2 already moved past the first partition; skip the other two.
addq(in_out2, 2 * size);
subl(in_out1, 3 * size);
jmp(L_processPartitions);
bind(L_exit);
}
#else
// 32-bit variant of the table-driven carry-less multiply: for each of the
// four bytes of the value in in_out, an 8-byte table entry is loaded and the
// entries are combined as Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24.
//
// Parameters:
//   in_out - holds the 32-bit input B on entry; clobbered (reused as the
//            address for the last lookup). It does NOT receive the result.
//   n      - selects the sub-table: the base obtained from
//            StubRoutines::crc32c_table_addr() is advanced by n * 256 * 8 bytes.
//   tmp1, tmp2, tmp3 - scratch registers, all clobbered (tmp3 = table base).
//   xtmp1  - receives the 64-bit result.
//   xtmp2  - scratch XMM register, clobbered.
void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n,
Register tmp1, Register tmp2, Register tmp3,
XMMRegister xtmp1, XMMRegister xtmp2) {
lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr()));
if (n > 0) {
addl(tmp3, n * 256 * 8);
}
// Q1 = TABLEExt[n][B & 0xFF];
movl(tmp1, in_out);
andl(tmp1, 0x000000FF);
shll(tmp1, 3);
addl(tmp1, tmp3);
movq(xtmp1, Address(tmp1, 0));
// Q2 = TABLEExt[n][B >> 8 & 0xFF];
movl(tmp2, in_out);
shrl(tmp2, 8);
andl(tmp2, 0x000000FF);
shll(tmp2, 3);
addl(tmp2, tmp3);
movq(xtmp2, Address(tmp2, 0));
psllq(xtmp2, 8);
pxor(xtmp1, xtmp2);
// Q3 = TABLEExt[n][B >> 16 & 0xFF];
movl(tmp2, in_out);
shrl(tmp2, 16);
andl(tmp2, 0x000000FF);
shll(tmp2, 3);
addl(tmp2, tmp3);
movq(xtmp2, Address(tmp2, 0));
psllq(xtmp2, 16);
pxor(xtmp1, xtmp2);
// Q4 = TABLEExt[n][B >> 24 & 0xFF];
// B is no longer needed, so in_out itself is turned into the entry address.
shrl(in_out, 24);
andl(in_out, 0x000000FF);
shll(in_out, 3);
addl(in_out, tmp3);
movq(xtmp2, Address(in_out, 0));
psllq(xtmp2, 24);
pxor(xtmp1, xtmp2); // Result in CXMM
// return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24;
}
// 32-bit variant: multiplies in_out by a constant and leaves the 64-bit
// product in w_xtmp1 (it is not moved back to a general purpose register).
//
//   - is_pclmulqdq_supported == true: const_or_pre_comp_const_index is an
//     immediate constant; it is staged through tmp1 into w_xtmp2 and
//     multiplied with in_out (moved into w_xtmp1) using pclmulqdq (imm 0).
//   - otherwise: const_or_pre_comp_const_index is a table index and the work
//     is delegated to crc32c_ipl_alg4, which clobbers in_out, tmp1, n_tmp2,
//     n_tmp3 and w_xtmp2 and also leaves its result in w_xtmp1.
void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1,
Register in_out,
uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
XMMRegister w_xtmp2,
Register tmp1,
Register n_tmp2, Register n_tmp3) {
if (is_pclmulqdq_supported) {
movdl(w_xtmp1, in_out);
movl(tmp1, const_or_pre_comp_const_index);
movdl(w_xtmp2, tmp1);
pclmulqdq(w_xtmp1, w_xtmp2, 0);
// Keep result in XMM since GPR is 32 bit in length
} else {
crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2);
}
}
// 32-bit variant of the recombination step: combines three partial CRCs as
//   T = (CRC * U) << 1;  C = (T >> 32) ^ CRC32(0, T & 0xFFFFFFFF)
// for the first two inputs and then XORs both results with the third.
//
// Register roles:
//   in_out - first partial CRC on entry, the recombined CRC on exit.
//   in1    - second partial CRC on entry; clobbered.
//   in2    - third partial CRC; only read.
//   w_xtmp1, w_xtmp2 - hold the two 64-bit products (the products stay in XMM
//            registers here and are split with psllq/psrlq + movdl).
//   w_xtmp3, tmp1, tmp2, n_tmp3 - scratch; also handed to crc32c_pclmulqdq.
void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp1, Register tmp2,
Register n_tmp3) {
crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3);
// First product: low 32 bits -> tmp1, high 32 bits -> in_out.
psllq(w_xtmp1, 1);
movdl(tmp1, w_xtmp1);
psrlq(w_xtmp1, 32);
movdl(in_out, w_xtmp1);
xorl(tmp2, tmp2);
crc32(tmp2, tmp1, 4);
xorl(in_out, tmp2);
// Second product: low 32 bits -> tmp1, high 32 bits -> in1.
psllq(w_xtmp2, 1);
movdl(tmp1, w_xtmp2);
psrlq(w_xtmp2, 32);
movdl(in1, w_xtmp2);
xorl(tmp2, tmp2);
crc32(tmp2, tmp1, 4);
xorl(in1, tmp2);
// Final combination with the third partial CRC.
xorl(in_out, in1);
xorl(in_out, in2);
}
// 32-bit variant: while at least 3 * size bytes remain, computes the CRC of
// three consecutive partitions of `size` bytes each in an interleaved loop
// and recombines them with crc32c_rec_alt2.
//
// Register roles:
//   in_out1 - remaining length; decremented by 3 * size per outer iteration.
//   in_out2 - buffer pointer; advanced by 3 * size per outer iteration.
//   in_out3 - running CRC (first partition accumulator and final result).
//   tmp1, tmp2 - accumulators for the second and third partition.
//   tmp3    - end address of the first partition (in_out2 + size).
//   tmp4, tmp5, n_tmp6 - the incoming values are ignored: the parameters are
//             re-pointed at tmp3, in_out1 and in_out2, which are saved on the
//             stack around the recombination so they can serve as scratch.
//
// NOTE(review): the pointer comparison `cmpl(in_out2, tmp3)` is followed by
// the signed condition `less`; presumably buffers never straddle the
// 0x80000000 boundary on 32-bit -- confirm, otherwise `below` would be needed.
void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
Register in_out1, Register in_out2, Register in_out3,
Register tmp1, Register tmp2, Register tmp3,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp4, Register tmp5,
Register n_tmp6) {
Label L_processPartitions;
Label L_processPartition;
Label L_exit;
bind(L_processPartitions);
cmpl(in_out1, 3 * size);
jcc(Assembler::less, L_exit);
xorl(tmp1, tmp1);
xorl(tmp2, tmp2);
movl(tmp3, in_out2);
addl(tmp3, size);
// Inner loop: 8 bytes per partition per iteration, as two 4-byte crc32 steps.
bind(L_processPartition);
crc32(in_out3, Address(in_out2, 0), 4);
crc32(tmp1, Address(in_out2, size), 4);
crc32(tmp2, Address(in_out2, size*2), 4);
crc32(in_out3, Address(in_out2, 0+4), 4);
crc32(tmp1, Address(in_out2, size+4), 4);
crc32(tmp2, Address(in_out2, size*2+4), 4);
addl(in_out2, 8);
cmpl(in_out2, tmp3);
jcc(Assembler::less, L_processPartition);
// Free up three registers for the recombination by spilling them.
push(tmp3);
push(in_out1);
push(in_out2);
tmp4 = tmp3;
tmp5 = in_out1;
n_tmp6 = in_out2;
crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
n_tmp6);
// Restore in reverse push order.
pop(in_out2);
pop(in_out1);
pop(tmp3);
// in_out2 already moved past the first partition; skip the other two.
addl(in_out2, 2 * size);
subl(in_out1, 3 * size);
jmp(L_processPartitions);
bind(L_exit);
}
#endif //LP64
#ifdef _LP64
// Algorithm 2: Pipelined usage of the CRC32 instruction.
// Input: A buffer I of L bytes.
// Output: the CRC32C value of the buffer.
// Notations:
// Write L = 24N + r, with N = floor (L/24).
// r = L mod 24 (0 <= r < 24).
// Consider I as the concatenation of A|B|C|R, where A, B, C, each,
// N quadwords, and R consists of r bytes.
// A[j] = I [8j+7:8j], j= 0, 1, ..., N-1
// B[j] = I [N + 8j+7:N + 8j], j= 0, 1, ..., N-1
// C[j] = I [2N + 8j+7:2N + 8j], j= 0, 1, ..., N-1
// if r > 0 R[j] = I [3N +j], j= 0, 1, ...,r-1
//
// Register roles in this implementation:
//   in_out - running CRC (input and result).
//   in1    - buffer pointer; advanced as data is consumed.
//   in2    - remaining length in bytes; reduced by the chunk passes and
//            finally masked to its low 3 bits.
//   tmp1..tmp6, w_xtmp1..3 - scratch, clobbered.
//
// The buffer is processed in three passes of crc32c_proc_chunk with
// decreasing partition sizes (CRC32C_HIGH, CRC32C_MIDDLE, CRC32C_LOW), then
// 4 bytes at a time, then byte by byte.
void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
bool is_pclmulqdq_supported) {
uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
Label L_wordByWord;
Label L_byteByByteProlog;
Label L_byteByByte;
Label L_exit;
// With pclmulqdq the multiplication constants themselves are needed: they are
// read from the start of the CRC32C table when this code is generated (note
// the pairwise swapped order). Without it, the array holds table indices.
if (is_pclmulqdq_supported ) {
const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr+1);
const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
assert((CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1 ) == 5, "Checking whether you declared all of the constants based on the number of \"chunks\"");
} else {
const_or_pre_comp_const_index[0] = 1;
const_or_pre_comp_const_index[1] = 0;
const_or_pre_comp_const_index[2] = 3;
const_or_pre_comp_const_index[3] = 2;
const_or_pre_comp_const_index[4] = 5;
const_or_pre_comp_const_index[5] = 4;
}
// Argument mapping for crc32c_proc_chunk: length = in2, buffer = in1, crc = in_out.
crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
// tmp1 = in1 + (in2 - (in2 & 7)): end address of the part handled 4 bytes at a time.
movl(tmp1, in2);
andl(tmp1, 0x00000007);
negl(tmp1);
addl(tmp1, in2);
addq(tmp1, in1);
BIND(L_wordByWord);
cmpq(in1, tmp1);
jcc(Assembler::greaterEqual, L_byteByByteProlog);
crc32(in_out, Address(in1, 0), 4);
addq(in1, 4);
jmp(L_wordByWord);
// Remaining (in2 & 7) bytes; tmp2 counts from 1 up to that number.
BIND(L_byteByByteProlog);
andl(in2, 0x00000007);
movl(tmp2, 1);
BIND(L_byteByByte);
cmpl(tmp2, in2);
jccb(Assembler::greater, L_exit);
crc32(in_out, Address(in1, 0), 1);
incq(in1);
incl(tmp2);
jmp(L_byteByByte);
BIND(L_exit);
}
#else
// 32-bit variant of the pipelined CRC32C computation.
//
// Register roles:
//   in_out - running CRC (input and result).
//   in1    - buffer pointer; advanced as data is consumed.
//   in2    - remaining length in bytes; reduced by the chunk passes and
//            finally masked to its low 3 bits.
//   tmp1..tmp6, w_xtmp1..3 - scratch, clobbered.
//
// The buffer is processed in three passes of crc32c_proc_chunk with
// decreasing partition sizes (CRC32C_HIGH, CRC32C_MIDDLE, CRC32C_LOW), then
// 4 bytes at a time, then byte by byte.
//
// NOTE(review): `cmpl(in1, tmp1)` compares addresses but is followed by the
// signed condition `greaterEqual`; presumably buffers never straddle the
// 0x80000000 boundary on 32-bit -- confirm.
void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
bool is_pclmulqdq_supported) {
uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
Label L_wordByWord;
Label L_byteByByteProlog;
Label L_byteByByte;
Label L_exit;
// With pclmulqdq the multiplication constants themselves are needed: they are
// read from the start of the CRC32C table when this code is generated (note
// the pairwise swapped order). Without it, the array holds table indices.
if (is_pclmulqdq_supported) {
const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr;
const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1);
const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2);
const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3);
const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4);
const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5);
} else {
const_or_pre_comp_const_index[0] = 1;
const_or_pre_comp_const_index[1] = 0;
const_or_pre_comp_const_index[2] = 3;
const_or_pre_comp_const_index[3] = 2;
const_or_pre_comp_const_index[4] = 5;
const_or_pre_comp_const_index[5] = 4;
}
// Argument mapping for crc32c_proc_chunk: length = in2, buffer = in1, crc = in_out.
crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported,
in2, in1, in_out,
tmp1, tmp2, tmp3,
w_xtmp1, w_xtmp2, w_xtmp3,
tmp4, tmp5,
tmp6);
// tmp1 = in1 + (in2 - (in2 & 7)): end address of the part handled 4 bytes at a time.
movl(tmp1, in2);
andl(tmp1, 0x00000007);
negl(tmp1);
addl(tmp1, in2);
addl(tmp1, in1);
BIND(L_wordByWord);
cmpl(in1, tmp1);
jcc(Assembler::greaterEqual, L_byteByByteProlog);
crc32(in_out, Address(in1,0), 4);
addl(in1, 4);
jmp(L_wordByWord);
// Remaining (in2 & 7) bytes; tmp2 counts from 1 up to that number.
BIND(L_byteByByteProlog);
andl(in2, 0x00000007);
movl(tmp2, 1);
BIND(L_byteByByte);
cmpl(tmp2, in2);
jccb(Assembler::greater, L_exit);
// Unlike the 64-bit variant, the byte is first loaded into tmp1 and the
// register form of crc32 is used.
movb(tmp1, Address(in1, 0));
crc32(in_out, tmp1, 1);
incl(in1);
incl(tmp2);
jmp(L_byteByByte);
BIND(L_exit);
}
#endif // LP64
#undef BIND #undef BIND
#undef BLOCK_COMMENT #undef BLOCK_COMMENT

View File

@ -907,14 +907,14 @@ class MacroAssembler: public Assembler {
// all corner cases and may result in NaN and require fallback to a // all corner cases and may result in NaN and require fallback to a
// runtime call. // runtime call.
void fast_pow(); void fast_pow();
void fast_exp(); void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
void increase_precision(); void increase_precision();
void restore_precision(); void restore_precision();
// computes exp(x). Fallback to runtime call included.
void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); }
// computes pow(x,y). Fallback to runtime call included. // computes pow(x,y). Fallback to runtime call included.
void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); } void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(num_fpu_regs_in_use); }
private: private:
@ -925,7 +925,7 @@ private:
void pow_exp_core_encoding(); void pow_exp_core_encoding();
// computes pow(x,y) or exp(x). Fallback to runtime call included. // computes pow(x,y) or exp(x). Fallback to runtime call included.
void pow_or_exp(bool is_exp, int num_fpu_regs_in_use); void pow_or_exp(int num_fpu_regs_in_use);
// these are private because users should be doing movflt/movdbl // these are private because users should be doing movflt/movdbl
@ -971,6 +971,10 @@ public:
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
void movsd(XMMRegister dst, AddressLiteral src); void movsd(XMMRegister dst, AddressLiteral src);
void mulpd(XMMRegister dst, XMMRegister src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, Address src) { Assembler::mulpd(dst, src); }
void mulpd(XMMRegister dst, AddressLiteral src);
void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
void mulsd(XMMRegister dst, AddressLiteral src); void mulsd(XMMRegister dst, AddressLiteral src);
@ -1278,9 +1282,42 @@ public:
Register raxReg); Register raxReg);
#endif #endif
// CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
void update_byte_crc32(Register crc, Register val, Register table); void update_byte_crc32(Register crc, Register val, Register table);
void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp);
// CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic
// Note on a naming convention:
// Prefix w = register only used on a Westmere+ architecture
// Prefix n = register only used on a Nehalem architecture
#ifdef _LP64
void crc32c_ipl_alg4(Register in_out, uint32_t n,
Register tmp1, Register tmp2, Register tmp3);
#else
void crc32c_ipl_alg4(Register in_out, uint32_t n,
Register tmp1, Register tmp2, Register tmp3,
XMMRegister xtmp1, XMMRegister xtmp2);
#endif
void crc32c_pclmulqdq(XMMRegister w_xtmp1,
Register in_out,
uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported,
XMMRegister w_xtmp2,
Register tmp1,
Register n_tmp2, Register n_tmp3);
void crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp1, Register tmp2,
Register n_tmp3);
void crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported,
Register in_out1, Register in_out2, Register in_out3,
Register tmp1, Register tmp2, Register tmp3,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
Register tmp4, Register tmp5,
Register n_tmp6);
void crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2,
Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6,
XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3,
bool is_pclmulqdq_supported);
// Fold 128-bit data chunk // Fold 128-bit data chunk
void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset);
void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf); void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf);

View File

@ -0,0 +1,677 @@
/*
* Copyright (c) 2015, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/******************************************************************************/
// ALGORITHM DESCRIPTION
// ---------------------
//
// Description:
// Let K = 64 (table size).
// e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
// where
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
// values of 2^(j/K) are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
//
// P(y) is a minimax polynomial approximation of exp(y)-1
// on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated with Maple V).
//
// To avoid problems with arithmetic overflow and underflow,
// the value of 2^n is safely computed as 2^n1 * 2^n2 where n1 in [-BIAS/2..BIAS/2]
// and BIAS is the value of the exponent bias.
//
// Special cases:
// exp(NaN) = NaN
// exp(+INF) = +INF
// exp(-INF) = 0
// exp(x) = 1 for subnormals
// for finite argument, only exp(0)=1 is exact
// For IEEE double
// if x > 709.782712893383973096 then exp(x) overflow
// if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
#ifdef _LP64
// Main-path constants for the 64-bit fast_exp. Each pair of juints is the
// (low word, high word) of one IEEE-754 double; fast_exp loads 16 bytes at a
// time from byte offsets 0, 16, 32, 48, 64 and 80.
//   byte  0: K/log(2) with K = 64, in both lanes (0x40571547652b82fe)
//   byte 16: leading part of log(2)/K, in both lanes (0x3f862e42fefa0000)
//   byte 32: small second part of log(2)/K, subtracted separately
//   byte 48: a value just below 0.5 (0x3fdffffffffffffe), in both lanes
//   byte 64: polynomial coefficients, approximately 1/720 and 1/24
//   byte 80: polynomial coefficients, approximately 1/120 and 1/6
ALIGNED_(16) juint _cv[] =
{
0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
};
// 0x4338000000000000 = 1.5 * 2^52 in both lanes. fast_exp adds and then
// subtracts this value to round x*K/log(2) to an integer; the sum keeps that
// integer in its low mantissa bits.
ALIGNED_(16) juint _shifter[] =
{
0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
};
// Per 64-bit lane 0x00000000ffffffc0: applied with pand to drop the six
// table-index bits and the upper 32 bits of the shifted value.
ALIGNED_(16) juint _mmask[] =
{
0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
};
// Per 64-bit lane 0xffc0 = 1023 * 64: the exponent bias pre-scaled by K, added
// before the left shift by 46 that moves the sum into the exponent field.
ALIGNED_(16) juint _bias[] =
{
0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
};
// Lookup table for 2^(j/64): 64 entries of 16 bytes (four juints) each, 256
// juints in total. fast_exp indexes it with the byte offset (m & 63) << 4.
// Within an entry:
//   first 8 bytes  - a small double that is added to the reduced argument
//                    (the T_lo term of the algorithm description)
//   second 8 bytes - mantissa bits of 2^(j/64) with a zero exponent field;
//                    fast_exp ORs the computed exponent bits into it
// Entry 0 is all zeros (2^0 has an empty mantissa and no correction).
ALIGNED_(16) juint _Tbl_addr[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL
};
// All bits set; fast_exp shifts it left by a computed count to build a mask
// on the out-of-range scaling path.
ALIGNED_(16) juint _ALLONES[] =
{
0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
};
// Bit pattern of 1.0 (0x3ff0000000000000) in both lanes; added with paddd to
// a value already placed in the exponent field to apply the exponent bias.
ALIGNED_(16) juint _ebias[] =
{
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
};
// Scalar double constants for the special-case paths of fast_exp, each stored
// as (low word, high word).

// Largest finite double (0x7fefffffffffffff); squared to produce overflow.
ALIGNED_(4) juint _XMAX[] =
{
0xffffffffUL, 0x7fefffffUL
};
// Smallest positive normal double (0x0010000000000000); squared to produce
// underflow.
ALIGNED_(4) juint _XMIN[] =
{
0x00000000UL, 0x00100000UL
};
// +Infinity; the result for exp(+INF).
ALIGNED_(4) juint _INF[] =
{
0x00000000UL, 0x7ff00000UL
};
// +0.0; the result for exp(-INF).
ALIGNED_(4) juint _ZERO[] =
{
0x00000000UL, 0x00000000UL
};
// 1.0; added to x when |x| is too small for the main path.
ALIGNED_(4) juint _ONE_val[] =
{
0x00000000UL, 0x3ff00000UL
};
// Emits the x86_64 SSE2 instruction sequence that computes exp(x) for a
// double. Every exit path ends at B1_5 with the result in xmm0.
//
// Registers:
// input: xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11
// The parameters are named after those registers and shadow the global
// register constants inside this function; rsp is always the real stack
// pointer. eax, ecx, edx and tmp must be four different registers.
//
// Stack use: 24 bytes are reserved for the duration of the sequence.
//   [rsp + 0]  32-bit status code (14 or 15) written on the overflow and
//              underflow paths; nothing in this function reads it back
//   [rsp + 8]  copy of the input x, re-read by the special-case paths
//   [rsp + 16] spill slot the result passes through on those paths
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
// The declarations between this jmp and bind(start) are C++ locals and emit
// no code, so the jump targets the instruction that immediately follows it.
jmp(start);
address cv = (address)_cv;
address Shifter = (address)_shifter;
address mmask = (address)_mmask;
address bias = (address)_bias;
address Tbl_addr = (address)_Tbl_addr;
address ALLONES = (address)_ALLONES;
address ebias = (address)_ebias;
address XMAX = (address)_XMAX;
address XMIN = (address)_XMIN;
address INF = (address)_INF;
address ZERO = (address)_ZERO;
address ONE_val = (address)_ONE_val;
bind(start);
subq(rsp, 24);
// Keep a copy of x for the special-case paths, then duplicate x into both lanes.
movsd(Address(rsp, 8), xmm0);
unpcklpd(xmm0, xmm0);
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm2, ExternalAddress(16+cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, ExternalAddress(32+cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
// Range check on the top 16 bits of |x| (exponent plus four mantissa bits):
// if they lie outside [15504, 16527] one of the two differences is negative,
// the OR has its sign bit set and the unsigned compare sends control to the
// special-case code at L_2TAG_PACKET_0_0_2.
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
subl(edx, eax);
subl(eax, 15504);
orl(edx, eax);
cmpl(edx, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
// m = round(x * K/log(2)): adding the shifter rounds the product and leaves
// m in the low bits of xmm7; subtracting it again yields m as a double.
mulpd(xmm1, xmm0);
addpd(xmm1, xmm6);
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
// y = x - m*log(2)/K, subtracted in two parts (cv+16, then cv+32).
mulpd(xmm2, xmm1);
movdqu(xmm4, ExternalAddress(64+cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, ExternalAddress(80+cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
// ecx = (m & 63) * 16 is the byte offset into the table; eax = edx = m >> 6 = n.
movdl(eax, xmm7);
movl(ecx, eax);
andl(ecx, 63);
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
// Build the exponent bits of 2^n in xmm7: ((m & ~63) + 1023*64) << 46.
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
// xmm2 = table entry j: low double is the correction term, high double the
// mantissa bits of 2^(j/64).
lea(tmp, ExternalAddress(Tbl_addr));
movdqu(xmm2, Address(ecx,tmp));
// Polynomial evaluation in y, carried out on both lanes and combined below.
mulpd(xmm4, xmm0);
movapd(xmm6, xmm0);
movapd(xmm1, xmm0);
mulpd(xmm6, xmm6);
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, ExternalAddress(48+cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
addsd(xmm1, xmm0);
// xmm2 = mantissa of 2^(j/64) combined with the exponent bits of 2^n.
por(xmm2, xmm7);
unpckhpd(xmm0, xmm0);
addsd(xmm0, xmm1);
addsd(xmm0, xmm6);
// Unsigned check that n lies in [-894, 1022]; otherwise take the careful
// scaling path.
addl(edx, 894);
cmpl(edx, 1916);
jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
// Common case: result = p * T + T, with p the polynomial value in xmm0 and
// T the scaled table value in xmm2.
mulsd(xmm0, xmm2);
addsd(xmm0, xmm2);
jmp (B1_5);
// n is outside [-894, 1022]: the exponent adjustment is split. n >> 1 is
// taken out of xmm2's exponent field here and re-applied at the end through
// xmm3, which becomes 2^(n >> 1) once ebias is added.
bind(L_2TAG_PACKET_1_0_2);
xorpd(xmm3, xmm3);
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
movl(edx, -1022);
subl(edx, eax);
movdl(xmm5, edx);
psllq(xmm4, xmm5);
movl(ecx, eax);
sarl(eax, 1);
pinsrw(xmm3, eax, 3);
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
psllq(xmm3, 4);
psubd(xmm2, xmm3);
mulsd(xmm0, xmm2);
// edx = -1022 - n; more than 52 goes to the path that stores status 15.
cmpl(edx, 52);
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
// Split xmm2 into a masked part (xmm4) and the remainder before summing.
// NOTE(review): presumably this controls rounding of results that end up
// subnormal - confirm against the LIBM source.
pand(xmm4, xmm2);
paddd(xmm3, xmm6);
subsd(xmm2, xmm4);
addsd(xmm0, xmm2);
cmpl(ecx, 1023);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
pextrw(ecx, xmm0, 3);
andl(ecx, 32768);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
movapd(xmm6, xmm0);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
// A zero exponent field means the scaled result is subnormal (or zero).
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
jmp(B1_5);
// Subnormal result: rebuilt with integer operations on the scaled parts, and
// status 15 is recorded.
bind(L_2TAG_PACKET_5_0_2);
mulsd(xmm6, xmm3);
mulsd(xmm4, xmm3);
movdqu(xmm0, xmm6);
pxor(xmm6, xmm4);
psrad(xmm6, 31);
pshufd(xmm6, xmm6, 85);
psllq(xmm0, 1);
psrlq(xmm0, 1);
pxor(xmm0, xmm6);
psrlq(xmm6, 63);
paddq(xmm0, xmm6);
paddq(xmm0, xmm4);
movl(Address(rsp,0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
jmp(B1_5);
// n >= 1023: finish the scaling, then test whether the exponent field is all
// ones (the result overflowed).
bind(L_2TAG_PACKET_3_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 32752);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
jmp(B1_5);
bind(L_2TAG_PACKET_2_0_2);
paddd(xmm3, xmm6);
addpd(xmm0, xmm2);
mulsd(xmm0, xmm3);
movl(Address(rsp,0), 15);
jmp(L_2TAG_PACKET_6_0_2);
// |x| >= 1024 (eax = high word of |x|). 2146435072 = 0x7ff00000, so an
// exponent field of all ones (Inf or NaN) goes to L_2TAG_PACKET_9_0_2.
bind(L_2TAG_PACKET_8_0_2);
cmpl(eax, 2146435072);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
// Finite and huge: reload the signed high word. The unsigned compare with
// INT_MIN is taken exactly when the sign bit is set (x negative).
movl(eax, Address(rsp,12));
cmpl(eax, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
// Large positive x: XMAX * XMAX overflows.
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
bind(L_2TAG_PACKET_7_0_2);
movl(Address(rsp,0), 14);
jmp(L_2TAG_PACKET_6_0_2);
// Large negative x: XMIN * XMIN underflows.
bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
mulsd(xmm0, xmm0);
movl(Address(rsp,0), 15);
jmp(L_2TAG_PACKET_6_0_2);
// Inf or NaN: any mantissa bit set (high word above 0x7ff00000 or low word
// non-zero) means NaN.
bind(L_2TAG_PACKET_9_0_2);
movl(edx, Address(rsp,8));
cmpl(eax, 2146435072);
jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
cmpl(edx, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
// Infinity: the signed high word tells +INF (0x7ff00000) from -INF.
movl(eax, Address(rsp,12));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
jmp(B1_5);
bind(L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
jmp(B1_5);
// NaN input: the result is x + x.
bind(L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(rsp, 8));
addsd(xmm0, xmm0);
jmp(B1_5);
// Input failed the initial range check. 1083179008 = 0x40900000 is the high
// word of 1024.0, so |x| >= 1024 continues at L_2TAG_PACKET_8_0_2; otherwise
// |x| is tiny and the result is 1.0 + x.
bind(L_2TAG_PACKET_0_0_2);
movl(eax, Address(rsp, 12));
andl(eax, 2147483647);
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
movsd(Address(rsp, 8), xmm0);
addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
jmp(B1_5);
// Overflow/underflow exits pass the result through the spill slot. B1_3 is
// bound here but nothing in this function jumps to it.
bind(L_2TAG_PACKET_6_0_2);
movq(Address(rsp, 16), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 16));
// Common exit: release the 24 bytes of stack.
bind(B1_5);
addq(rsp, 24);
}
#endif
#ifndef _LP64
// Single constant pool for the 32-bit fast_exp, which addresses it by byte
// offset from one base register. Pairs of juints are (low word, high word) of
// a 64-bit value. Layout (byte offset: contents):
//      0: 0xfff0000000000000 in both lanes - sign and exponent mask (pandn)
//     16: 0x00000000ffffffc0 in both lanes - drops the six table-index bits
//     32: 0x000000000000ffc0 in both lanes - exponent bias times 64
//     48: 1.5 * 2^52 in both lanes - rounding shifter
//     64: K/log(2) with K = 64, in both lanes
//     80: leading part of log(2)/K, in both lanes
//     96: small second part of log(2)/K, in both lanes
//    112: a value just below 0.5, in both lanes
//    128: polynomial coefficients, approximately 1/720 and 1/24
//    144: polynomial coefficients, approximately 1/120 and 1/6
//    160: 64 entries of 16 bytes for 2^(j/64): correction term, then the
//         mantissa bits
//   1184: 1.0
//   1192: +Infinity
//   1200: 0.0
//   1208: largest finite double
//   1216: smallest positive normal double
ALIGNED_(16) juint _static_const_table[] =
{
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
0x00100000UL
};
// Emits the 32-bit x86 instruction sequence that computes exp(x) for a
// double. The argument is read from the stack and the result is left on top
// of the x87 stack.
//
//registers,
// input: (rbp + 8)
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
// The parameters are named after those registers and shadow the global
// register constants inside this function; rsp is always the real stack
// pointer. eax, ecx, edx and tmp must be four different registers.
//
// Stack use: rsp is lowered by 120 bytes and is NOT restored here; the
// generated code relies on its caller (the stub generator ends with leave())
// to unwind the frame. After the adjustment the argument sits at [rsp + 128],
// tmp is saved at [rsp + 64] and restored on exit, and the lower slots are
// scratch for the x87 control word and SSE/x87 transfers.
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
  assert_different_registers(tmp, eax, ecx, edx);
  // The declaration between this jmp and bind(start) emits no code, so the
  // jump targets the instruction that immediately follows it.
  jmp(start);
  address static_const_table = (address)_static_const_table;
  bind(start);
  subl(rsp, 120);
  // Save tmp, then use it as the base of the constant pool.
  movl(Address(rsp, 64), tmp);
  lea(tmp, ExternalAddress(static_const_table));
  // Load x (the low lane is what matters) and duplicate it into both lanes.
  movdqu(xmm0, Address(rsp, 128));
  unpcklpd(xmm0, xmm0);
  movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
  // Range check on the top 16 bits of |x|: outside [15504, 16527] one of the
  // two differences is negative, the OR has its sign bit set and the unsigned
  // compare sends control to the special-case code.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
  // m = round(x * K/log(2)) via the shifter; xmm7 keeps m in its low bits.
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  // y = x - m*log(2)/K, subtracted in two parts.
  mulpd(xmm2, xmm1);
  movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  // ecx = (m & 63) * 16 is the table byte offset; eax = edx = m >> 6 = n.
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);
  sarl(eax, 6);
  movl(edx, eax);
  // Build the exponent bits of 2^n in xmm7: ((m & ~63) + 1023*64) << 46.
  movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  // Table entry j lives at byte offset 160 + 16*j of the constant pool.
  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
  // Polynomial evaluation in y, carried out on both lanes and combined below.
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  // xmm2 = mantissa of 2^(j/64) combined with the exponent bits of 2^n.
  por(xmm2, xmm7);
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  // Unsigned check that n lies in [-894, 1022]; otherwise use the x87 path.
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc (Assembler::above, L_2TAG_PACKET_1_0_2);
  // Common case: result = p * T + T.
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp(L_2TAG_PACKET_2_0_2);
  // n outside [-894, 1022]: finish on the x87 unit. The control word is saved
  // at [rsp + 24], a copy with bits 0x300 (precision control) set is loaded,
  // and the original is restored afterwards.
  bind(L_2TAG_PACKET_1_0_2);
  fnstcw(Address(rsp, 24));
  movzwl(edx, Address(rsp, 24));
  orl(edx, 768);
  movw(Address(rsp, 28), edx);
  fldcw(Address(rsp, 28));
  // Split n into eax = n >> 1 and edx = n - (n >> 1).
  movl(edx, eax);
  sarl(eax, 1);
  subl(edx, eax);
  // xmm6 = mantissa bits of the table value with exponent n >> 1 (biased).
  movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
  pandn(xmm6, xmm2);
  addl(eax, 1023);
  movdl(xmm3, eax);
  psllq(xmm3, 52);
  por(xmm6, xmm3);
  // xmm4 = 2^(n - (n >> 1)).
  addl(edx, 1023);
  movdl(xmm4, edx);
  psllq(xmm4, 52);
  // Multiply/add on the x87 stack, apply the second scale factor, then move
  // the result back to xmm0 through memory.
  movsd(Address(rsp, 8), xmm0);
  fld_d(Address(rsp, 8));
  movsd(Address(rsp, 16), xmm6);
  fld_d(Address(rsp, 16));
  fmula(1);
  faddp(1);
  movsd(Address(rsp, 8), xmm4);
  fld_d(Address(rsp, 8));
  fmulp(1);
  fstp_d(Address(rsp, 8));
  movsd(xmm0,Address(rsp, 8));
  fldcw(Address(rsp, 24));
  // Classify the result by its exponent field: all ones is overflow, zero is
  // a subnormal or zero result.
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_2_0_2);
  // The instructions from here to the next bind follow an unconditional jmp
  // and have no label, so they are emitted but never executed.
  cmpl(ecx, INT_MIN);
  jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, -1064950997);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(edx ,-17155601);
  jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
  jmp(L_2TAG_PACKET_4_0_2);
  // Status 14 marks overflow, 15 underflow; edx is not read again in this
  // function.
  bind(L_2TAG_PACKET_3_0_2);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);
  bind(L_2TAG_PACKET_4_0_2);
  movl(edx, 15);
  // Push the result onto the x87 stack; xmm0 is reloaded with the input x.
  bind(L_2TAG_PACKET_5_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 128));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_6_0_2);
  // |x| >= 1024 (eax = high word of |x|). 2146435072 = 0x7ff00000, so an
  // exponent field of all ones (Inf or NaN) goes to L_2TAG_PACKET_8_0_2.
  bind(L_2TAG_PACKET_7_0_2);
  cmpl(eax, 2146435072);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
  // Finite and huge: branch on the sign bit of the high word. The compare
  // against INT_MIN must be unsigned (aboveEqual). A signed greaterEqual is
  // true for every value, which would make the overflow code below
  // unreachable and send large positive x down the underflow path.
  movl(eax, Address(rsp, 132));
  cmpl(eax, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
  // Large positive x: XMAX * XMAX overflows.
  movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);
  // Large negative x: XMIN * XMIN underflows.
  bind(L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1216)); // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);
  movl(edx, 15);
  jmp(L_2TAG_PACKET_5_0_2);
  // Inf or NaN: any mantissa bit set (high word above 0x7ff00000 or low word
  // non-zero) means NaN.
  bind(L_2TAG_PACKET_8_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
  // Infinity: the signed high word tells +INF (0x7ff00000) from -INF.
  movl(eax, Address(rsp, 132));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);
  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
  jmp(L_2TAG_PACKET_2_0_2);
  // NaN input: the result is x + x.
  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, xmm0);
  jmp(L_2TAG_PACKET_2_0_2);
  // Input failed the initial range check. 1083179008 = 0x40900000 is the high
  // word of 1024.0, so |x| >= 1024 continues at L_2TAG_PACKET_7_0_2;
  // otherwise |x| is tiny and the result is 1.0 + x.
  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 132));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);
  // Normal exit: move the SSE result onto the x87 stack through memory.
  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 48), xmm0);
  fld_d(Address(rsp, 48));
  // Common exit: restore the saved tmp register.
  bind(L_2TAG_PACKET_6_0_2);
  movl(tmp, Address(rsp, 64));
}
#endif

View File

@ -2134,14 +2134,6 @@ class StubGenerator: public StubCodeGenerator {
__ trigfunc('t'); __ trigfunc('t');
__ ret(0); __ ret(0);
} }
{
StubCodeMark mark(this, "StubRoutines", "exp");
StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
__ fld_d(Address(rsp, 4));
__ exp_with_fallback(0);
__ ret(0);
}
{ {
StubCodeMark mark(this, "StubRoutines", "pow"); StubCodeMark mark(this, "StubRoutines", "pow");
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc(); StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
@ -2991,6 +2983,89 @@ class StubGenerator: public StubCodeGenerator {
return start; return start;
} }
/**
 * Emits the stub for the CRC32C updateBytes intrinsic (32-bit x86).
 *
 * Inputs, relative to rsp on entry to the stub:
 *   rsp(4)  - int crc
 *   rsp(8)  - byte* buf
 *   rsp(12) - int length
 *   rsp(16) - table_start - optional (present only when doing a library_call,
 *             not used by x86 algorithm)
 *
 * Output:
 *   rax - int crc result
 */
address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
  assert(UseCRC32CIntrinsics, "need SSE4_2");

  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
  address stub_entry = __ pc();

  // Registers that receive the three stack arguments.
  const Register crc = rax;   // crc
  const Register buf = rcx;   // source java byte array address
  const Register len = rdx;   // length
  // Temporaries; pushed before and popped after the CRC computation.
  const Register scratch1 = rbx;
  const Register scratch2 = rsi;
  const Register scratch3 = rdi;
  // Filler for the three extra temporaries of crc32c_ipl_alg2_alt2. It will
  // never be used; it exists only so the signature stays the same between
  // the 64-bit and 32-bit builds.
  const Register unused = 0;

  assert_different_registers(crc, buf, len, scratch1, scratch2, scratch3);

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

  // enter() has just pushed ebp, so each argument is 4 bytes further from
  // rsp than the offsets listed in the header comment.
  const int pushed_ebp = 4;
  Address crc_arg(rsp, 4 + pushed_ebp + 0);
  Address buf_arg(rsp, 4 + pushed_ebp + 4);
  Address len_arg(rsp, 4 + pushed_ebp + 8);

  // Load up:
  __ movl(crc, crc_arg);
  __ movl(buf, buf_arg);
  __ movl(len, len_arg);

  __ push(scratch1);
  __ push(scratch2);
  __ push(scratch3);

  __ crc32c_ipl_alg2_alt2(crc, buf, len,
                          scratch1, scratch2, scratch3,
                          unused, unused, unused,
                          xmm0, xmm1, xmm2,
                          is_pclmulqdq_supported);

  // Restore in reverse order of the pushes.
  __ pop(scratch3);
  __ pop(scratch2);
  __ pop(scratch1);

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return stub_entry;
}
// Emits the stub that computes exp(x) with the LIBM-derived fast_exp
// sequence and returns its entry address.
//
// fast_exp reads its argument from the stack relative to the frame set up by
// enter() and lowers rsp without restoring it; leave() unwinds that before
// the return.
address generate_libmExp() {
  // Register the stub under a name, as the other stubs in this generator do,
  // so the generated code has a stub descriptor.
  StubCodeMark mark(this, "StubRoutines", "libmExp");

  address start = __ pc();

  const XMMRegister x0 = xmm0;
  const XMMRegister x1 = xmm1;
  const XMMRegister x2 = xmm2;
  const XMMRegister x3 = xmm3;
  const XMMRegister x4 = xmm4;
  const XMMRegister x5 = xmm5;
  const XMMRegister x6 = xmm6;
  const XMMRegister x7 = xmm7;

  // fast_exp saves and restores this register itself.
  const Register tmp = rbx;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

  __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
// Safefetch stubs. // Safefetch stubs.
void generate_safefetch(const char* name, int size, address* entry, void generate_safefetch(const char* name, int size, address* entry,
address* fault_pc, address* continuation_pc) { address* fault_pc, address* continuation_pc) {
@ -3204,6 +3279,16 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
} }
if (UseCRC32CIntrinsics) {
bool supports_clmul = VM_Version::supports_clmul();
StubRoutines::x86::generate_CRC32C_table(supports_clmul);
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2()) {
StubRoutines::_dexp = generate_libmExp();
}
} }

View File

@ -3038,19 +3038,6 @@ class StubGenerator: public StubCodeGenerator {
__ addq(rsp, 8); __ addq(rsp, 8);
__ ret(0); __ ret(0);
} }
{
StubCodeMark mark(this, "StubRoutines", "exp");
StubRoutines::_intrinsic_exp = (double (*)(double)) __ pc();
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ exp_with_fallback(0);
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
{ {
StubCodeMark mark(this, "StubRoutines", "pow"); StubCodeMark mark(this, "StubRoutines", "pow");
StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc(); StubRoutines::_intrinsic_pow = (double (*)(double,double)) __ pc();
@ -3958,6 +3945,64 @@ class StubGenerator: public StubCodeGenerator {
return start; return start;
} }
/**
* Generates the CRC32C stub and returns its entry address.
*
* Arguments:
*
* Inputs:
* c_rarg0 - int crc
* c_rarg1 - byte* buf
* c_rarg2 - long length
* c_rarg3 - table_start - optional (present only when doing a library call,
* not used by x86 algorithm)
*
* Output:
* rax - int crc result
*
* is_pclmulqdq_supported is forwarded unchanged to crc32c_ipl_alg2_alt2.
*/
address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) {
assert(UseCRC32CIntrinsics, "need SSE4_2");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
address start = __ pc();
//reg.arg int#0 int#1 int#2 int#3 int#4 int#5 float regs
//Windows RCX RDX R8 R9 none none XMM0..XMM3
//Lin / Sol RDI RSI RDX RCX R8 R9 XMM0..XMM7
const Register crc = c_rarg0; // crc
const Register buf = c_rarg1; // source java byte array address
const Register len = c_rarg2; // length
// Scratch registers handed to the CRC32C algorithm.
const Register a = rax;
const Register j = r9;
const Register k = r10;
const Register l = r11;
#ifdef _WIN64
// On Win64 rcx/r8 carry the incoming arguments (see table above), so rdi/rsi
// are used as the two remaining scratch registers instead.
const Register y = rdi;
const Register z = rsi;
#else
const Register y = rcx;
const Register z = r8;
#endif
assert_different_registers(crc, buf, len, a, j, k, l, y, z);
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
// rdi/rsi are saved here and restored below (presumably because they are
// callee-saved in the Win64 calling convention - confirm against the ABI).
__ push(y);
__ push(z);
#endif
__ crc32c_ipl_alg2_alt2(crc, buf, len,
a, j, k,
l, y, z,
c_farg0, c_farg1, c_farg2,
is_pclmulqdq_supported);
// Move the final CRC into rax, the documented result register.
__ movl(rax, crc);
#ifdef _WIN64
// Restore in reverse order of the pushes above.
__ pop(z);
__ pop(y);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
/** /**
* Arguments: * Arguments:
@ -4122,6 +4167,44 @@ class StubGenerator: public StubCodeGenerator {
return start; return start;
} }
// Generates the stub that computes exp(x) via MacroAssembler::fast_exp and
// returns the stub's entry address.
//
// The XMM registers xmm0..xmm7 and the general registers rax, rcx, rdx and
// r11 are handed to fast_exp as operands/temporaries.
address generate_libmExp() {
  // Register the generated code under a stub name, as the sibling generator
  // generate_updateBytesCRC32C does; without the mark the code blob carries
  // no descriptor identifying it.
  StubCodeMark mark(this, "StubRoutines", "libmExp");

  address start = __ pc();

  const XMMRegister x0 = xmm0;
  const XMMRegister x1 = xmm1;
  const XMMRegister x2 = xmm2;
  const XMMRegister x3 = xmm3;
  const XMMRegister x4 = xmm4;
  const XMMRegister x5 = xmm5;
  const XMMRegister x6 = xmm6;
  const XMMRegister x7 = xmm7;

  const Register tmp = r11;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WIN64
  // save the xmm registers which must be preserved 6-7
  // NOTE(review): xmm_save() is defined outside this block; confirm that the
  // slots it addresses are valid storage relative to this stub's own frame.
  __ movdqu(xmm_save(6), as_XMMRegister(6));
  __ movdqu(xmm_save(7), as_XMMRegister(7));
#endif

  __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);

#ifdef _WIN64
  // restore xmm regs belonging to calling function
  __ movdqu(as_XMMRegister(6), xmm_save(6));
  __ movdqu(as_XMMRegister(7), xmm_save(7));
#endif

  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
#undef __ #undef __
#define __ masm-> #define __ masm->
@ -4302,6 +4385,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
} }
if (UseCRC32CIntrinsics) {
bool supports_clmul = VM_Version::supports_clmul();
StubRoutines::x86::generate_CRC32C_table(supports_clmul);
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
StubRoutines::_dexp = generate_libmExp();
} }
void generate_all() { void generate_all() {

View File

@ -27,6 +27,7 @@
#include "runtime/frame.inline.hpp" #include "runtime/frame.inline.hpp"
#include "runtime/stubRoutines.hpp" #include "runtime/stubRoutines.hpp"
#include "runtime/thread.inline.hpp" #include "runtime/thread.inline.hpp"
#include "crc32c.h"
// Implementation of the platform-specific part of StubRoutines - for // Implementation of the platform-specific part of StubRoutines - for
// a description of how to extend it, see the stubRoutines.hpp file. // a description of how to extend it, see the stubRoutines.hpp file.
@ -130,3 +131,107 @@ juint StubRoutines::x86::_crc_table[] =
0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
0x2d02ef8dUL 0x2d02ef8dUL
}; };
#define D 32
#define P 0x82F63B78 // Reflection of Castagnoli (0x11EDC6F41)
#define TILL_CYCLE 31

// _crc32c_pow_2k_table[k] holds x^(2^k) mod P(x) in reflected bit order.
// Only TILL_CYCLE entries are stored because the sequence is periodic:
// entry TILL_CYCLE (== 31) would be equal to entry 0.
uint32_t _crc32c_pow_2k_table[TILL_CYCLE];

// A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 8
// Listing 1: Multiplication of normalized polynomials
// "a" and "b" occupy D least significant bits.
//
// Returns (a * b) mod P(x), all values in reflected representation (bit 31 is
// the coefficient of x^0, bit 0 the coefficient of x^31).
uint32_t crc32c_multiply(uint32_t a, uint32_t b) {
  uint32_t product = 0;
  // Running value of (b * x**k) mod P; only the current power is needed, so
  // no table of all D + 1 powers is kept.
  uint32_t b_times_x_k = b;

  for (int k = 0; k < D; ++k) {
    // If "a" has non-zero coefficient at x**k, add ((b * x**k) mod P) to the
    // result. The bit is extracted with an unsigned shift so that no signed
    // "1 << 31" is ever evaluated.
    if (((a >> (D - 1 - k)) & 1u) != 0) {
      product ^= b_times_x_k;
    }

    // Advance to (b * x**(k+1)) mod P.
    if ((b_times_x_k & 1u) != 0) {
      // If degree of (b * x**k * x) is D, then subtracting P brings the
      // degree back below D.
      b_times_x_k = (b_times_x_k >> 1) ^ P;
    } else {
      b_times_x_k >>= 1;
    }
  }

  return product;
}
#undef D
#undef P

// A. Kadatch and B. Jenkins / Everything we know about CRC but afraid to forget September 3, 2010 9
//
// Fills _crc32c_pow_2k_table by repeated squaring.
void crc32c_init_pow_2k(void) {
  // _crc32c_pow_2k_table[0] = x^(2^0) mod P(x) = x mod P(x) = x.
  // Since we are operating on reflected values:
  // x = 10b, reflect(x) = 0x40000000
  _crc32c_pow_2k_table[0] = 0x40000000;

  for (int k = 1; k < TILL_CYCLE; k++) {
    // _crc32c_pow_2k_table[k] = _crc32c_pow_2k_table[k-1]^2 mod P(x)
    uint32_t previous = _crc32c_pow_2k_table[k - 1];
    _crc32c_pow_2k_table[k] = crc32c_multiply(previous, previous);
  }
}

// x^N mod P(x)
//
// Square-and-multiply over the binary digits of n. crc32c_init_pow_2k() must
// have been called first. n == 0 yields the polynomial 1 (0x80000000).
uint32_t crc32c_f_pow_n(uint32_t n) {
  // result = 1 (polynomial)
  uint32_t result = 0x80000000;

  for (uint32_t i = 0; n != 0; n >>= 1, i++) {
    if ((n & 1u) != 0) {
      // The table only has TILL_CYCLE entries; bit 31 of n wraps around to
      // entry 0, matching the periodicity documented on the table.
      result = crc32c_multiply(result, _crc32c_pow_2k_table[i % TILL_CYCLE]);
    }
  }

  return result;
}
juint *StubRoutines::x86::_crc32c_table;
void StubRoutines::x86::generate_CRC32C_table(bool is_pclmulqdq_table_supported) {
static juint pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS];
crc32c_init_pow_2k();
pow_n[0] = crc32c_f_pow_n(CRC32C_HIGH * 8); // 8N * 8 = 64N
pow_n[1] = crc32c_f_pow_n(CRC32C_HIGH * 8 * 2); // 128N
pow_n[2] = crc32c_f_pow_n(CRC32C_MIDDLE * 8);
pow_n[3] = crc32c_f_pow_n(CRC32C_MIDDLE * 8 * 2);
pow_n[4] = crc32c_f_pow_n(CRC32C_LOW * 8);
pow_n[CRC32C_NUM_PRECOMPUTED_CONSTANTS - 1] =
crc32c_f_pow_n(CRC32C_LOW * 8 * 2);
if (is_pclmulqdq_table_supported) {
_crc32c_table = pow_n;
} else {
static julong pclmulqdq_table[CRC32C_NUM_PRECOMPUTED_CONSTANTS * 256];
for (int j = 0; j < CRC32C_NUM_PRECOMPUTED_CONSTANTS; j++) {
static juint X_CONST = pow_n[j];
for (int64_t i = 0; i < 256; i++) { // to force 64 bit wide computations
// S. Gueron / Information Processing Letters 112 (2012) 184
// Algorithm 3: Generating a carry-less multiplication lookup table.
// Input: A 32-bit constant, X_CONST.
// Output: A table of 256 entries, each one is a 64-bit quadword,
// that can be used for computing "byte" * X_CONST, for a given byte.
pclmulqdq_table[j * 256 + i] =
((i & 1) * X_CONST) ^ ((i & 2) * X_CONST) ^ ((i & 4) * X_CONST) ^
((i & 8) * X_CONST) ^ ((i & 16) * X_CONST) ^ ((i & 32) * X_CONST) ^
((i & 64) * X_CONST) ^ ((i & 128) * X_CONST);
}
}
_crc32c_table = (juint*)pclmulqdq_table;
}
}

View File

@ -36,6 +36,8 @@
// masks and table for CRC32 // masks and table for CRC32
static uint64_t _crc_by128_masks[]; static uint64_t _crc_by128_masks[];
static juint _crc_table[]; static juint _crc_table[];
// table for CRC32C
static juint* _crc32c_table;
// swap mask for ghash // swap mask for ghash
static address _ghash_long_swap_mask_addr; static address _ghash_long_swap_mask_addr;
static address _ghash_byte_swap_mask_addr; static address _ghash_byte_swap_mask_addr;
@ -46,5 +48,6 @@
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; } static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; } static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; } static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP

View File

@ -697,15 +697,14 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
__ jmp(rdi); __ jmp(rdi);
__ bind(slow_path); __ bind(slow_path);
(void) generate_normal_entry(false); __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
return entry; return entry;
} }
#endif // INCLUDE_ALL_GCS #endif // INCLUDE_ALL_GCS
// If G1 is not enabled then attempt to go through the accessor entry point // If G1 is not enabled then attempt to go through the accessor entry point
// Reference.get is an accessor // Reference.get is an accessor
return generate_jump_to_normal_entry(); return NULL;
} }
/** /**
@ -753,12 +752,10 @@ address InterpreterGenerator::generate_CRC32_update_entry() {
// generate a vanilla native entry as the slow path // generate a vanilla native entry as the slow path
__ bind(slow_path); __ bind(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
(void) generate_native_entry(false);
return entry; return entry;
} }
return generate_native_entry(false); return NULL;
} }
/** /**
@ -790,18 +787,25 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
const Register buf = rdx; // source java byte array address const Register buf = rdx; // source java byte array address
const Register len = rdi; // length const Register len = rdi; // length
// value x86_32
// interp. arg ptr ESP + 4
// int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
// 3 2 1 0
// int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
// 4 2,3 1 0
// Arguments are reversed on java expression stack // Arguments are reversed on java expression stack
__ movl(len, Address(rsp, wordSize)); // Length __ movl(len, Address(rsp, 4 + 0)); // Length
// Calculate address of start element // Calculate address of start element
if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
__ movptr(buf, Address(rsp, 3*wordSize)); // long buf __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf
__ addptr(buf, Address(rsp, 2*wordSize)); // + offset __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
__ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC
} else { } else {
__ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array
__ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
__ addptr(buf, Address(rsp, 2*wordSize)); // + offset __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
__ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC
} }
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
@ -814,12 +818,57 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
// generate a vanilla native entry as the slow path // generate a vanilla native entry as the slow path
__ bind(slow_path); __ bind(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
return entry;
}
return NULL;
}
(void) generate_native_entry(false); /**
* Method entry for static native methods:
* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
*/
address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
// Emits the interpreter entry only when UseCRC32CIntrinsics is set.
if (UseCRC32CIntrinsics) {
address entry = __ pc();
// Load parameters
const Register crc = rax; // crc
const Register buf = rcx; // source java byte array address
const Register len = rdx; // length
// "end" shares a register with "len": the end index is loaded first and the
// offset is then subtracted in place to obtain the length.
const Register end = len;
// value x86_32
// interp. arg ptr ESP + 4
// int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
// 3 2 1 0
// int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end)
// 4 2,3 1 0
// Arguments are reversed on java expression stack
__ movl(end, Address(rsp, 4 + 0)); // end
__ subl(len, Address(rsp, 4 + 1 * wordSize)); // end - offset == length
// Calculate address of start element
if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
__ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC
} else {
__ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array
__ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
__ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset
__ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC
}
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
// result in rax
// _areturn
__ pop(rdi); // get return address
__ mov(rsp, rsi); // set sp to sender sp
__ jmp(rdi);
return entry; return entry;
} }
return generate_native_entry(false); return NULL;
} }
/** /**
@ -827,10 +876,8 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
* java.lang.Float.intBitsToFloat(int bits) * java.lang.Float.intBitsToFloat(int bits)
*/ */
address InterpreterGenerator::generate_Float_intBitsToFloat_entry() { address InterpreterGenerator::generate_Float_intBitsToFloat_entry() {
address entry;
if (UseSSE >= 1) { if (UseSSE >= 1) {
entry = __ pc(); address entry = __ pc();
// rsi: the sender's SP // rsi: the sender's SP
@ -844,11 +891,10 @@ address InterpreterGenerator::generate_Float_intBitsToFloat_entry() {
__ pop(rdi); // get return address __ pop(rdi); // get return address
__ mov(rsp, rsi); // set rsp to the sender's SP __ mov(rsp, rsi); // set rsp to the sender's SP
__ jmp(rdi); __ jmp(rdi);
} else { return entry;
entry = generate_native_entry(false);
} }
return entry; return NULL;
} }
/** /**
@ -856,10 +902,8 @@ address InterpreterGenerator::generate_Float_intBitsToFloat_entry() {
* java.lang.Float.floatToRawIntBits(float value) * java.lang.Float.floatToRawIntBits(float value)
*/ */
address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() { address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() {
address entry;
if (UseSSE >= 1) { if (UseSSE >= 1) {
entry = __ pc(); address entry = __ pc();
// rsi: the sender's SP // rsi: the sender's SP
@ -873,11 +917,10 @@ address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() {
__ pop(rdi); // get return address __ pop(rdi); // get return address
__ mov(rsp, rsi); // set rsp to the sender's SP __ mov(rsp, rsi); // set rsp to the sender's SP
__ jmp(rdi); __ jmp(rdi);
} else { return entry;
entry = generate_native_entry(false);
} }
return entry; return NULL;
} }
@ -886,10 +929,8 @@ address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() {
* java.lang.Double.longBitsToDouble(long bits) * java.lang.Double.longBitsToDouble(long bits)
*/ */
address InterpreterGenerator::generate_Double_longBitsToDouble_entry() { address InterpreterGenerator::generate_Double_longBitsToDouble_entry() {
address entry;
if (UseSSE >= 2) { if (UseSSE >= 2) {
entry = __ pc(); address entry = __ pc();
// rsi: the sender's SP // rsi: the sender's SP
@ -903,11 +944,10 @@ address InterpreterGenerator::generate_Double_longBitsToDouble_entry() {
__ pop(rdi); // get return address __ pop(rdi); // get return address
__ mov(rsp, rsi); // set rsp to the sender's SP __ mov(rsp, rsi); // set rsp to the sender's SP
__ jmp(rdi); __ jmp(rdi);
} else { return entry;
entry = generate_native_entry(false);
} }
return entry; return NULL;
} }
/** /**
@ -915,10 +955,8 @@ address InterpreterGenerator::generate_Double_longBitsToDouble_entry() {
* java.lang.Double.doubleToRawLongBits(double value) * java.lang.Double.doubleToRawLongBits(double value)
*/ */
address InterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { address InterpreterGenerator::generate_Double_doubleToRawLongBits_entry() {
address entry;
if (UseSSE >= 2) { if (UseSSE >= 2) {
entry = __ pc(); address entry = __ pc();
// rsi: the sender's SP // rsi: the sender's SP
@ -933,11 +971,10 @@ address InterpreterGenerator::generate_Double_doubleToRawLongBits_entry() {
__ pop(rdi); // get return address __ pop(rdi); // get return address
__ mov(rsp, rsi); // set rsp to the sender's SP __ mov(rsp, rsi); // set rsp to the sender's SP
__ jmp(rdi); __ jmp(rdi);
} else { return entry;
entry = generate_native_entry(false);
} }
return entry; return NULL;
} }
// //

View File

@ -677,15 +677,14 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// generate a vanilla interpreter entry as the slow path // generate a vanilla interpreter entry as the slow path
__ bind(slow_path); __ bind(slow_path);
(void) generate_normal_entry(false); __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
return entry; return entry;
} }
#endif // INCLUDE_ALL_GCS #endif // INCLUDE_ALL_GCS
// If G1 is not enabled then attempt to go through the accessor entry point // If G1 is not enabled then attempt to go through the accessor entry point
// Reference.get is an accessor // Reference.get is an accessor
return generate_jump_to_normal_entry(); return NULL;
} }
/** /**
@ -733,12 +732,10 @@ address InterpreterGenerator::generate_CRC32_update_entry() {
// generate a vanilla native entry as the slow path // generate a vanilla native entry as the slow path
__ bind(slow_path); __ bind(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
(void) generate_native_entry(false);
return entry; return entry;
} }
return generate_native_entry(false); return NULL;
} }
/** /**
@ -796,12 +793,61 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret
// generate a vanilla native entry as the slow path // generate a vanilla native entry as the slow path
__ bind(slow_path); __ bind(slow_path);
__ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
return entry;
}
return NULL;
}
(void) generate_native_entry(false); /**
* Method entry for static native methods:
* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
*/
address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
// Emits the interpreter entry only when UseCRC32CIntrinsics is set.
if (UseCRC32CIntrinsics) {
address entry = __ pc();
// Load parameters
const Register crc = c_rarg0; // crc
const Register buf = c_rarg1; // source java byte array address
const Register len = c_rarg2;
const Register off = c_rarg3; // offset
// "end" shares a register with "len": the end index is loaded into it and
// the offset is subtracted in place below to obtain the length.
const Register end = len;
// Arguments are reversed on java expression stack
// Calculate address of start element
if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
__ movptr(buf, Address(rsp, 3 * wordSize)); // long buf
__ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
__ addq(buf, off); // + offset
__ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC
// Note on 5 * wordSize vs. 4 * wordSize:
// * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end)
// 4 2,3 1 0
// end starts at SP + 8
// The Java(R) Virtual Machine Specification Java SE 7 Edition
// 4.10.2.3. Values of Types long and double
// "When calculating operand stack length, values of type long and double have length two."
} else {
__ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array
__ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
__ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
__ addq(buf, off); // + offset
__ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC
}
__ movl(end, Address(rsp, wordSize)); // end
__ subl(end, off); // end - off
// After the subtraction the shared end/len register holds the length that
// is passed as the third stub argument.
__ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len);
// result in rax
// _areturn
__ pop(rdi); // get return address
__ mov(rsp, r13); // set sp to sender sp
__ jmp(rdi);
return entry; return entry;
} }
return generate_native_entry(false);
return NULL;
} }
// Interpreter stub for calling a native method. (asm interpreter) // Interpreter stub for calling a native method. (asm interpreter)

View File

@ -661,6 +661,18 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
} }
if (supports_sse4_2()) {
if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
UseCRC32CIntrinsics = true;
}
}
else if (UseCRC32CIntrinsics) {
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
warning("CRC32C intrinsics are not available on this CPU");
}
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
}
// The AES intrinsic stubs require AES instruction support (of course) // The AES intrinsic stubs require AES instruction support (of course)
// but also require sse3 mode for instructions it use. // but also require sse3 mode for instructions it use.
if (UseAES && (UseSSE > 2)) { if (UseAES && (UseSSE > 2)) {
@ -704,12 +716,6 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
} }
if (UseCRC32CIntrinsics) {
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
warning("CRC32C intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
}
if (UseAdler32Intrinsics) { if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU."); warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);

View File

@ -1712,6 +1712,18 @@ const bool Matcher::match_rule_supported(int opcode) {
return ret_value; // Per default match rules are supported. return ret_value; // Per default match rules are supported.
} }
// Returns the float register pressure threshold to use, derived from the
// supplied default.
const int Matcher::float_pressure(int default_pressure_threshold) {
#ifdef _LP64
  // Machines with AVX3 have 2x more XMM registers, so the threshold is
  // doubled for them.
  if (UseAVX > 2) {
    return default_pressure_threshold * 2;
  }
#endif
  // Everything else keeps the caller-supplied default.
  return default_pressure_threshold;
}
// Max vector size in bytes. 0 if not supported. // Max vector size in bytes. 0 if not supported.
const int Matcher::vector_width_in_bytes(BasicType bt) { const int Matcher::vector_width_in_bytes(BasicType bt) {
assert(is_java_primitive(bt), "only primitive type vectors"); assert(is_java_primitive(bt), "only primitive type vectors");

View File

@ -9911,35 +9911,6 @@ instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXR
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
predicate (UseSSE<=1);
match(Set dpr1 (ExpD dpr1));
effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %}
ins_encode %{
__ fast_exp();
%}
ins_pipe( pipe_slow );
%}
instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst (ExpD src));
effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
format %{ "fast_exp $dst -> $src // KILL $rax, $rcx, $rdx" %}
ins_encode %{
__ subptr(rsp, 8);
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ fast_exp();
__ fstp_d(Address(rsp, 0));
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
__ addptr(rsp, 8);
%}
ins_pipe( pipe_slow );
%}
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1); predicate (UseSSE<=1);
// The source Double operand on FPU stack // The source Double operand on FPU stack

View File

@ -3767,6 +3767,22 @@ operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
%} %}
%} %}
// Indirect Memory Times Scale Plus Positive Index Register Operand
// Addresses [reg + idx << scale] where idx is a 32-bit int register that is
// converted to long (ConvI2L) before scaling.
operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
%{
constraint(ALLOC_IN_RC(ptr_reg));
// Only match when the long type attached to n->in(3)->in(1) has a
// non-negative lower bound (presumably n->in(3) is the LShiftL and its
// in(1) the ConvI2L of the match rule below - confirm against the matcher).
predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
match(AddP reg (LShiftL (ConvI2L idx) scale));
op_cost(10);
format %{"[$reg + pos $idx << $scale]" %}
interface(MEMORY_INTER) %{
base($reg);
index($idx);
scale($scale);
// No displacement component in this addressing form.
disp(0x0);
%}
%}
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale) operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
%{ %{
@ -4159,7 +4175,7 @@ operand cmpOpUCF2() %{
// case of this is memory operands. // case of this is memory operands.
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex, opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
indIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset, indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
indCompressedOopOffset, indCompressedOopOffset,
indirectNarrow, indOffset8Narrow, indOffset32Narrow, indirectNarrow, indOffset8Narrow, indOffset32Narrow,
indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow, indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
@ -5186,6 +5202,17 @@ instruct leaPIdxScale(rRegP dst, indIndexScale mem)
ins_pipe(ialu_reg_reg_fat); ins_pipe(ialu_reg_reg_fat);
%} %}
// Load effective address for the indPosIndexScale addressing form
// (base + positive int index << scale) into a pointer register.
instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr idxscale" %}
// 0x8D is the primary opcode byte emitted via OpcP below.
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem) instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
%{ %{
match(Set dst mem); match(Set dst mem);
@ -9871,22 +9898,6 @@ instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rc
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
instruct expD_reg(regD dst, regD src, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
match(Set dst (ExpD src));
effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
format %{ "fast_exp $dst -> $src // KILL $rax, $rcx, $rdx" %}
ins_encode %{
__ subptr(rsp, 8);
__ movdbl(Address(rsp, 0), $src$$XMMRegister);
__ fld_d(Address(rsp, 0));
__ fast_exp();
__ fstp_d(Address(rsp, 0));
__ movdbl($dst$$XMMRegister, Address(rsp, 0));
__ addptr(rsp, 8);
%}
ins_pipe( pipe_slow );
%}
//----------Arithmetic Conversion Instructions--------------------------------- //----------Arithmetic Conversion Instructions---------------------------------
instruct roundFloat_nop(regF dst) instruct roundFloat_nop(regF dst)

View File

@ -816,7 +816,7 @@ address InterpreterGenerator::generate_Reference_get_entry(void) {
// If G1 is not enabled then attempt to go through the normal entry point // If G1 is not enabled then attempt to go through the normal entry point
// Reference.get could be instrumented by jvmti // Reference.get could be instrumented by jvmti
return generate_normal_entry(false); return NULL;
} }
address InterpreterGenerator::generate_native_entry(bool synchronized) { address InterpreterGenerator::generate_native_entry(bool synchronized) {

View File

@ -42,4 +42,5 @@
// Not supported // Not supported
address generate_CRC32_update_entry() { return NULL; } address generate_CRC32_update_entry() { return NULL; }
address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; } address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
#endif // CPU_ZERO_VM_INTERPRETERGENERATOR_ZERO_HPP #endif // CPU_ZERO_VM_INTERPRETERGENERATOR_ZERO_HPP

View File

@ -4006,7 +4006,6 @@ int MatchRule::is_expensive() const {
strcmp(opType,"DivD")==0 || strcmp(opType,"DivD")==0 ||
strcmp(opType,"DivF")==0 || strcmp(opType,"DivF")==0 ||
strcmp(opType,"DivI")==0 || strcmp(opType,"DivI")==0 ||
strcmp(opType,"ExpD")==0 ||
strcmp(opType,"LogD")==0 || strcmp(opType,"LogD")==0 ||
strcmp(opType,"Log10D")==0 || strcmp(opType,"Log10D")==0 ||
strcmp(opType,"ModD")==0 || strcmp(opType,"ModD")==0 ||
@ -4143,6 +4142,8 @@ bool MatchRule::is_vector() const {
"SubVB","SubVS","SubVI","SubVL","SubVF","SubVD", "SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
"MulVS","MulVI","MulVL","MulVF","MulVD", "MulVS","MulVI","MulVL","MulVF","MulVD",
"DivVF","DivVD", "DivVF","DivVD",
"AbsVF","AbsVD",
"NegVF","NegVD",
"SqrtVD", "SqrtVD",
"AndV" ,"XorV" ,"OrV", "AndV" ,"XorV" ,"OrV",
"AddReductionVI", "AddReductionVL", "AddReductionVI", "AddReductionVL",

View File

@ -3363,11 +3363,9 @@ const char* GraphBuilder::check_can_parse(ciMethod* callee) const {
return NULL; return NULL;
} }
// negative filter: should callee NOT be inlined? returns NULL, ok to inline, or rejection msg // negative filter: should callee NOT be inlined? returns NULL, ok to inline, or rejection msg
const char* GraphBuilder::should_not_inline(ciMethod* callee) const { const char* GraphBuilder::should_not_inline(ciMethod* callee) const {
if ( callee->should_exclude()) return "excluded by CompilerOracle"; if ( callee->should_not_inline()) return "disallowed by CompileCommand";
if ( callee->should_not_inline()) return "disallowed by CompilerOracle";
if ( callee->dont_inline()) return "don't inline by annotation"; if ( callee->dont_inline()) return "don't inline by annotation";
return NULL; return NULL;
} }
@ -3698,7 +3696,7 @@ bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, Bytecode
const char* msg = ""; const char* msg = "";
if (callee->force_inline()) msg = "force inline by annotation"; if (callee->force_inline()) msg = "force inline by annotation";
if (callee->should_inline()) msg = "force inline by CompileOracle"; if (callee->should_inline()) msg = "force inline by CompileCommand";
print_inlining(callee, msg); print_inlining(callee, msg);
} else { } else {
// use heuristic controls on inlining // use heuristic controls on inlining

View File

@ -732,8 +732,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
case lir_sin: case lir_sin:
case lir_cos: case lir_cos:
case lir_log: case lir_log:
case lir_log10: case lir_log10: {
case lir_exp: {
assert(op->as_Op2() != NULL, "must be"); assert(op->as_Op2() != NULL, "must be");
LIR_Op2* op2 = (LIR_Op2*)op; LIR_Op2* op2 = (LIR_Op2*)op;
@ -743,9 +742,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
// overlap with the input. // overlap with the input.
assert(op2->_info == NULL, "not used"); assert(op2->_info == NULL, "not used");
assert(op2->_tmp5->is_illegal(), "not used"); assert(op2->_tmp5->is_illegal(), "not used");
assert(op2->_tmp2->is_valid() == (op->code() == lir_exp), "not used");
assert(op2->_tmp3->is_valid() == (op->code() == lir_exp), "not used");
assert(op2->_tmp4->is_valid() == (op->code() == lir_exp), "not used");
assert(op2->_opr1->is_valid(), "used"); assert(op2->_opr1->is_valid(), "used");
do_input(op2->_opr1); do_temp(op2->_opr1); do_input(op2->_opr1); do_temp(op2->_opr1);
@ -1775,7 +1771,6 @@ const char * LIR_Op::name() const {
case lir_tan: s = "tan"; break; case lir_tan: s = "tan"; break;
case lir_log: s = "log"; break; case lir_log: s = "log"; break;
case lir_log10: s = "log10"; break; case lir_log10: s = "log10"; break;
case lir_exp: s = "exp"; break;
case lir_pow: s = "pow"; break; case lir_pow: s = "pow"; break;
case lir_logic_and: s = "logic_and"; break; case lir_logic_and: s = "logic_and"; break;
case lir_logic_or: s = "logic_or"; break; case lir_logic_or: s = "logic_or"; break;

View File

@ -961,7 +961,6 @@ enum LIR_Code {
, lir_tan , lir_tan
, lir_log , lir_log
, lir_log10 , lir_log10
, lir_exp
, lir_pow , lir_pow
, lir_logic_and , lir_logic_and
, lir_logic_or , lir_logic_or
@ -2199,7 +2198,6 @@ class LIR_List: public CompilationResourceObj {
void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_sin , from, tmp1, to, tmp2)); } void sin (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_sin , from, tmp1, to, tmp2)); }
void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_cos , from, tmp1, to, tmp2)); } void cos (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_cos , from, tmp1, to, tmp2)); }
void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); } void tan (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_Op2(lir_tan , from, tmp1, to, tmp2)); }
void exp (LIR_Opr from, LIR_Opr to, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_exp , from, tmp1, to, tmp2, tmp3, tmp4, tmp5)); }
void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5)); } void pow (LIR_Opr arg1, LIR_Opr arg2, LIR_Opr res, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, LIR_Opr tmp4, LIR_Opr tmp5) { append(new LIR_Op2(lir_pow, arg1, arg2, res, tmp1, tmp2, tmp3, tmp4, tmp5)); }
void add (LIR_Opr left, LIR_Opr right, LIR_Opr res) { append(new LIR_Op2(lir_add, left, right, res)); } void add (LIR_Opr left, LIR_Opr right, LIR_Opr res) { append(new LIR_Op2(lir_add, left, right, res)); }

View File

@ -739,7 +739,6 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) {
case lir_cos: case lir_cos:
case lir_log: case lir_log:
case lir_log10: case lir_log10:
case lir_exp:
case lir_pow: case lir_pow:
intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op); intrinsic_op(op->code(), op->in_opr1(), op->in_opr2(), op->result_opr(), op);
break; break;

View File

@ -244,6 +244,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
void do_getClass(Intrinsic* x); void do_getClass(Intrinsic* x);
void do_currentThread(Intrinsic* x); void do_currentThread(Intrinsic* x);
void do_MathIntrinsic(Intrinsic* x); void do_MathIntrinsic(Intrinsic* x);
void do_ExpIntrinsic(Intrinsic* x);
void do_ArrayCopy(Intrinsic* x); void do_ArrayCopy(Intrinsic* x);
void do_CompareAndSwap(Intrinsic* x, ValueType* type); void do_CompareAndSwap(Intrinsic* x, ValueType* type);
void do_NIOCheckIndex(Intrinsic* x); void do_NIOCheckIndex(Intrinsic* x);

View File

@ -6588,7 +6588,6 @@ void LinearScanStatistic::collect(LinearScan* allocator) {
case lir_log10: case lir_log10:
case lir_log: case lir_log:
case lir_pow: case lir_pow:
case lir_exp:
case lir_logic_and: case lir_logic_and:
case lir_logic_or: case lir_logic_or:
case lir_logic_xor: case lir_logic_xor:

View File

@ -317,6 +317,7 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, TRACE_TIME_METHOD); FUNCTION_CASE(entry, TRACE_TIME_METHOD);
#endif #endif
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32()); FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
FUNCTION_CASE(entry, StubRoutines::dexp());
#undef FUNCTION_CASE #undef FUNCTION_CASE

View File

@ -1043,18 +1043,6 @@ MethodCounters* ciMethod::ensure_method_counters() {
return method_counters; return method_counters;
} }
// ------------------------------------------------------------------
// ciMethod::should_exclude
//
// Should this method be excluded from compilation?
bool ciMethod::should_exclude() {
check_is_loaded();
VM_ENTRY_MARK;
methodHandle mh(THREAD, get_Method());
bool ignore;
return CompilerOracle::should_exclude(mh, ignore);
}
// ------------------------------------------------------------------ // ------------------------------------------------------------------
// ciMethod::should_inline // ciMethod::should_inline
// //

View File

@ -266,7 +266,6 @@ class ciMethod : public ciMetadata {
int resolve_vtable_index(ciKlass* caller, ciKlass* receiver); int resolve_vtable_index(ciKlass* caller, ciKlass* receiver);
// Compilation directives // Compilation directives
bool should_exclude();
bool should_inline(); bool should_inline();
bool should_not_inline(); bool should_not_inline();
bool should_print_assembly(); bool should_print_assembly();

View File

@ -1157,7 +1157,7 @@ bool CompileBroker::compilation_is_prohibited(methodHandle method, int osr_bci,
method->print_short_name(tty); method->print_short_name(tty);
tty->cr(); tty->cr();
} }
method->set_not_compilable(CompLevel_all, !quietly, "excluded by CompilerOracle"); method->set_not_compilable(CompLevel_all, !quietly, "excluded by CompileCommand");
} }
return false; return false;

View File

@ -24,149 +24,17 @@
#include "precompiled.hpp" #include "precompiled.hpp"
#include "compiler/compilerOracle.hpp" #include "compiler/compilerOracle.hpp"
#include "compiler/methodMatcher.hpp"
#include "memory/allocation.inline.hpp" #include "memory/allocation.inline.hpp"
#include "memory/oopFactory.hpp" #include "memory/oopFactory.hpp"
#include "memory/resourceArea.hpp" #include "memory/resourceArea.hpp"
#include "oops/klass.hpp" #include "oops/klass.hpp"
#include "oops/method.hpp" #include "oops/method.hpp"
#include "oops/oop.inline.hpp"
#include "oops/symbol.hpp" #include "oops/symbol.hpp"
#include "runtime/handles.inline.hpp" #include "runtime/handles.inline.hpp"
#include "runtime/jniHandles.hpp" #include "runtime/jniHandles.hpp"
#include "runtime/os.hpp" #include "runtime/os.hpp"
// One node of a singly linked list of class/method (and optional signature)
// patterns. A method can be looked up against the list starting at any node.
class MethodMatcher : public CHeapObj<mtCompiler> {
 public:
  // How a pattern name is compared against a candidate name.
  enum Mode {
    Exact,
    Prefix = 1,
    Suffix = 2,
    Substring = Prefix | Suffix,
    Any,
    Unknown = -1
  };

 protected:
  Symbol*        _class_name;
  Symbol*        _method_name;
  Symbol*        _signature;     // NULL means "any signature" (see find())
  Mode           _class_mode;
  Mode           _method_mode;
  MethodMatcher* _next;

  static bool match(Symbol* candidate, Symbol* match, Mode match_mode);

  Symbol* class_name() const { return _class_name; }
  Symbol* method_name() const { return _method_name; }
  Symbol* signature() const { return _signature; }

 public:
  MethodMatcher(Symbol* class_name, Mode class_mode,
                Symbol* method_name, Mode method_mode,
                Symbol* signature, MethodMatcher* next);
  MethodMatcher(Symbol* class_name, Symbol* method_name, MethodMatcher* next);

  // utility method
  // Walks the list from this node and returns the first node whose class
  // pattern, method pattern and signature all accept the method, or NULL.
  MethodMatcher* find(methodHandle method) {
    Symbol* holder_name = method->method_holder()->name();
    Symbol* name = method->name();
    MethodMatcher* node = this;
    while (node != NULL) {
      if (match(holder_name, node->class_name(), node->_class_mode) &&
          match(name, node->method_name(), node->_method_mode)) {
        Symbol* wanted_signature = node->signature();
        if (wanted_signature == NULL || wanted_signature == method->signature()) {
          return node;
        }
      }
      node = node->_next;
    }
    return NULL;
  }

  // True if any node reachable from this one accepts the method.
  bool match(methodHandle method) {
    return find(method) != NULL;
  }

  MethodMatcher* next() const { return _next; }

  // Prints the symbol with '*' on the side(s) implied by the mode.
  // For Any only a single '*' is printed.
  static void print_symbol(Symbol* h, Mode mode) {
    ResourceMark rm;
    const bool leading_star  = (mode == Suffix || mode == Substring || mode == Any);
    const bool trailing_star = (mode == Prefix || mode == Substring);

    if (leading_star) {
      tty->print("*");
    }
    if (mode != Any) {
      h->print_symbol_on(tty);
    }
    if (trailing_star) {
      tty->print("*");
    }
  }

  // Prints "<class pattern>.<method pattern>" followed by the signature,
  // if one is set. No line terminator is written.
  void print_base() {
    print_symbol(class_name(), _class_mode);
    tty->print(".");
    print_symbol(method_name(), _method_mode);
    Symbol* sig = signature();
    if (sig != NULL) {
      sig->print_symbol_on(tty);
    }
  }

  virtual void print() {
    print_base();
    tty->cr();
  }
};
// Builds a matcher that compares both the class and the method name exactly
// and accepts any signature (the signature is left NULL).
MethodMatcher::MethodMatcher(Symbol* class_name, Symbol* method_name, MethodMatcher* next)
  : _class_name(class_name),
    _method_name(method_name),
    _signature(NULL),
    _class_mode(MethodMatcher::Exact),
    _method_mode(MethodMatcher::Exact),
    _next(next) {
}
// Builds a matcher with explicit match modes for the class and method names
// and an optional signature. Initializers are listed in member declaration
// order, which is the order in which they are executed.
MethodMatcher::MethodMatcher(Symbol* class_name, Mode class_mode,
                             Symbol* method_name, Mode method_mode,
                             Symbol* signature, MethodMatcher* next)
  : _class_name(class_name),
    _method_name(method_name),
    _signature(signature),
    _class_mode(class_mode),
    _method_mode(method_mode),
    _next(next) {
}
// Compares a candidate name with a pattern name according to match_mode:
//   Any       - always true
//   Exact     - true only if both are the same Symbol pointer
//   Prefix    - candidate text starts with the pattern text
//   Suffix    - candidate text ends with the pattern text
//   Substring - pattern text occurs somewhere in the candidate text
// Every other mode value yields false.
bool MethodMatcher::match(Symbol* candidate, Symbol* match, Mode match_mode) {
  if (match_mode == Any) {
    return true;
  }

  if (match_mode == Exact) {
    return candidate == match;
  }

  // The C strings below are resource allocated; release them on return.
  ResourceMark rm;
  const char* candidate_string = candidate->as_C_string();
  const char* match_string = match->as_C_string();

  switch (match_mode) {
  case Prefix:
    // Only the first strlen(match_string) characters decide the result.
    // strstr() would keep scanning the rest of the candidate after a
    // mismatch at position 0; strncmp() stops at once. An empty pattern
    // still matches every candidate, as before.
    return strncmp(candidate_string, match_string, strlen(match_string)) == 0;

  case Suffix: {
    size_t clen = strlen(candidate_string);
    size_t mlen = strlen(match_string);
    return clen >= mlen && strcmp(candidate_string + clen - mlen, match_string) == 0;
  }

  case Substring:
    return strstr(candidate_string, match_string) != NULL;

  default:
    return false;
  }
}
enum OptionType { enum OptionType {
IntxType, IntxType,
UintxType, UintxType,
@ -202,114 +70,6 @@ template<> OptionType get_type_for<double>() {
return DoubleType; return DoubleType;
} }
// Generic case: the value is returned unchanged (plain by-value copy).
template<typename T>
static const T copy_value(const T value)
{
  return value;
}
// ccstr case: hand back a duplicate made with os::strdup_check_oom()
// instead of the caller's pointer.
template<> const ccstr copy_value<ccstr>(const ccstr value) {
  const ccstr duplicate = (const ccstr)os::strdup_check_oom(value);
  return duplicate;
}
// A MethodMatcher node that additionally carries a named option and its
// value of type T. The option name is duplicated on construction and
// released in the destructor.
template <typename T>
class TypedMethodOptionMatcher : public MethodMatcher {
  const char* _option;
  OptionType  _type;
  const T     _value;

 public:
  TypedMethodOptionMatcher(Symbol* class_name, Mode class_mode,
                           Symbol* method_name, Mode method_mode,
                           Symbol* signature, const char* opt,
                           const T value, MethodMatcher* next)
    : MethodMatcher(class_name, class_mode, method_name, method_mode, signature, next),
      _type(get_type_for<T>()),
      _value(copy_value<T>(value)) {
    _option = os::strdup_check_oom(opt);
  }

  ~TypedMethodOptionMatcher() {
    os::free((void*)_option);
  }

  // Starting at this node, returns the first node that accepts the method
  // and whose option name equals opt, or NULL if there is none.
  TypedMethodOptionMatcher* match(methodHandle method, const char* opt) {
    for (TypedMethodOptionMatcher* node = this; node != NULL; node = node->next()) {
      // Jump ahead to the next node (possibly this one) accepting the method.
      node = (TypedMethodOptionMatcher*)node->find(method);
      if (node == NULL) {
        break;
      }
      if (strcmp(node->_option, opt) == 0) {
        return node;
      }
    }
    return NULL;
  }

  TypedMethodOptionMatcher* next() {
    return (TypedMethodOptionMatcher*)_next;
  }

  OptionType get_type(void) {
    return _type;
  }

  T value() { return _value; }

  // Fallback printer for value types without a dedicated specialization.
  void print() {
    ttyLocker ttyl;
    print_base();
    tty->print(" %s", _option);
    tty->print(" <unknown option type>");
    tty->cr();
  }
};
// Per-type printers: each writes the method pattern, the type name, the
// option name and the value on one line while holding the tty lock.

// intx option
template<>
void TypedMethodOptionMatcher<intx>::print() {
  ttyLocker ttyl;
  print_base();
  tty->print(" intx %s", _option);
  tty->print(" = " INTX_FORMAT, _value);
  tty->cr();
}

// uintx option
template<>
void TypedMethodOptionMatcher<uintx>::print() {
  ttyLocker ttyl;
  print_base();
  tty->print(" uintx %s", _option);
  tty->print(" = " UINTX_FORMAT, _value);
  tty->cr();
}

// bool option, printed as "true" or "false"
template<>
void TypedMethodOptionMatcher<bool>::print() {
  ttyLocker ttyl;
  print_base();
  tty->print(" bool %s", _option);
  tty->print(" = %s", _value ? "true" : "false");
  tty->cr();
}

// string option, printed in single quotes
template<>
void TypedMethodOptionMatcher<ccstr>::print() {
  ttyLocker ttyl;
  print_base();
  tty->print(" const char* %s", _option);
  tty->print(" = '%s'", _value);
  tty->cr();
}

// double option
template<>
void TypedMethodOptionMatcher<double>::print() {
  ttyLocker ttyl;
  print_base();
  tty->print(" double %s", _option);
  tty->print(" = %f", _value);
  tty->cr();
}
// this must parallel the command_names below // this must parallel the command_names below
enum OracleCommand { enum OracleCommand {
UnknownCommand = -1, UnknownCommand = -1,
@ -342,8 +102,198 @@ static const char * command_names[] = {
}; };
class MethodMatcher; class MethodMatcher;
static MethodMatcher* lists[OracleCommandCount] = { 0, }; class TypedMethodOptionMatcher;
static BasicMatcher* lists[OracleCommandCount] = { 0, };
static TypedMethodOptionMatcher* option_list = NULL;
// A method pattern together with one named, typed option value. Nodes are
// chained through their own _next pointer. The value is kept in a union and
// _type records which member is meaningful.
class TypedMethodOptionMatcher : public MethodMatcher {
 private:
  TypedMethodOptionMatcher* _next;
  const char*               _option;
  OptionType                _type;

 public:
  union {
    bool   bool_value;
    intx   intx_value;
    uintx  uintx_value;
    double double_value;
    ccstr  ccstr_value;
  } _u;

  // Starts out unlinked, with no option name, an unknown type and an
  // all-zero value.
  TypedMethodOptionMatcher()
    : MethodMatcher(),
      _next(NULL),
      _option(NULL),
      _type(UnknownType) {
    memset(&_u, 0, sizeof(_u));
  }

  static TypedMethodOptionMatcher* parse_method_pattern(char*& line, const char*& error_msg);
  TypedMethodOptionMatcher* match(methodHandle method, const char* opt, OptionType type);

  // Records the option name (duplicated), its type and the successor node.
  void init(const char* opt, OptionType type, TypedMethodOptionMatcher* next) {
    _next = next;
    _type = type;
    _option = os::strdup_check_oom(opt);
  }

  void set_next(TypedMethodOptionMatcher* next) { _next = next; }
  TypedMethodOptionMatcher* next() { return _next; }
  OptionType type() { return _type; }

  template<typename T> T value();
  template<typename T> void set_value(T value);

  void print();
  void print_all();
  TypedMethodOptionMatcher* clone();
  ~TypedMethodOptionMatcher();
};
// A few templated accessors instead of a full template class.
// Each specialization reads the union member that corresponds to T. The
// stored _type is not checked here.
template<>
intx TypedMethodOptionMatcher::value<intx>() {
  return _u.intx_value;
}

template<>
uintx TypedMethodOptionMatcher::value<uintx>() {
  return _u.uintx_value;
}

template<>
bool TypedMethodOptionMatcher::value<bool>() {
  return _u.bool_value;
}

template<>
double TypedMethodOptionMatcher::value<double>() {
  return _u.double_value;
}

template<>
ccstr TypedMethodOptionMatcher::value<ccstr>() {
  return _u.ccstr_value;
}
// Setters: each specialization writes the union member that corresponds to
// the argument type. Only the ccstr variant allocates.
template<>
void TypedMethodOptionMatcher::set_value(intx value) {
  _u.intx_value = value;
}

template<>
void TypedMethodOptionMatcher::set_value(uintx value) {
  _u.uintx_value = value;
}

template<>
void TypedMethodOptionMatcher::set_value(double value) {
  _u.double_value = value;
}

template<>
void TypedMethodOptionMatcher::set_value(bool value) {
  _u.bool_value = value;
}

// Stores a duplicate of the string rather than the caller's pointer.
template<>
void TypedMethodOptionMatcher::set_value(ccstr value) {
  const ccstr duplicate = (const ccstr)os::strdup_check_oom(value);
  _u.ccstr_value = duplicate;
}
// Prints the method pattern followed by "<type> <option> = <value>" on one
// line, holding the tty lock. An option type outside the five handled here
// is treated as a VM error.
void TypedMethodOptionMatcher::print() {
  ttyLocker ttyl;
  print_base(tty);

  const char* const option_name = _option;
  switch (_type) {
    case IntxType: {
      tty->print_cr(" intx %s = " INTX_FORMAT, option_name, value<intx>());
      break;
    }
    case UintxType: {
      tty->print_cr(" uintx %s = " UINTX_FORMAT, option_name, value<uintx>());
      break;
    }
    case BoolType: {
      tty->print_cr(" bool %s = %s", option_name, value<bool>() ? "true" : "false");
      break;
    }
    case DoubleType: {
      tty->print_cr(" double %s = %f", option_name, value<double>());
      break;
    }
    case CcstrType: {
      tty->print_cr(" const char* %s = '%s'", option_name, value<ccstr>());
      break;
    }
    default: {
      ShouldNotReachHere();
    }
  }
}
void TypedMethodOptionMatcher::print_all() {
print();
if (_next != NULL) {
tty->print(" ");
_next->print_all();
}
}
// Creates a new matcher with the same class/method/signature pattern as this
// one. Only the pattern is copied: the option name, type, value and list
// link of the copy keep the defaults set by the constructor.
TypedMethodOptionMatcher* TypedMethodOptionMatcher::clone() {
  TypedMethodOptionMatcher* copy = new TypedMethodOptionMatcher();

  copy->_class_mode  = _class_mode;
  copy->_class_name  = _class_name;
  copy->_method_mode = _method_mode;
  copy->_method_name = _method_name;
  copy->_signature   = _signature;

  // Need to ref count the symbols: the copy now refers to them as well.
  Symbol* const shared[] = { _class_name, _method_name, _signature };
  for (size_t i = 0; i < sizeof(shared) / sizeof(shared[0]); i++) {
    if (shared[i] != NULL) {
      shared[i]->increment_refcount();
    }
  }
  return copy;
}
// Releases everything this matcher owns: the duplicated option name, the
// duplicated string value (ccstr options only) and one reference on each
// pattern symbol.
TypedMethodOptionMatcher::~TypedMethodOptionMatcher() {
  if (_option != NULL) {
    os::free((void*)_option);
  }

  // set_value<ccstr>() stores a copy made with os::strdup_check_oom().
  // Without this it would leak whenever a string-valued matcher is deleted.
  // The union is zeroed by the constructor, so a matcher whose value was
  // never set has a NULL pointer here and is skipped.
  if (_type == CcstrType && _u.ccstr_value != NULL) {
    os::free((void*)_u.ccstr_value);
  }

  if (_class_name != NULL) {
    _class_name->decrement_refcount();
  }
  if (_method_name != NULL) {
    _method_name->decrement_refcount();
  }
  if (_signature != NULL) {
    _signature->decrement_refcount();
  }
}
// Allocates a matcher and lets MethodMatcher::parse_method_pattern() fill in
// the pattern from line. Returns the new matcher, or NULL (after deleting
// it) if the parser reported an error through error_msg.
// error_msg must be NULL on entry.
TypedMethodOptionMatcher* TypedMethodOptionMatcher::parse_method_pattern(char*& line, const char*& error_msg) {
  assert(error_msg == NULL, "Dont call here with error_msg already set");

  TypedMethodOptionMatcher* parsed = new TypedMethodOptionMatcher();
  MethodMatcher::parse_method_pattern(line, error_msg, parsed);
  if (error_msg == NULL) {
    return parsed;
  }

  delete parsed;
  return NULL;
}
// Searches the list from this node for the first entry with the given option
// type and option name whose pattern matches the method. Returns NULL if no
// entry qualifies.
TypedMethodOptionMatcher* TypedMethodOptionMatcher::match(methodHandle method, const char* opt, OptionType type) {
  for (TypedMethodOptionMatcher* node = this; node != NULL; node = node->next()) {
    // Fastest compare first: type, then option name, then the pattern.
    if (node->type() == type &&
        strcmp(node->_option, opt) == 0 &&
        node->matches(method)) {
      return node;
    }
  }
  return NULL;
}
// Turns matcher into an entry for the given option and value and makes it
// the new head of the global option_list.
template<typename T>
static void add_option_string(TypedMethodOptionMatcher* matcher,
                              const char* option,
                              T value) {
  assert(matcher != option_list, "No circular lists please");

  // Link in front of the current head, then store the typed value.
  TypedMethodOptionMatcher* previous_head = option_list;
  matcher->init(option, get_type_for<T>(), previous_head);
  matcher->set_value<T>(value);
  option_list = matcher;
}
static bool check_predicate(OracleCommand command, methodHandle method) { static bool check_predicate(OracleCommand command, methodHandle method) {
return ((lists[command] != NULL) && return ((lists[command] != NULL) &&
@ -351,51 +301,27 @@ static bool check_predicate(OracleCommand command, methodHandle method) {
lists[command]->match(method)); lists[command]->match(method));
} }
static void add_predicate(OracleCommand command, BasicMatcher* bm) {
static MethodMatcher* add_predicate(OracleCommand command,
Symbol* class_name, MethodMatcher::Mode c_mode,
Symbol* method_name, MethodMatcher::Mode m_mode,
Symbol* signature) {
assert(command != OptionCommand, "must use add_option_string"); assert(command != OptionCommand, "must use add_option_string");
if (command == LogCommand && !LogCompilation && lists[LogCommand] == NULL) if (command == LogCommand && !LogCompilation && lists[LogCommand] == NULL) {
tty->print_cr("Warning: +LogCompilation must be enabled in order for individual methods to be logged."); tty->print_cr("Warning: +LogCompilation must be enabled in order for individual methods to be logged.");
lists[command] = new MethodMatcher(class_name, c_mode, method_name, m_mode, signature, lists[command]);
return lists[command];
}
template<typename T>
static MethodMatcher* add_option_string(Symbol* class_name, MethodMatcher::Mode c_mode,
Symbol* method_name, MethodMatcher::Mode m_mode,
Symbol* signature,
const char* option,
T value) {
lists[OptionCommand] = new TypedMethodOptionMatcher<T>(class_name, c_mode, method_name, m_mode,
signature, option, value, lists[OptionCommand]);
return lists[OptionCommand];
}
template<typename T>
static bool get_option_value(methodHandle method, const char* option, T& value) {
TypedMethodOptionMatcher<T>* m;
if (lists[OptionCommand] != NULL
&& (m = ((TypedMethodOptionMatcher<T>*)lists[OptionCommand])->match(method, option)) != NULL
&& m->get_type() == get_type_for<T>()) {
value = m->value();
return true;
} else {
return false;
} }
} bm->set_next(lists[command]);
lists[command] = bm;
bool CompilerOracle::has_option_string(methodHandle method, const char* option) { return;
bool value = false;
get_option_value(method, option, value);
return value;
} }
template<typename T> template<typename T>
bool CompilerOracle::has_option_value(methodHandle method, const char* option, T& value) { bool CompilerOracle::has_option_value(methodHandle method, const char* option, T& value) {
return ::get_option_value(method, option, value); if (option_list != NULL) {
TypedMethodOptionMatcher* m = option_list->match(method, option, get_type_for<T>());
if (m != NULL) {
value = m->value<T>();
return true;
}
}
return false;
} }
// Explicit instantiation for all OptionTypes supported. // Explicit instantiation for all OptionTypes supported.
@ -405,6 +331,12 @@ template bool CompilerOracle::has_option_value<bool>(methodHandle method, const
template bool CompilerOracle::has_option_value<ccstr>(methodHandle method, const char* option, ccstr& value); template bool CompilerOracle::has_option_value<ccstr>(methodHandle method, const char* option, ccstr& value);
template bool CompilerOracle::has_option_value<double>(methodHandle method, const char* option, double& value); template bool CompilerOracle::has_option_value<double>(methodHandle method, const char* option, double& value);
// Looks up a bool option for the method and returns its value. The result
// starts out false and is returned as-is; the return value of
// has_option_value() is not consulted.
bool CompilerOracle::has_option_string(methodHandle method, const char* option) {
  bool enabled = false;
  has_option_value(method, option, enabled);
  return enabled;
}
bool CompilerOracle::should_exclude(methodHandle method, bool& quietly) { bool CompilerOracle::should_exclude(methodHandle method, bool& quietly) {
quietly = true; quietly = true;
if (lists[ExcludeCommand] != NULL) { if (lists[ExcludeCommand] != NULL) {
@ -420,19 +352,18 @@ bool CompilerOracle::should_exclude(methodHandle method, bool& quietly) {
return false; return false;
} }
bool CompilerOracle::should_inline(methodHandle method) { bool CompilerOracle::should_inline(methodHandle method) {
return (check_predicate(InlineCommand, method)); return (check_predicate(InlineCommand, method));
} }
// Check both DontInlineCommand and ExcludeCommand here
// - consistent behavior for all compilers
bool CompilerOracle::should_not_inline(methodHandle method) { bool CompilerOracle::should_not_inline(methodHandle method) {
return (check_predicate(DontInlineCommand, method)); return check_predicate(DontInlineCommand, method) || check_predicate(ExcludeCommand, method);
} }
bool CompilerOracle::should_print(methodHandle method) { bool CompilerOracle::should_print(methodHandle method) {
return (check_predicate(PrintCommand, method)); return check_predicate(PrintCommand, method);
} }
bool CompilerOracle::should_print_methods() { bool CompilerOracle::should_print_methods() {
@ -445,12 +376,10 @@ bool CompilerOracle::should_log(methodHandle method) {
return (check_predicate(LogCommand, method)); return (check_predicate(LogCommand, method));
} }
bool CompilerOracle::should_break_at(methodHandle method) { bool CompilerOracle::should_break_at(methodHandle method) {
return check_predicate(BreakCommand, method); return check_predicate(BreakCommand, method);
} }
static OracleCommand parse_command_name(const char * line, int* bytes_read) { static OracleCommand parse_command_name(const char * line, int* bytes_read) {
assert(ARRAY_SIZE(command_names) == OracleCommandCount, assert(ARRAY_SIZE(command_names) == OracleCommandCount,
"command_names size mismatch"); "command_names size mismatch");
@ -516,83 +445,11 @@ static void usage() {
tty->cr(); tty->cr();
}; };
// The JVM specification defines the allowed characters.
// Tokens that are disallowed by the JVM specification can have
// a meaning to the parser so we need to include them here.
// The parser does not enforce all rules of the JVMS - a successful parse
// does not mean that it is an allowed name. Illegal names will
// be ignored since they never can match a class or method.
//
// '\0' and 0xf0-0xff are disallowed in constant string values
// 0x20 ' ', 0x09 '\t', and 0x2c ',' are used in the matching
// 0x5b '[' and 0x5d ']' can not be used because of the matcher
// 0x28 '(' and 0x29 ')' are used for the signature
// 0x2e '.' is always replaced before the matching
// 0x2f '/' is only used in the class name as package separator
// RANGEBASE is the body of a scanf scanset: the byte values accepted inside
// a name. It lists 0x01-0xef except 0x09 (tab), 0x20 (space), 0x28 '(',
// 0x29 ')', 0x2e '.', 0x2f '/', 0x5b '[' and 0x5d ']'.
#define RANGEBASE "\x1\x2\x3\x4\x5\x6\x7\x8\xa\xb\xc\xd\xe\xf" \
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
"\x21\x22\x23\x24\x25\x26\x27\x2a\x2b\x2c\x2d" \
"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
"\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5c\x5e\x5f" \
"\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" \
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" \
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" \
"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" \
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"

// Complete scansets built from RANGEBASE, both with a leading '*':
//   RANGE0     - scan_line() uses it for the method name
//   RANGESLASH - additionally accepts '/'; scan_line() uses it for the
//                class name
#define RANGE0 "[*" RANGEBASE "]"
#define RANGESLASH "[*" RANGEBASE "/]"
// Derives the match mode from the '*' wildcards at the ends of name and
// strips those wildcards from name in place.
//
//   "foo"   -> Exact       "foo*"  -> Prefix
//   "*foo"  -> Suffix      "*foo*" -> Substring
//   "*", "**", ...         -> Any (name becomes empty)
//
// A '*' left anywhere else in the name is an error: error_msg is set and
// Unknown is returned.
static MethodMatcher::Mode check_mode(char name[], const char*& error_msg) {
  int match = MethodMatcher::Exact;

  // Count the leading wildcards and remove them with a single move.
  size_t leading_stars = 0;
  while (name[leading_stars] == '*') {
    leading_stars++;
  }
  if (leading_stars > 0) {
    match |= MethodMatcher::Suffix;
    // Copy remaining string plus NUL to the beginning
    memmove(name, name + leading_stars, strlen(name + leading_stars) + 1);

    // A pattern made only of wildcards matches anything. The previous
    // strcmp(name, "*") test ran after the leading stars had already been
    // stripped, so it could never succeed and such patterns were returned
    // as Suffix with an empty name.
    if (name[0] == '\0') {
      return MethodMatcher::Any;
    }
  }

  size_t len = strlen(name);
  while (len > 0 && name[len - 1] == '*') {
    match |= MethodMatcher::Prefix;
    name[--len] = '\0';
  }

  if (strstr(name, "*") != NULL) {
    error_msg = " Embedded * not allowed";
    return MethodMatcher::Unknown;
  }
  return (MethodMatcher::Mode)match;
}
// Reads a class name and a method name from line and determines the match
// mode of each. On success *bytes_read holds the number of characters
// consumed by sscanf. Returns false if two names could not be read or if
// either name has an Unknown match mode (check_mode() sets error_msg then).
static bool scan_line(const char * line,
                      char class_name[], MethodMatcher::Mode* c_mode,
                      char method_name[], MethodMatcher::Mode* m_mode,
                      int* bytes_read, const char*& error_msg) {
  *bytes_read = 0;
  error_msg = NULL;

  const int fields_read =
      sscanf(line, "%*[ \t]%255" RANGESLASH "%*[ ]" "%255" RANGE0 "%n", class_name, method_name, bytes_read);
  if (fields_read != 2) {
    return false;
  }

  *c_mode = check_mode(class_name, error_msg);
  *m_mode = check_mode(method_name, error_msg);

  const bool class_ok  = (*c_mode != MethodMatcher::Unknown);
  const bool method_ok = (*m_mode != MethodMatcher::Unknown);
  return class_ok && method_ok;
}
// Scan next flag and value in line, return MethodMatcher object on success, NULL on failure. // Scan next flag and value in line, return MethodMatcher object on success, NULL on failure.
// On failure, error_msg contains description for the first error. // On failure, error_msg contains description for the first error.
// For future extensions: set error_msg on first error. // For future extensions: set error_msg on first error.
static MethodMatcher* scan_flag_and_value(const char* type, const char* line, int& total_bytes_read, static void scan_flag_and_value(const char* type, const char* line, int& total_bytes_read,
Symbol* c_name, MethodMatcher::Mode c_match, TypedMethodOptionMatcher* matcher,
Symbol* m_name, MethodMatcher::Mode m_match,
Symbol* signature,
char* errorbuf, const int buf_size) { char* errorbuf, const int buf_size) {
total_bytes_read = 0; total_bytes_read = 0;
int bytes_read = 0; int bytes_read = 0;
@ -608,7 +465,8 @@ static MethodMatcher* scan_flag_and_value(const char* type, const char* line, in
intx value; intx value;
if (sscanf(line, "%*[ \t]" INTX_FORMAT "%n", &value, &bytes_read) == 1) { if (sscanf(line, "%*[ \t]" INTX_FORMAT "%n", &value, &bytes_read) == 1) {
total_bytes_read += bytes_read; total_bytes_read += bytes_read;
return add_option_string(c_name, c_match, m_name, m_match, signature, flag, value); add_option_string(matcher, flag, value);
return;
} else { } else {
jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s ", flag, type); jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s ", flag, type);
} }
@ -616,7 +474,8 @@ static MethodMatcher* scan_flag_and_value(const char* type, const char* line, in
uintx value; uintx value;
if (sscanf(line, "%*[ \t]" UINTX_FORMAT "%n", &value, &bytes_read) == 1) { if (sscanf(line, "%*[ \t]" UINTX_FORMAT "%n", &value, &bytes_read) == 1) {
total_bytes_read += bytes_read; total_bytes_read += bytes_read;
return add_option_string(c_name, c_match, m_name, m_match, signature, flag, value); add_option_string(matcher, flag, value);
return;
} else { } else {
jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type); jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type);
} }
@ -625,7 +484,8 @@ static MethodMatcher* scan_flag_and_value(const char* type, const char* line, in
char* value = NEW_RESOURCE_ARRAY(char, strlen(line) + 1); char* value = NEW_RESOURCE_ARRAY(char, strlen(line) + 1);
if (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", value, &bytes_read) == 1) { if (sscanf(line, "%*[ \t]%255[_a-zA-Z0-9]%n", value, &bytes_read) == 1) {
total_bytes_read += bytes_read; total_bytes_read += bytes_read;
return add_option_string(c_name, c_match, m_name, m_match, signature, flag, (ccstr)value); add_option_string(matcher, flag, (ccstr)value);
return;
} else { } else {
jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type); jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type);
} }
@ -646,7 +506,8 @@ static MethodMatcher* scan_flag_and_value(const char* type, const char* line, in
next_value += bytes_read; next_value += bytes_read;
end_value = next_value-1; end_value = next_value-1;
} }
return add_option_string(c_name, c_match, m_name, m_match, signature, flag, (ccstr)value); add_option_string(matcher, flag, (ccstr)value);
return;
} else { } else {
jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type); jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type);
} }
@ -655,10 +516,12 @@ static MethodMatcher* scan_flag_and_value(const char* type, const char* line, in
if (sscanf(line, "%*[ \t]%255[a-zA-Z]%n", value, &bytes_read) == 1) { if (sscanf(line, "%*[ \t]%255[a-zA-Z]%n", value, &bytes_read) == 1) {
if (strcmp(value, "true") == 0) { if (strcmp(value, "true") == 0) {
total_bytes_read += bytes_read; total_bytes_read += bytes_read;
return add_option_string(c_name, c_match, m_name, m_match, signature, flag, true); add_option_string(matcher, flag, true);
return;
} else if (strcmp(value, "false") == 0) { } else if (strcmp(value, "false") == 0) {
total_bytes_read += bytes_read; total_bytes_read += bytes_read;
return add_option_string(c_name, c_match, m_name, m_match, signature, flag, false); add_option_string(matcher, flag, false);
return;
} else { } else {
jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type); jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type);
} }
@ -673,7 +536,8 @@ static MethodMatcher* scan_flag_and_value(const char* type, const char* line, in
char value[512] = ""; char value[512] = "";
jio_snprintf(value, sizeof(value), "%s.%s", buffer[0], buffer[1]); jio_snprintf(value, sizeof(value), "%s.%s", buffer[0], buffer[1]);
total_bytes_read += bytes_read; total_bytes_read += bytes_read;
return add_option_string(c_name, c_match, m_name, m_match, signature, flag, atof(value)); add_option_string(matcher, flag, atof(value));
return;
} else { } else {
jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type); jio_snprintf(errorbuf, buf_size, " Value cannot be read for flag %s of type %s", flag, type);
} }
@ -683,7 +547,7 @@ static MethodMatcher* scan_flag_and_value(const char* type, const char* line, in
} else { } else {
jio_snprintf(errorbuf, buf_size, " Flag name for type %s should be alphanumeric ", type); jio_snprintf(errorbuf, buf_size, " Flag name for type %s should be alphanumeric ", type);
} }
return NULL; return;
} }
int skip_whitespace(char* line) { int skip_whitespace(char* line) {
@ -693,31 +557,20 @@ int skip_whitespace(char* line) {
return whitespace_read; return whitespace_read;
} }
// Reports a CompileCommand parse failure on tty: a header line, the
// offending input line and the error text, followed by the usage tip.
// The tty lock is held for the whole report. error_msg must not be NULL.
void CompilerOracle::print_parse_error(const char*& error_msg, char* original_line) {
  assert(error_msg != NULL, "Must have error_message");

  ttyLocker ttyl;
  tty->print_cr("CompileCommand: An error occurred during parsing");
  tty->print_cr("Line: %s", original_line);
  tty->print_cr("Error: %s", error_msg);

  CompilerOracle::print_tip();
}
void CompilerOracle::parse_from_line(char* line) { void CompilerOracle::parse_from_line(char* line) {
if (line[0] == '\0') return; if (line[0] == '\0') return;
if (line[0] == '#') return; if (line[0] == '#') return;
bool have_colon = (strstr(line, "::") != NULL);
for (char* lp = line; *lp != '\0'; lp++) {
// Allow '.' to separate the class name from the method name.
// This is the preferred spelling of methods:
// exclude java/lang/String.indexOf(I)I
// Allow ',' for spaces (eases command line quoting).
// exclude,java/lang/String.indexOf
// For backward compatibility, allow space as separator also.
// exclude java/lang/String indexOf
// exclude,java/lang/String,indexOf
// For easy cut-and-paste of method names, allow VM output format
// as produced by Method::print_short_name:
// exclude java.lang.String::indexOf
// For simple implementation convenience here, convert them all to space.
if (have_colon) {
if (*lp == '.') *lp = '/'; // dots build the package prefix
if (*lp == ':') *lp = ' ';
}
if (*lp == ',' || *lp == '.') *lp = ' ';
}
char* original_line = line; char* original_line = line;
int bytes_read; int bytes_read;
OracleCommand command = parse_command_name(line, &bytes_read); OracleCommand command = parse_command_name(line, &bytes_read);
@ -742,32 +595,7 @@ void CompilerOracle::parse_from_line(char* line) {
return; return;
} }
MethodMatcher::Mode c_match = MethodMatcher::Exact; const char* error_msg = NULL;
MethodMatcher::Mode m_match = MethodMatcher::Exact;
char class_name[256];
char method_name[256];
char sig[1024];
char errorbuf[1024];
const char* error_msg = NULL; // description of first error that appears
MethodMatcher* match = NULL;
if (scan_line(line, class_name, &c_match, method_name, &m_match, &bytes_read, error_msg)) {
EXCEPTION_MARK;
Symbol* c_name = SymbolTable::new_symbol(class_name, CHECK);
Symbol* m_name = SymbolTable::new_symbol(method_name, CHECK);
Symbol* signature = NULL;
line += bytes_read;
// there might be a signature following the method.
// signatures always begin with ( so match that by hand
line += skip_whitespace(line);
if (1 == sscanf(line, "(%254[[);/" RANGEBASE "]%n", sig + 1, &bytes_read)) {
sig[0] = '(';
line += bytes_read;
signature = SymbolTable::new_symbol(sig, CHECK);
}
if (command == OptionCommand) { if (command == OptionCommand) {
// Look for trailing options. // Look for trailing options.
// //
@ -783,18 +611,24 @@ void CompilerOracle::parse_from_line(char* line) {
// the following types: intx, uintx, bool, ccstr, ccstrlist, and double. // the following types: intx, uintx, bool, ccstr, ccstrlist, and double.
// //
// For future extensions: extend scan_flag_and_value() // For future extensions: extend scan_flag_and_value()
char option[256]; // stores flag for Type (1) and type of Type (2) char option[256]; // stores flag for Type (1) and type of Type (2)
line++; // skip the ','
TypedMethodOptionMatcher* archetype = TypedMethodOptionMatcher::parse_method_pattern(line, error_msg);
if (archetype == NULL) {
assert(error_msg != NULL, "Must have error_message");
print_parse_error(error_msg, original_line);
return;
}
line += skip_whitespace(line); line += skip_whitespace(line);
// This is unnecessarily complex. Should retire multi-option lines and skip while loop
while (sscanf(line, "%255[a-zA-Z0-9]%n", option, &bytes_read) == 1) { while (sscanf(line, "%255[a-zA-Z0-9]%n", option, &bytes_read) == 1) {
if (match != NULL && !_quiet) {
// Print out the last match added
ttyLocker ttyl;
tty->print("CompileCommand: %s ", command_names[command]);
match->print();
}
line += bytes_read; line += bytes_read;
// typed_matcher is used as a blueprint for each option, deleted at the end
TypedMethodOptionMatcher* typed_matcher = archetype->clone();
if (strcmp(option, "intx") == 0 if (strcmp(option, "intx") == 0
|| strcmp(option, "uintx") == 0 || strcmp(option, "uintx") == 0
|| strcmp(option, "bool") == 0 || strcmp(option, "bool") == 0
@ -802,49 +636,45 @@ void CompilerOracle::parse_from_line(char* line) {
|| strcmp(option, "ccstrlist") == 0 || strcmp(option, "ccstrlist") == 0
|| strcmp(option, "double") == 0 || strcmp(option, "double") == 0
) { ) {
char errorbuf[1024] = {0};
// Type (2) option: parse flag name and value. // Type (2) option: parse flag name and value.
match = scan_flag_and_value(option, line, bytes_read, scan_flag_and_value(option, line, bytes_read, typed_matcher, errorbuf, sizeof(errorbuf));
c_name, c_match, m_name, m_match, signature, if (*errorbuf != '\0') {
errorbuf, sizeof(errorbuf));
if (match == NULL) {
error_msg = errorbuf; error_msg = errorbuf;
break; print_parse_error(error_msg, original_line);
return;
} }
line += bytes_read; line += bytes_read;
} else { } else {
// Type (1) option // Type (1) option
match = add_option_string(c_name, c_match, m_name, m_match, signature, option, true); add_option_string(typed_matcher, option, true);
}
if (typed_matcher != NULL && !_quiet) {
// Print out the last match added
assert(error_msg == NULL, "No error here");
ttyLocker ttyl;
tty->print("CompileCommand: %s ", command_names[command]);
typed_matcher->print();
} }
line += skip_whitespace(line); line += skip_whitespace(line);
} // while( } // while(
} else { delete archetype;
match = add_predicate(command, c_name, c_match, m_name, m_match, signature); } else { // not an OptionCommand)
} assert(error_msg == NULL, "Don't call here with error_msg already set");
BasicMatcher* matcher = BasicMatcher::parse_method_pattern(line, error_msg);
if (error_msg != NULL) {
assert(matcher == NULL, "consistency");
print_parse_error(error_msg, original_line);
return;
} }
add_predicate(command, matcher);
if (!_quiet) {
ttyLocker ttyl; ttyLocker ttyl;
if (error_msg != NULL) {
// an error has happened
tty->print_cr("CompileCommand: An error occured during parsing");
tty->print_cr(" \"%s\"", original_line);
if (error_msg != NULL) {
tty->print_cr("%s", error_msg);
}
CompilerOracle::print_tip();
} else {
// check for remaining characters
bytes_read = 0;
sscanf(line, "%*[ \t]%n", &bytes_read);
if (line[bytes_read] != '\0') {
tty->print_cr("CompileCommand: Bad pattern");
tty->print_cr(" \"%s\"", original_line);
tty->print_cr(" Unrecognized text %s after command ", line);
CompilerOracle::print_tip();
} else if (match != NULL && !_quiet) {
tty->print("CompileCommand: %s ", command_names[command]); tty->print("CompileCommand: %s ", command_names[command]);
match->print(); matcher->print(tty);
tty->cr();
} }
} }
} }
@ -1045,10 +875,12 @@ void CompilerOracle::parse_compile_only(char * line) {
Symbol* m_name = SymbolTable::new_symbol(methodName, CHECK); Symbol* m_name = SymbolTable::new_symbol(methodName, CHECK);
Symbol* signature = NULL; Symbol* signature = NULL;
add_predicate(CompileOnlyCommand, c_name, c_match, m_name, m_match, signature); BasicMatcher* bm = new BasicMatcher();
bm->init(c_name, c_match, m_name, m_match, signature);
add_predicate(CompileOnlyCommand, bm);
if (PrintVMOptions) { if (PrintVMOptions) {
tty->print("CompileOnly: compileonly "); tty->print("CompileOnly: compileonly ");
lists[CompileOnlyCommand]->print(); lists[CompileOnlyCommand]->print_all(tty);
} }
className = NULL; className = NULL;

View File

@ -35,6 +35,7 @@ class CompilerOracle : AllStatic {
private: private:
static bool _quiet; static bool _quiet;
static void print_tip(); static void print_tip();
static void print_parse_error(const char*& error_msg, char* original_line);
public: public:

View File

@ -0,0 +1,347 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "compiler/methodMatcher.hpp"
#include "memory/oopFactory.hpp"
#include "oops/oop.inline.hpp"
// The JVM specification defines the allowed characters.
// Tokens that are disallowed by the JVM specification can have
// a meaning to the parser so we need to include them here.
// The parser does not enforce all rules of the JVMS - a successful parse
// does not mean that it is an allowed name. Illegal names will
// be ignored since they never can match a class or method.
//
// '\0' and 0xf0-0xff are disallowed in constant string values
// 0x20 ' ', 0x09 '\t' and 0x2c ',' are used in the matching
// 0x5b '[' and 0x5d ']' can not be used because of the matcher
// 0x28 '(' and 0x29 ')' are used for the signature
// 0x2e '.' is always replaced before the matching
// 0x2f '/' is only used in the class name as package separator
#define RANGEBASE "\x1\x2\x3\x4\x5\x6\x7\x8\xa\xb\xc\xd\xe\xf" \
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
"\x21\x22\x23\x24\x25\x26\x27\x2a\x2b\x2c\x2d" \
"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
"\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5c\x5e\x5f" \
"\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" \
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" \
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" \
"\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf" \
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" \
"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" \
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" \
"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
// Scan sets (the bracket part of a scanf "%[...]" conversion) built from
// RANGEBASE. RANGE0 accepts the RANGEBASE characters plus '*'; RANGESLASH
// accepts the same set plus '/'. parse_method_pattern() scans the class name
// with RANGESLASH and the method name with RANGE0.
#define RANGE0 "[*" RANGEBASE "]"
#define RANGESLASH "[*" RANGEBASE "/]"
// Creates a matcher with no symbols attached and both match modes set to
// Exact.
//
// The initializers are written in the order the members are declared in the
// class (names first, then modes). Members are always initialized in
// declaration order regardless of how the list is written, so listing them in
// that order keeps the code honest and avoids reorder warnings.
MethodMatcher::MethodMatcher()
  : _class_name(NULL)
  , _method_name(NULL)
  , _signature(NULL)
  , _class_mode(Exact)
  , _method_mode(Exact) {
}
// Releases the matcher's symbols: every non-NULL symbol pointer held by this
// object (class name, method name, signature - in that order) has its
// reference count decremented once.
MethodMatcher::~MethodMatcher() {
  Symbol* const held[] = { _class_name, _method_name, _signature };
  const size_t held_count = sizeof(held) / sizeof(held[0]);
  for (size_t i = 0; i < held_count; i++) {
    if (held[i] != NULL) {
      held[i]->decrement_refcount();
    }
  }
}
// Stores the given symbols and match modes in this matcher.
//   class_name / class_mode   - symbol and mode used for the holder class.
//   method_name / method_mode - symbol and mode used for the method name.
//   signature                 - signature symbol; NULL means "no signature"
//                               (see matches() and print_base()).
// The pointers are stored as given; no reference count is touched here.
void MethodMatcher::init(Symbol* class_name, Mode class_mode,
                         Symbol* method_name, Mode method_mode,
                         Symbol* signature) {
  // What to compare against ...
  _class_name  = class_name;
  _method_name = method_name;
  _signature   = signature;
  // ... and how to compare it.
  _class_mode  = class_mode;
  _method_mode = method_mode;
}
// Validates the separators used in a method pattern and rewrites the line in
// place so that every separator becomes a single space.
//
//   line      - NUL-terminated, writable pattern text; modified in place.
//   error_msg - set to a static message when the pattern is rejected.
//
// Returns true when the line was accepted and rewritten, false (with
// error_msg set and the line untouched) when it mixes separator styles:
//   - more than one "::",
//   - '/' used together with "::" before the signature,
//   - '/' after a '.' outside the signature, or a second '.'.
bool MethodMatcher::canonicalize(char * line, const char *& error_msg) {
  char* colon = strstr(line, "::");
  bool have_colon = (colon != NULL);
  if (have_colon) {
    // Don't allow multiple '::'. Look at the character after the first "::",
    // not at the pointer itself, before searching the remainder.
    if (colon[2] != '\0' && strstr(colon + 2, "::") != NULL) {
      error_msg = "Method pattern only allows one '::' allowed";
      return false;
    }

    // '/' may not be combined with '::'. The scan starts at the second
    // character and stops at the start of a signature.
    for (char* lp = line + 1; *lp != '\0'; lp++) {
      if (*lp == '(') {
        break;
      }
      if (*lp == '/') {
        error_msg = "Method pattern uses '/' together with '::'";
        return false;
      }
    }
  } else {
    // Don't allow mixed package separators
    char* pos = strchr(line, '.');
    bool in_signature = false;
    if (pos != NULL) {
      for (char* lp = pos + 1; *lp != '\0'; lp++) {
        if (*lp == '(') {
          in_signature = true;
        }
        // After any comma the method pattern has ended
        if (*lp == ',') {
          break;
        }
        if (!in_signature && (*lp == '/')) {
          error_msg = "Method pattern uses mixed '/' and '.' package separators";
          return false;
        }
        if (*lp == '.') {
          error_msg = "Method pattern uses multiple '.' in pattern";
          return false;
        }
      }
    }
  }

  for (char* lp = line; *lp != '\0'; lp++) {
    // Allow '.' to separate the class name from the method name.
    // This is the preferred spelling of methods:
    //      exclude java/lang/String.indexOf(I)I
    // Allow ',' for spaces (eases command line quoting).
    //      exclude,java/lang/String.indexOf
    // For backward compatibility, allow space as separator also.
    //      exclude java/lang/String indexOf
    //      exclude,java/lang/String,indexOf
    // For easy cut-and-paste of method names, allow VM output format
    // as produced by Method::print_short_name:
    //      exclude java.lang.String::indexOf
    // For simple implementation convenience here, convert them all to space.
    if (have_colon) {
      if (*lp == '.')  *lp = '/';   // dots build the package prefix
      if (*lp == ':')  *lp = ' ';
    }
    if (*lp == ',' || *lp == '.')  *lp = ' ';
  }
  return true;
}
// Compares one symbol of a candidate method against the pattern symbol.
//   candidate  - symbol taken from the method being tested.
//   match      - symbol stored in this matcher.
//   match_mode - Any accepts everything; Exact compares the symbol pointers;
//                Prefix / Suffix / Substring compare the C-string forms.
// Returns true when the candidate satisfies the pattern; any other mode
// value yields false.
bool MethodMatcher::match(Symbol* candidate, Symbol* match, Mode match_mode) const {
  // The two modes that need no string conversion are settled first.
  switch (match_mode) {
    case Any:
      return true;
    case Exact:
      return candidate == match;
    default:
      break;
  }

  ResourceMark rm;
  const char* subject = candidate->as_C_string();
  const char* pattern = match->as_C_string();

  if (match_mode == Prefix) {
    // subject starts with pattern
    return strncmp(subject, pattern, strlen(pattern)) == 0;
  }
  if (match_mode == Suffix) {
    // subject ends with pattern
    const size_t subject_len = strlen(subject);
    const size_t pattern_len = strlen(pattern);
    if (subject_len < pattern_len) {
      return false;
    }
    return strcmp(subject + subject_len - pattern_len, pattern) == 0;
  }
  if (match_mode == Substring) {
    return strstr(subject, pattern) != NULL;
  }
  return false;
}
// Derives the match mode from the wildcards in 'name' and strips them.
//
//   name      - writable, NUL-terminated class or method pattern. A leading
//               and/or trailing '*' is removed in place.
//   error_msg - set to a static message when the pattern is invalid.
//
// Returns:
//   Any        for the single-character pattern "*",
//   Exact / Prefix / Suffix / Substring according to which ends carried a '*',
//   Unknown    (with error_msg set) when nothing is left after stripping the
//              wildcards, or when a '*' remains inside the name.
//
// Every error path returns Unknown so that callers testing the returned mode
// stop before using the (now empty or invalid) name.
static MethodMatcher::Mode check_mode(char name[], const char*& error_msg) {
  int match = MethodMatcher::Exact;
  if (name[0] == '*') {
    if (name[1] == '\0') {
      // The pattern is exactly "*".
      return MethodMatcher::Any;
    }
    match |= MethodMatcher::Suffix;
    // Drop the leading '*'; the +1 moves the terminating NUL as well.
    memmove(name, name + 1, strlen(name + 1) + 1);
  }

  size_t len = strlen(name);
  if (len > 0 && name[len - 1] == '*') {
    match |= MethodMatcher::Prefix;
    name[--len] = '\0';
  }

  if (len == 0) {
    error_msg = "** Not a valid pattern";
    return MethodMatcher::Unknown;
  }

  if (strchr(name, '*') != NULL) {
    error_msg = " Embedded * not allowed";
    return MethodMatcher::Unknown;
  }
  return (MethodMatcher::Mode)match;
}
// Advances 'line' past any leading blanks (spaces and tabs) and adds the
// number of characters skipped to *total_bytes_read. When the line does not
// start with a blank, neither argument is modified.
void skip_leading_spaces(char*& line, int* total_bytes_read ) {
  const int blanks = (int)strspn(line, " \t");
  if (blanks > 0) {
    line += blanks;
    *total_bytes_read += blanks;
  }
}
// Parses a method pattern ("class method" with an optional signature) from
// 'line' and stores the result in 'matcher' via matcher->init().
//
//   line      - writable pattern text, passed by reference. It is
//               canonicalized in place and advanced past the text consumed.
//   error_msg - must be NULL on entry (asserted); set to a static message
//               when the pattern is rejected.
//   matcher   - receives the parsed symbols and match modes.
//
// There is no return value: callers must inspect error_msg afterwards to
// find out whether parsing succeeded.
void MethodMatcher::parse_method_pattern(char*& line, const char*& error_msg, MethodMatcher* matcher) {
MethodMatcher::Mode c_match;
MethodMatcher::Mode m_match;
char class_name[256] = {0};
char method_name[256] = {0};
char sig[1024] = {0};
int bytes_read = 0;
// Updated by skip_leading_spaces() but not otherwise read in this function.
int total_bytes_read = 0;
assert(error_msg == NULL, "Dont call here with error_msg already set");
// Turn every accepted separator into a space, or bail out on mixed styles.
if (!MethodMatcher::canonicalize(line, error_msg)) {
assert(error_msg != NULL, "Message must be set if parsing failed");
return;
}
skip_leading_spaces(line, &total_bytes_read);
// Class name (may contain '/'), one or more spaces, then the method name.
// The field widths (255) leave room for the terminating NUL in the buffers.
if (2 == sscanf(line, "%255" RANGESLASH "%*[ ]" "%255" RANGE0 "%n", class_name, method_name, &bytes_read)) {
// check_mode() strips the wildcards from the names in place and may set
// error_msg.
c_match = check_mode(class_name, error_msg);
m_match = check_mode(method_name, error_msg);
if ((strchr(class_name, '<') != NULL) || (strchr(class_name, '>') != NULL)) {
error_msg = "Chars '<' and '>' not allowed in class name";
return;
}
// In a method name '<' and '>' are accepted only as part of the exact names
// <init> and <clinit> (compared after the wildcards were stripped).
if ((strchr(method_name, '<') != NULL) || (strchr(method_name, '>') != NULL)) {
if ((strncmp("<init>", method_name, 255) != 0) && (strncmp("<clinit>", method_name, 255) != 0)) {
error_msg = "Chars '<' and '>' only allowed in <init> and <clinit>";
return;
}
}
if (c_match == MethodMatcher::Unknown || m_match == MethodMatcher::Unknown) {
assert(error_msg != NULL, "Must have been set by check_mode()");
return;
}
// NOTE(review): EXCEPTION_MARK / CHECK are defined elsewhere; presumably
// CHECK makes this function return if new_symbol() raised - confirm.
EXCEPTION_MARK;
Symbol* signature = NULL;
line += bytes_read;
bytes_read = 0;
skip_leading_spaces(line, &total_bytes_read);
// there might be a signature following the method.
// signatures always begin with ( so match that by hand
if (line[0] == '(') {
line++;
sig[0] = '(';
// scan the rest
if (1 == sscanf(line, "%254[[);/" RANGEBASE "]%n", sig+1, &bytes_read)) {
if (strchr(sig, '*') != NULL) {
error_msg = " Wildcard * not allowed in signature";
return;
}
line += bytes_read;
}
// When the scan above matched nothing, sig is just "(" at this point.
signature = SymbolTable::new_symbol(sig, CHECK);
}
Symbol* c_name = SymbolTable::new_symbol(class_name, CHECK);
Symbol* m_name = SymbolTable::new_symbol(method_name, CHECK);
matcher->init(c_name, c_match, m_name, m_match, signature);
return;
} else {
error_msg = "Could not parse method pattern";
}
}
// Tests whether 'method' is selected by this matcher.
// The holder class name and the method name are compared using the stored
// class and method modes. When a signature was stored, it must additionally
// be a prefix of the method's signature; a NULL stored signature accepts any.
bool MethodMatcher::matches(methodHandle method) const {
  Symbol* holder_sym = method->method_holder()->name();
  Symbol* name_sym   = method->name();
  Symbol* sig_sym    = method->signature();

  if (!match(holder_sym, this->class_name(), _class_mode)) {
    return false;
  }
  if (!match(name_sym, this->method_name(), _method_mode)) {
    return false;
  }
  if (this->signature() == NULL) {
    return true;
  }
  return match(sig_sym, this->signature(), Prefix);
}
// Prints symbol 'h' to 'st' in pattern form for the given mode: a '*' is
// written in front for Suffix, Substring and Any, and behind for Prefix and
// Substring. For Any only the '*' is written and the symbol itself is omitted.
void MethodMatcher::print_symbol(outputStream* st, Symbol* h, Mode mode) {
  ResourceMark rm;

  const bool star_in_front = (mode == Suffix) || (mode == Substring) || (mode == Any);
  const bool star_behind   = (mode == Prefix) || (mode == Substring);

  if (star_in_front) {
    st->print("*");
  }
  if (mode != Any) {
    h->print_symbol_on(st);
  }
  if (star_behind) {
    st->print("*");
  }
}
// Prints this matcher to 'st' as <class pattern>.<method pattern>, followed
// directly by the signature when one is stored.
void MethodMatcher::print_base(outputStream* st) {
  print_symbol(st, class_name(), _class_mode);
  st->print(".");
  print_symbol(st, method_name(), _method_mode);

  Symbol* stored_sig = signature();
  if (stored_sig == NULL) {
    return;
  }
  stored_sig->print_symbol_on(st);
}

View File

@ -0,0 +1,126 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_VM_COMPILER_METHODMATCHER_HPP
#define SHARE_VM_COMPILER_METHODMATCHER_HPP
#include "memory/allocation.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "memory/resourceArea.hpp"
// Holds a class-name pattern, a method-name pattern and an optional
// signature, and tests methods against them (see matches()).
class MethodMatcher : public CHeapObj<mtCompiler> {
public:
// How a stored symbol is compared with a candidate symbol.
// Prefix and Suffix are distinct bits and Substring is their combination.
// Exact takes the implicit value 0 and Any the value following Substring.
// Unknown (-1) marks a pattern that could not be classified.
enum Mode {
Exact,
Prefix = 1,
Suffix = 2,
Substring = Prefix | Suffix,
Any,
Unknown = -1
};
protected:
// Pattern symbols; all NULL after default construction. _signature stays
// NULL when the pattern carries no signature.
Symbol* _class_name;
Symbol* _method_name;
Symbol* _signature;
// Comparison modes for the class and method symbols.
Mode _class_mode;
Mode _method_mode;
public:
// Plain accessors for the stored symbols and modes.
Symbol* class_name() const { return _class_name; }
Mode class_mode() const { return _class_mode; }
Symbol* method_name() const { return _method_name; }
Mode method_mode() const { return _method_mode; }
Symbol* signature() const { return _signature; }
MethodMatcher();
// Decrements the reference count of every non-NULL stored symbol.
~MethodMatcher();
// Stores the given symbols and modes in this matcher.
void init(Symbol* class_name, Mode class_mode, Symbol* method_name, Mode method_mode, Symbol* signature);
// Parses 'line' into 'm'. 'line' is advanced past the consumed text and
// error_msg is set on failure.
static void parse_method_pattern(char*& line, const char*& error_msg, MethodMatcher* m);
// Prints 'h' with the '*' decoration that corresponds to 'mode'.
static void print_symbol(outputStream* st, Symbol* h, Mode mode);
// True when 'method' satisfies the class, method and (if present) signature
// patterns.
bool matches(methodHandle method) const;
// Prints class pattern, '.', method pattern and the signature if present.
void print_base(outputStream* st);
private:
// Checks the separators in 'line' and rewrites them to spaces in place.
static bool canonicalize(char * line, const char *& error_msg);
// Compares one candidate symbol with one pattern symbol under match_mode.
bool match(Symbol* candidate, Symbol* match, Mode match_mode) const;
};
class BasicMatcher : public MethodMatcher {
private:
BasicMatcher* _next;
public:
BasicMatcher() : MethodMatcher(),
_next(NULL) {
}
BasicMatcher(BasicMatcher* next) :
_next(next) {
}
static BasicMatcher* parse_method_pattern(char* line, const char*& error_msg) {
assert(error_msg == NULL, "Dont call here with error_msg already set");
BasicMatcher* bm = new BasicMatcher();
MethodMatcher::parse_method_pattern(line, error_msg, bm);
if (error_msg != NULL) {
delete bm;
return NULL;
}
// check for bad trailing characters
int bytes_read = 0;
sscanf(line, "%*[ \t]%n", &bytes_read);
if (line[bytes_read] != '\0') {
error_msg = "Unrecognized trailing text after method pattern";
delete bm;
return NULL;
}
return bm;
}
bool match(methodHandle method) {
for (BasicMatcher* current = this; current != NULL; current = current->next()) {
if (current->matches(method)) {
return true;
}
}
return false;
}
void set_next(BasicMatcher* next) { _next = next; }
BasicMatcher* next() { return _next; }
void print(outputStream* st) { print_base(st); }
void print_all(outputStream* st) {
print_base(st);
if (_next != NULL) {
_next->print_all(st);
}
}
};
#endif // SHARE_VM_COMPILER_METHODMATCHER_HPP

View File

@ -58,7 +58,6 @@ OopMapStream::OopMapStream(const ImmutableOopMap* oop_map, int oop_types_mask) {
_valid_omv = false; _valid_omv = false;
} }
void OopMapStream::find_next() { void OopMapStream::find_next() {
while(_position++ < _size) { while(_position++ < _size) {
_omv.read_from(_stream); _omv.read_from(_stream);
@ -156,9 +155,7 @@ void OopMap::set_oop(VMReg reg) {
void OopMap::set_value(VMReg reg) { void OopMap::set_value(VMReg reg) {
// At this time, we only need value entries in our OopMap when ZapDeadCompiledLocals is active. // At this time, we don't need value entries in our OopMap.
if (ZapDeadCompiledLocals)
set_xxx(reg, OopMapValue::value_value, VMRegImpl::Bad());
} }
@ -199,7 +196,6 @@ void OopMapSet::grow_om_data() {
set_om_data(new_data); set_om_data(new_data);
} }
void OopMapSet::add_gc_map(int pc_offset, OopMap *map ) { void OopMapSet::add_gc_map(int pc_offset, OopMap *map ) {
assert(om_size() != -1,"Cannot grow a fixed OopMapSet"); assert(om_size() != -1,"Cannot grow a fixed OopMapSet");
@ -345,7 +341,7 @@ void OopMapSet::all_do(const frame *fr, const RegisterMap *reg_map,
do { do {
omv = oms.current(); omv = oms.current();
oop* loc = fr->oopmapreg_to_location(omv.reg(),reg_map); oop* loc = fr->oopmapreg_to_location(omv.reg(),reg_map);
if ( loc != NULL ) { guarantee(loc != NULL, "missing saved register");
oop *base_loc = fr->oopmapreg_to_location(omv.content_reg(), reg_map); oop *base_loc = fr->oopmapreg_to_location(omv.content_reg(), reg_map);
oop *derived_loc = loc; oop *derived_loc = loc;
oop val = *base_loc; oop val = *base_loc;
@ -355,7 +351,7 @@ void OopMapSet::all_do(const frame *fr, const RegisterMap *reg_map,
// implicit null check is used in compiled code. // implicit null check is used in compiled code.
// The narrow_oop_base could be NULL or be the address // The narrow_oop_base could be NULL or be the address
// of the page below heap depending on compressed oops mode. // of the page below heap depending on compressed oops mode.
} else } else {
derived_oop_fn(base_loc, derived_loc); derived_oop_fn(base_loc, derived_loc);
} }
oms.next(); oms.next();
@ -363,13 +359,17 @@ void OopMapSet::all_do(const frame *fr, const RegisterMap *reg_map,
} }
} }
// We want coop, value and oop oop_types // We want coop and oop oop_types
int mask = OopMapValue::oop_value | OopMapValue::value_value | OopMapValue::narrowoop_value; int mask = OopMapValue::oop_value | OopMapValue::narrowoop_value;
{ {
for (OopMapStream oms(map,mask); !oms.is_done(); oms.next()) { for (OopMapStream oms(map,mask); !oms.is_done(); oms.next()) {
omv = oms.current(); omv = oms.current();
oop* loc = fr->oopmapreg_to_location(omv.reg(),reg_map); oop* loc = fr->oopmapreg_to_location(omv.reg(),reg_map);
if ( loc != NULL ) { // It should be an error if no location can be found for a
// register mentioned as contained an oop of some kind. Maybe
// this was allowed previously because value_value items might
// be missing?
guarantee(loc != NULL, "missing saved register");
if ( omv.type() == OopMapValue::oop_value ) { if ( omv.type() == OopMapValue::oop_value ) {
oop val = *loc; oop val = *loc;
if (val == (oop)NULL || Universe::is_narrow_oop_base(val)) { if (val == (oop)NULL || Universe::is_narrow_oop_base(val)) {
@ -395,14 +395,12 @@ void OopMapSet::all_do(const frame *fr, const RegisterMap *reg_map,
} }
#endif // ASSERT #endif // ASSERT
oop_fn->do_oop(loc); oop_fn->do_oop(loc);
} else if ( omv.type() == OopMapValue::value_value ) {
assert((*loc) == (oop)NULL || !Universe::is_narrow_oop_base(*loc),
"found invalid value pointer");
value_fn->do_oop(loc);
} else if ( omv.type() == OopMapValue::narrowoop_value ) { } else if ( omv.type() == OopMapValue::narrowoop_value ) {
narrowOop *nl = (narrowOop*)loc; narrowOop *nl = (narrowOop*)loc;
#ifndef VM_LITTLE_ENDIAN #ifndef VM_LITTLE_ENDIAN
if (!omv.reg()->is_stack()) { VMReg vmReg = omv.reg();
// Don't do this on SPARC float registers as they can be individually addressed
if (!vmReg->is_stack() SPARC_ONLY(&& !vmReg->is_FloatRegister())) {
// compressed oops in registers only take up 4 bytes of an // compressed oops in registers only take up 4 bytes of an
// 8 byte register but they are in the wrong part of the // 8 byte register but they are in the wrong part of the
// word so adjust loc to point at the right place. // word so adjust loc to point at the right place.
@ -413,7 +411,6 @@ void OopMapSet::all_do(const frame *fr, const RegisterMap *reg_map,
} }
} }
} }
}
} }
@ -485,9 +482,6 @@ void print_register_type(OopMapValue::oop_types x, VMReg optional,
case OopMapValue::oop_value: case OopMapValue::oop_value:
st->print("Oop"); st->print("Oop");
break; break;
case OopMapValue::value_value:
st->print("Value");
break;
case OopMapValue::narrowoop_value: case OopMapValue::narrowoop_value:
st->print("NarrowOop"); st->print("NarrowOop");
break; break;

View File

@ -33,7 +33,6 @@
// Interface for generating the frame map for compiled code. A frame map // Interface for generating the frame map for compiled code. A frame map
// describes for a specific pc whether each register and frame stack slot is: // describes for a specific pc whether each register and frame stack slot is:
// Oop - A GC root for current frame // Oop - A GC root for current frame
// Value - Live non-oop, non-float value: int, either half of double
// Dead - Dead; can be Zapped for debugging // Dead - Dead; can be Zapped for debugging
// CalleeXX - Callee saved; also describes which caller register is saved // CalleeXX - Callee saved; also describes which caller register is saved
// DerivedXX - A derived oop; original oop is described. // DerivedXX - A derived oop; original oop is described.
@ -54,7 +53,7 @@ private:
public: public:
// Constants // Constants
enum { type_bits = 5, enum { type_bits = 4,
register_bits = BitsPerShort - type_bits }; register_bits = BitsPerShort - type_bits };
enum { type_shift = 0, enum { type_shift = 0,
@ -68,10 +67,9 @@ public:
enum oop_types { // must fit in type_bits enum oop_types { // must fit in type_bits
unused_value =0, // powers of 2, for masking OopMapStream unused_value =0, // powers of 2, for masking OopMapStream
oop_value = 1, oop_value = 1,
value_value = 2, narrowoop_value = 2,
narrowoop_value = 4, callee_saved_value = 4,
callee_saved_value = 8, derived_oop_value= 8 };
derived_oop_value= 16 };
// Constructors // Constructors
OopMapValue () { set_value(0); set_content_reg(VMRegImpl::Bad()); } OopMapValue () { set_value(0); set_content_reg(VMRegImpl::Bad()); }
@ -96,13 +94,11 @@ public:
// Querying // Querying
bool is_oop() { return mask_bits(value(), type_mask_in_place) == oop_value; } bool is_oop() { return mask_bits(value(), type_mask_in_place) == oop_value; }
bool is_value() { return mask_bits(value(), type_mask_in_place) == value_value; }
bool is_narrowoop() { return mask_bits(value(), type_mask_in_place) == narrowoop_value; } bool is_narrowoop() { return mask_bits(value(), type_mask_in_place) == narrowoop_value; }
bool is_callee_saved() { return mask_bits(value(), type_mask_in_place) == callee_saved_value; } bool is_callee_saved() { return mask_bits(value(), type_mask_in_place) == callee_saved_value; }
bool is_derived_oop() { return mask_bits(value(), type_mask_in_place) == derived_oop_value; } bool is_derived_oop() { return mask_bits(value(), type_mask_in_place) == derived_oop_value; }
void set_oop() { set_value((value() & register_mask_in_place) | oop_value); } void set_oop() { set_value((value() & register_mask_in_place) | oop_value); }
void set_value() { set_value((value() & register_mask_in_place) | value_value); }
void set_narrowoop() { set_value((value() & register_mask_in_place) | narrowoop_value); } void set_narrowoop() { set_value((value() & register_mask_in_place) | narrowoop_value); }
void set_callee_saved() { set_value((value() & register_mask_in_place) | callee_saved_value); } void set_callee_saved() { set_value((value() & register_mask_in_place) | callee_saved_value); }
void set_derived_oop() { set_value((value() & register_mask_in_place) | derived_oop_value); } void set_derived_oop() { set_value((value() & register_mask_in_place) | derived_oop_value); }

View File

@ -90,6 +90,8 @@ class AbstractInterpreter: AllStatic {
java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update() java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update()
java_util_zip_CRC32_updateBytes, // implementation of java.util.zip.CRC32.updateBytes() java_util_zip_CRC32_updateBytes, // implementation of java.util.zip.CRC32.updateBytes()
java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer() java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer()
java_util_zip_CRC32C_updateBytes, // implementation of java.util.zip.CRC32C.updateBytes(crc, b[], off, end)
java_util_zip_CRC32C_updateDirectByteBuffer, // implementation of java.util.zip.CRC32C.updateDirectByteBuffer(crc, address, off, end)
java_lang_Float_intBitsToFloat, // implementation of java.lang.Float.intBitsToFloat() java_lang_Float_intBitsToFloat, // implementation of java.lang.Float.intBitsToFloat()
java_lang_Float_floatToRawIntBits, // implementation of java.lang.Float.floatToRawIntBits() java_lang_Float_floatToRawIntBits, // implementation of java.lang.Float.floatToRawIntBits()
java_lang_Double_longBitsToDouble, // implementation of java.lang.Double.longBitsToDouble() java_lang_Double_longBitsToDouble, // implementation of java.lang.Double.longBitsToDouble()

View File

@ -104,7 +104,10 @@ CodeletMark::~CodeletMark() {
(*_masm)->flush(); (*_masm)->flush();
// Commit Codelet. // Commit Codelet.
AbstractInterpreter::code()->commit((*_masm)->code()->pure_insts_size(), (*_masm)->code()->strings()); int committed_code_size = (*_masm)->code()->pure_insts_size();
if (committed_code_size) {
AbstractInterpreter::code()->commit(committed_code_size, (*_masm)->code()->strings());
}
// Make sure nobody can use _masm outside a CodeletMark lifespan. // Make sure nobody can use _masm outside a CodeletMark lifespan.
*_masm = NULL; *_masm = NULL;
} }
@ -234,6 +237,13 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(methodHandle m)
case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer; case vmIntrinsics::_updateByteBufferCRC32 : return java_util_zip_CRC32_updateByteBuffer;
} }
} }
if (UseCRC32CIntrinsics) {
// Use optimized stub code for CRC32C methods.
switch (m->intrinsic_id()) {
case vmIntrinsics::_updateBytesCRC32C : return java_util_zip_CRC32C_updateBytes;
case vmIntrinsics::_updateDirectByteBufferCRC32C : return java_util_zip_CRC32C_updateDirectByteBuffer;
}
}
switch(m->intrinsic_id()) { switch(m->intrinsic_id()) {
case vmIntrinsics::_intBitsToFloat: return java_lang_Float_intBitsToFloat; case vmIntrinsics::_intBitsToFloat: return java_lang_Float_intBitsToFloat;
@ -349,6 +359,8 @@ void AbstractInterpreter::print_method_kind(MethodKind kind) {
case java_util_zip_CRC32_update : tty->print("java_util_zip_CRC32_update"); break; case java_util_zip_CRC32_update : tty->print("java_util_zip_CRC32_update"); break;
case java_util_zip_CRC32_updateBytes : tty->print("java_util_zip_CRC32_updateBytes"); break; case java_util_zip_CRC32_updateBytes : tty->print("java_util_zip_CRC32_updateBytes"); break;
case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break; case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break;
case java_util_zip_CRC32C_updateBytes : tty->print("java_util_zip_CRC32C_updateBytes"); break;
case java_util_zip_CRC32C_updateDirectByteBuffer: tty->print("java_util_zip_CRC32C_updateDirectByteByffer"); break;
default: default:
if (kind >= method_handle_invoke_FIRST && if (kind >= method_handle_invoke_FIRST &&
kind <= method_handle_invoke_LAST) { kind <= method_handle_invoke_LAST) {
@ -537,14 +549,15 @@ void AbstractInterpreterGenerator::initialize_method_handle_entries() {
address InterpreterGenerator::generate_method_entry( address InterpreterGenerator::generate_method_entry(
AbstractInterpreter::MethodKind kind) { AbstractInterpreter::MethodKind kind) {
// determine code generation flags // determine code generation flags
bool native = false;
bool synchronized = false; bool synchronized = false;
address entry_point = NULL; address entry_point = NULL;
switch (kind) { switch (kind) {
case Interpreter::zerolocals : break; case Interpreter::zerolocals : break;
case Interpreter::zerolocals_synchronized: synchronized = true; break; case Interpreter::zerolocals_synchronized: synchronized = true; break;
case Interpreter::native : entry_point = generate_native_entry(false); break; case Interpreter::native : native = true; break;
case Interpreter::native_synchronized : entry_point = generate_native_entry(true); break; case Interpreter::native_synchronized : native = true; synchronized = true; break;
case Interpreter::empty : entry_point = generate_empty_entry(); break; case Interpreter::empty : entry_point = generate_empty_entry(); break;
case Interpreter::accessor : entry_point = generate_accessor_entry(); break; case Interpreter::accessor : entry_point = generate_accessor_entry(); break;
case Interpreter::abstract : entry_point = generate_abstract_entry(); break; case Interpreter::abstract : entry_point = generate_abstract_entry(); break;
@ -562,28 +575,32 @@ address InterpreterGenerator::generate_method_entry(
: entry_point = generate_Reference_get_entry(); break; : entry_point = generate_Reference_get_entry(); break;
#ifndef CC_INTERP #ifndef CC_INTERP
case Interpreter::java_util_zip_CRC32_update case Interpreter::java_util_zip_CRC32_update
: entry_point = generate_CRC32_update_entry(); break; : native = true; entry_point = generate_CRC32_update_entry(); break;
case Interpreter::java_util_zip_CRC32_updateBytes case Interpreter::java_util_zip_CRC32_updateBytes
: // fall thru : // fall thru
case Interpreter::java_util_zip_CRC32_updateByteBuffer case Interpreter::java_util_zip_CRC32_updateByteBuffer
: entry_point = generate_CRC32_updateBytes_entry(kind); break; : native = true; entry_point = generate_CRC32_updateBytes_entry(kind); break;
case Interpreter::java_util_zip_CRC32C_updateBytes
: // fall thru
case Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer
: entry_point = generate_CRC32C_updateBytes_entry(kind); break;
#if defined(TARGET_ARCH_x86) && !defined(_LP64) #if defined(TARGET_ARCH_x86) && !defined(_LP64)
// On x86_32 platforms, a special entry is generated for the following four methods. // On x86_32 platforms, a special entry is generated for the following four methods.
// On other platforms the normal entry is used to enter these methods. // On other platforms the normal entry is used to enter these methods.
case Interpreter::java_lang_Float_intBitsToFloat case Interpreter::java_lang_Float_intBitsToFloat
: entry_point = generate_Float_intBitsToFloat_entry(); break; : native = true; entry_point = generate_Float_intBitsToFloat_entry(); break;
case Interpreter::java_lang_Float_floatToRawIntBits case Interpreter::java_lang_Float_floatToRawIntBits
: entry_point = generate_Float_floatToRawIntBits_entry(); break; : native = true; entry_point = generate_Float_floatToRawIntBits_entry(); break;
case Interpreter::java_lang_Double_longBitsToDouble case Interpreter::java_lang_Double_longBitsToDouble
: entry_point = generate_Double_longBitsToDouble_entry(); break; : native = true; entry_point = generate_Double_longBitsToDouble_entry(); break;
case Interpreter::java_lang_Double_doubleToRawLongBits case Interpreter::java_lang_Double_doubleToRawLongBits
: entry_point = generate_Double_doubleToRawLongBits_entry(); break; : native = true; entry_point = generate_Double_doubleToRawLongBits_entry(); break;
#else #else
case Interpreter::java_lang_Float_intBitsToFloat: case Interpreter::java_lang_Float_intBitsToFloat:
case Interpreter::java_lang_Float_floatToRawIntBits: case Interpreter::java_lang_Float_floatToRawIntBits:
case Interpreter::java_lang_Double_longBitsToDouble: case Interpreter::java_lang_Double_longBitsToDouble:
case Interpreter::java_lang_Double_doubleToRawLongBits: case Interpreter::java_lang_Double_doubleToRawLongBits:
entry_point = generate_native_entry(false); native = true;
break; break;
#endif // defined(TARGET_ARCH_x86) && !defined(_LP64) #endif // defined(TARGET_ARCH_x86) && !defined(_LP64)
#endif // CC_INTERP #endif // CC_INTERP
@ -596,5 +613,18 @@ address InterpreterGenerator::generate_method_entry(
return entry_point; return entry_point;
} }
return generate_normal_entry(synchronized); // We expect the normal and native entry points to be generated first so we can reuse them.
if (native) {
entry_point = Interpreter::entry_for_kind(synchronized ? Interpreter::native_synchronized : Interpreter::native);
if (entry_point == NULL) {
entry_point = generate_native_entry(synchronized);
}
} else {
entry_point = Interpreter::entry_for_kind(synchronized ? Interpreter::zerolocals_synchronized : Interpreter::zerolocals);
if (entry_point == NULL) {
entry_point = generate_normal_entry(synchronized);
}
}
return entry_point;
} }

View File

@ -213,31 +213,6 @@ void InterpreterOopMap::iterate_oop(OffsetClosure* oop_closure) const {
} }
} }
#ifdef ENABLE_ZAP_DEAD_LOCALS
void InterpreterOopMap::iterate_all(OffsetClosure* oop_closure, OffsetClosure* value_closure, OffsetClosure* dead_closure) {
int n = number_of_entries();
int word_index = 0;
uintptr_t value = 0;
uintptr_t mask = 0;
// iterate over entries
for (int i = 0; i < n; i++, mask <<= bits_per_entry) {
// get current word
if (mask == 0) {
value = bit_mask()[word_index++];
mask = 1;
}
// test for dead values & oops, and for live values
if ((value & (mask << dead_bit_number)) != 0) dead_closure->offset_do(i); // call this for all dead values or oops
else if ((value & (mask << oop_bit_number)) != 0) oop_closure->offset_do(i); // call this for all live oops
else value_closure->offset_do(i); // call this for all live values
}
}
#endif
void InterpreterOopMap::print() const { void InterpreterOopMap::print() const {
int n = number_of_entries(); int n = number_of_entries();
tty->print("oop map for "); tty->print("oop map for ");
@ -297,12 +272,6 @@ bool OopMapCacheEntry::verify_mask(CellTypeState* vars, CellTypeState* stack, in
bool v2 = vars[i].is_reference() ? true : false; bool v2 = vars[i].is_reference() ? true : false;
assert(v1 == v2, "locals oop mask generation error"); assert(v1 == v2, "locals oop mask generation error");
if (TraceOopMapGeneration && Verbose) tty->print("%d", v1 ? 1 : 0); if (TraceOopMapGeneration && Verbose) tty->print("%d", v1 ? 1 : 0);
#ifdef ENABLE_ZAP_DEAD_LOCALS
bool v3 = is_dead(i) ? true : false;
bool v4 = !vars[i].is_live() ? true : false;
assert(v3 == v4, "locals live mask generation error");
assert(!(v1 && v3), "dead value marked as oop");
#endif
} }
if (TraceOopMapGeneration && Verbose) { tty->cr(); tty->print("Stack (%d): ", stack_top); } if (TraceOopMapGeneration && Verbose) { tty->cr(); tty->print("Stack (%d): ", stack_top); }
@ -311,12 +280,6 @@ bool OopMapCacheEntry::verify_mask(CellTypeState* vars, CellTypeState* stack, in
bool v2 = stack[j].is_reference() ? true : false; bool v2 = stack[j].is_reference() ? true : false;
assert(v1 == v2, "stack oop mask generation error"); assert(v1 == v2, "stack oop mask generation error");
if (TraceOopMapGeneration && Verbose) tty->print("%d", v1 ? 1 : 0); if (TraceOopMapGeneration && Verbose) tty->print("%d", v1 ? 1 : 0);
#ifdef ENABLE_ZAP_DEAD_LOCALS
bool v3 = is_dead(max_locals + j) ? true : false;
bool v4 = !stack[j].is_live() ? true : false;
assert(v3 == v4, "stack live mask generation error");
assert(!(v1 && v3), "dead value marked as oop");
#endif
} }
if (TraceOopMapGeneration && Verbose) tty->cr(); if (TraceOopMapGeneration && Verbose) tty->cr();
return true; return true;

View File

@ -141,9 +141,6 @@ class InterpreterOopMap: ResourceObj {
int expression_stack_size() const { return _expression_stack_size; } int expression_stack_size() const { return _expression_stack_size; }
#ifdef ENABLE_ZAP_DEAD_LOCALS
void iterate_all(OffsetClosure* oop_closure, OffsetClosure* value_closure, OffsetClosure* dead_closure);
#endif
}; };
class OopMapCache : public CHeapObj<mtClass> { class OopMapCache : public CHeapObj<mtClass> {

View File

@ -412,17 +412,6 @@ void TemplateInterpreterGenerator::generate_all() {
method_entry(java_lang_math_pow ) method_entry(java_lang_math_pow )
method_entry(java_lang_ref_reference_get) method_entry(java_lang_ref_reference_get)
if (UseCRC32Intrinsics) {
method_entry(java_util_zip_CRC32_update)
method_entry(java_util_zip_CRC32_updateBytes)
method_entry(java_util_zip_CRC32_updateByteBuffer)
}
method_entry(java_lang_Float_intBitsToFloat);
method_entry(java_lang_Float_floatToRawIntBits);
method_entry(java_lang_Double_longBitsToDouble);
method_entry(java_lang_Double_doubleToRawLongBits);
initialize_method_handle_entries(); initialize_method_handle_entries();
// all native method kinds (must be one contiguous block) // all native method kinds (must be one contiguous block)
@ -431,6 +420,22 @@ void TemplateInterpreterGenerator::generate_all() {
method_entry(native_synchronized) method_entry(native_synchronized)
Interpreter::_native_entry_end = Interpreter::code()->code_end(); Interpreter::_native_entry_end = Interpreter::code()->code_end();
if (UseCRC32Intrinsics) {
method_entry(java_util_zip_CRC32_update)
method_entry(java_util_zip_CRC32_updateBytes)
method_entry(java_util_zip_CRC32_updateByteBuffer)
}
if (UseCRC32CIntrinsics) {
method_entry(java_util_zip_CRC32C_updateBytes)
method_entry(java_util_zip_CRC32C_updateDirectByteBuffer)
}
method_entry(java_lang_Float_intBitsToFloat);
method_entry(java_lang_Float_floatToRawIntBits);
method_entry(java_lang_Double_longBitsToDouble);
method_entry(java_lang_Double_doubleToRawLongBits);
#undef method_entry #undef method_entry
// Bytecodes // Bytecodes

View File

@ -358,6 +358,8 @@ void Block::dump(const PhaseCFG* cfg) const {
PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher) PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher)
: Phase(CFG) : Phase(CFG)
, _block_arena(arena) , _block_arena(arena)
, _regalloc(NULL)
, _scheduling_for_pressure(false)
, _root(root) , _root(root)
, _matcher(matcher) , _matcher(matcher)
, _node_to_block_mapping(arena) , _node_to_block_mapping(arena)

View File

@ -37,6 +37,7 @@ class MachCallNode;
class Matcher; class Matcher;
class RootNode; class RootNode;
class VectorSet; class VectorSet;
class PhaseChaitin;
struct Tarjan; struct Tarjan;
//------------------------------Block_Array------------------------------------ //------------------------------Block_Array------------------------------------
@ -383,6 +384,12 @@ class PhaseCFG : public Phase {
// Arena for the blocks to be stored in // Arena for the blocks to be stored in
Arena* _block_arena; Arena* _block_arena;
// Info used for scheduling
PhaseChaitin* _regalloc;
// Register pressure heuristic used?
bool _scheduling_for_pressure;
// The matcher for this compilation // The matcher for this compilation
Matcher& _matcher; Matcher& _matcher;
@ -433,12 +440,14 @@ class PhaseCFG : public Phase {
// to late. Helper for schedule_late. // to late. Helper for schedule_late.
Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self); Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
bool schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call); bool schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call, intptr_t* recacl_pressure_nodes);
void set_next_call(Block* block, Node* n, VectorSet& next_call); void set_next_call(Block* block, Node* n, VectorSet& next_call);
void needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call); void needed_for_next_call(Block* block, Node* this_call, VectorSet& next_call);
// Perform basic-block local scheduling // Perform basic-block local scheduling
Node* select(Block* block, Node_List& worklist, GrowableArray<int>& ready_cnt, VectorSet& next_call, uint sched_slot); Node* select(Block* block, Node_List& worklist, GrowableArray<int>& ready_cnt, VectorSet& next_call, uint sched_slot,
intptr_t* recalc_pressure_nodes);
void adjust_register_pressure(Node* n, Block* block, intptr_t *recalc_pressure_nodes, bool finalize_mode);
// Schedule a call next in the block // Schedule a call next in the block
uint sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call); uint sched_call(Block* block, uint node_cnt, Node_List& worklist, GrowableArray<int>& ready_cnt, MachCallNode* mcall, VectorSet& next_call);

View File

@ -114,7 +114,7 @@ bool InlineTree::should_inline(ciMethod* callee_method, ciMethod* caller_method,
CompileTask::print_inline_indent(inline_level()); CompileTask::print_inline_indent(inline_level());
tty->print_cr("Inlined method is hot: "); tty->print_cr("Inlined method is hot: ");
} }
set_msg("force inline by CompilerOracle"); set_msg("force inline by CompileCommand");
_forced_inline = true; _forced_inline = true;
return true; return true;
} }
@ -223,12 +223,12 @@ bool InlineTree::should_not_inline(ciMethod *callee_method,
// ignore heuristic controls on inlining // ignore heuristic controls on inlining
if (callee_method->should_inline()) { if (callee_method->should_inline()) {
set_msg("force inline by CompilerOracle"); set_msg("force inline by CompileCommand");
return false; return false;
} }
if (callee_method->should_not_inline()) { if (callee_method->should_not_inline()) {
set_msg("disallowed by CompilerOracle"); set_msg("disallowed by CompileCommand");
return true; return true;
} }
@ -470,11 +470,6 @@ bool pass_initial_checks(ciMethod* caller_method, int caller_bci, ciMethod* call
} }
} }
} }
// We will attempt to see if a class/field/etc got properly loaded. If it
// did not, it may attempt to throw an exception during our probing. Catch
// and ignore such exceptions and do not attempt to compile the method.
if( callee_method->should_exclude() ) return false;
return true; return true;
} }

View File

@ -69,22 +69,6 @@
develop(bool, StressGCM, false, \ develop(bool, StressGCM, false, \
"Randomize instruction scheduling in GCM") \ "Randomize instruction scheduling in GCM") \
\ \
notproduct(intx, CompileZapFirst, 0, \
"If +ZapDeadCompiledLocals, " \
"skip this many before compiling in zap calls") \
\
notproduct(intx, CompileZapLast, -1, \
"If +ZapDeadCompiledLocals, " \
"compile this many after skipping (incl. skip count, -1 = all)") \
\
notproduct(intx, ZapDeadCompiledLocalsFirst, 0, \
"If +ZapDeadCompiledLocals, " \
"skip this many before really doing it") \
\
notproduct(intx, ZapDeadCompiledLocalsLast, -1, \
"If +ZapDeadCompiledLocals, " \
"do this many after skipping (incl. skip count, -1 = all)") \
\
develop(intx, OptoPrologueNops, 0, \ develop(intx, OptoPrologueNops, 0, \
"Insert this many extra nop instructions " \ "Insert this many extra nop instructions " \
"in the prologue of every nmethod") \ "in the prologue of every nmethod") \
@ -306,6 +290,9 @@
product_pd(bool, OptoScheduling, \ product_pd(bool, OptoScheduling, \
"Instruction Scheduling after register allocation") \ "Instruction Scheduling after register allocation") \
\ \
product_pd(bool, OptoRegScheduling, \
"Instruction Scheduling before register allocation for pressure") \
\
product(bool, PartialPeelLoop, true, \ product(bool, PartialPeelLoop, true, \
"Partial peel (rotate) loops") \ "Partial peel (rotate) loops") \
\ \

View File

@ -907,6 +907,18 @@ public:
// Convenience for initialization->maybe_set_complete(phase) // Convenience for initialization->maybe_set_complete(phase)
bool maybe_set_complete(PhaseGVN* phase); bool maybe_set_complete(PhaseGVN* phase);
// Returns true if this allocation does not escape the current thread,
// i.e. its escape state is NoEscape or ArgEscape.
//  - AllocateNode::_is_non_escaping is true when the escape state is
//    NoEscape, so it is checked first.
//  - Otherwise the allocation's InitializeNode is consulted, if there is
//    one: InitializeNode::_does_not_escape is true when the allocation's
//    escape state is NoEscape or ArgEscape.
bool does_not_escape_thread() {
  if (_is_non_escaping) {
    return true;
  }
  InitializeNode* init = initialization();
  return (init != NULL) && init->does_not_escape();
}
}; };
//------------------------------AllocateArray--------------------------------- //------------------------------AllocateArray---------------------------------

View File

@ -191,7 +191,7 @@ uint LiveRangeMap::find_const(uint lrg) const {
return next; return next;
} }
PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher) PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher, bool scheduling_info_generated)
: PhaseRegAlloc(unique, cfg, matcher, : PhaseRegAlloc(unique, cfg, matcher,
#ifndef PRODUCT #ifndef PRODUCT
print_chaitin_statistics print_chaitin_statistics
@ -205,6 +205,11 @@ PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
, _spilled_twice(Thread::current()->resource_area()) , _spilled_twice(Thread::current()->resource_area())
, _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0) , _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0)
, _oldphi(unique) , _oldphi(unique)
, _scheduling_info_generated(scheduling_info_generated)
, _sched_int_pressure(0, INTPRESSURE)
, _sched_float_pressure(0, FLOATPRESSURE)
, _scratch_int_pressure(0, INTPRESSURE)
, _scratch_float_pressure(0, FLOATPRESSURE)
#ifndef PRODUCT #ifndef PRODUCT
, _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling")) , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
#endif #endif
@ -350,7 +355,7 @@ void PhaseChaitin::Register_Allocate() {
// all copy-related live ranges low and then using the max copy-related // all copy-related live ranges low and then using the max copy-related
// live range as a cut-off for LIVE and the IFG. In other words, I can // live range as a cut-off for LIVE and the IFG. In other words, I can
// build a subset of LIVE and IFG just for copies. // build a subset of LIVE and IFG just for copies.
PhaseLive live(_cfg, _lrg_map.names(), &live_arena); PhaseLive live(_cfg, _lrg_map.names(), &live_arena, false);
// Need IFG for coalescing and coloring // Need IFG for coalescing and coloring
PhaseIFG ifg(&live_arena); PhaseIFG ifg(&live_arena);
@ -690,6 +695,29 @@ void PhaseChaitin::de_ssa() {
_lrg_map.reset_uf_map(lr_counter); _lrg_map.reset_uf_map(lr_counter);
} }
void PhaseChaitin::mark_ssa() {
// Use ssa names to populate the live range maps or if no mask
// is available, use the 0 entry.
uint max_idx = 0;
for ( uint i = 0; i < _cfg.number_of_blocks(); i++ ) {
Block* block = _cfg.get_block(i);
uint cnt = block->number_of_nodes();
// Handle all the normal Nodes in the block
for ( uint j = 0; j < cnt; j++ ) {
Node *n = block->get_node(j);
// Pre-color to the zero live range, or pick virtual register
const RegMask &rm = n->out_RegMask();
_lrg_map.map(n->_idx, rm.is_NotEmpty() ? n->_idx : 0);
max_idx = (n->_idx > max_idx) ? n->_idx : max_idx;
}
}
_lrg_map.set_max_lrg_id(max_idx+1);
// Reset the Union-Find mapping to be identity
_lrg_map.reset_uf_map(max_idx+1);
}
// Gather LiveRanGe information, including register masks. Modification of // Gather LiveRanGe information, including register masks. Modification of
// cisc spillable in_RegMasks should not be done before AggressiveCoalesce. // cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
@ -707,7 +735,9 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
for (uint j = 1; j < block->number_of_nodes(); j++) { for (uint j = 1; j < block->number_of_nodes(); j++) {
Node* n = block->get_node(j); Node* n = block->get_node(j);
uint input_edge_start =1; // Skip control most nodes uint input_edge_start =1; // Skip control most nodes
bool is_machine_node = false;
if (n->is_Mach()) { if (n->is_Mach()) {
is_machine_node = true;
input_edge_start = n->as_Mach()->oper_input_base(); input_edge_start = n->as_Mach()->oper_input_base();
} }
uint idx = n->is_Copy(); uint idx = n->is_Copy();
@ -929,6 +959,7 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// Convert operand number to edge index number // Convert operand number to edge index number
inp = n->as_Mach()->operand_index(inp); inp = n->as_Mach()->operand_index(inp);
} }
// Prepare register mask for each input // Prepare register mask for each input
for( uint k = input_edge_start; k < cnt; k++ ) { for( uint k = input_edge_start; k < cnt; k++ ) {
uint vreg = _lrg_map.live_range_id(n->in(k)); uint vreg = _lrg_map.live_range_id(n->in(k));
@ -948,6 +979,12 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
n->as_Mach()->use_cisc_RegMask(); n->as_Mach()->use_cisc_RegMask();
} }
if (is_machine_node && _scheduling_info_generated) {
MachNode* cur_node = n->as_Mach();
// this is cleaned up by register allocation
if (k >= cur_node->num_opnds()) continue;
}
LRG &lrg = lrgs(vreg); LRG &lrg = lrgs(vreg);
// // Testing for floating point code shape // // Testing for floating point code shape
// Node *test = n->in(k); // Node *test = n->in(k);
@ -989,7 +1026,7 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// double can interfere with TWO aligned pairs, or effectively // double can interfere with TWO aligned pairs, or effectively
// FOUR registers! // FOUR registers!
#ifdef ASSERT #ifdef ASSERT
if (is_vect) { if (is_vect && !_scheduling_info_generated) {
if (lrg.num_regs() != 0) { if (lrg.num_regs() != 0) {
assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned"); assert(lrgmask.is_aligned_sets(lrg.num_regs()), "vector should be aligned");
assert(!lrg._fat_proj, "sanity"); assert(!lrg._fat_proj, "sanity");

View File

@ -399,7 +399,6 @@ class PhaseChaitin : public PhaseRegAlloc {
int _trip_cnt; int _trip_cnt;
int _alternate; int _alternate;
LRG &lrgs(uint idx) const { return _ifg->lrgs(idx); }
PhaseLive *_live; // Liveness, used in the interference graph PhaseLive *_live; // Liveness, used in the interference graph
PhaseIFG *_ifg; // Interference graph (for original chunk) PhaseIFG *_ifg; // Interference graph (for original chunk)
Node_List **_lrg_nodes; // Array of node; lists for lrgs which spill Node_List **_lrg_nodes; // Array of node; lists for lrgs which spill
@ -464,16 +463,28 @@ class PhaseChaitin : public PhaseRegAlloc {
#endif #endif
public: public:
PhaseChaitin( uint unique, PhaseCFG &cfg, Matcher &matcher ); PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher, bool scheduling_info_generated);
~PhaseChaitin() {} ~PhaseChaitin() {}
LiveRangeMap _lrg_map; LiveRangeMap _lrg_map;
LRG &lrgs(uint idx) const { return _ifg->lrgs(idx); }
// Do all the real work of allocate // Do all the real work of allocate
void Register_Allocate(); void Register_Allocate();
float high_frequency_lrg() const { return _high_frequency_lrg; } float high_frequency_lrg() const { return _high_frequency_lrg; }
// Used when scheduling info generated, not in general register allocation
bool _scheduling_info_generated;
void set_ifg(PhaseIFG &ifg) { _ifg = &ifg; }
void set_live(PhaseLive &live) { _live = &live; }
PhaseLive* get_live() { return _live; }
// Populate the live range maps with ssa info for scheduling
void mark_ssa();
#ifndef PRODUCT #ifndef PRODUCT
bool trace_spilling() const { return _trace_spilling; } bool trace_spilling() const { return _trace_spilling; }
#endif #endif
@ -516,7 +527,11 @@ private:
uint _final_pressure; uint _final_pressure;
// number of live ranges that constitute high register pressure // number of live ranges that constitute high register pressure
const uint _high_pressure_limit; uint _high_pressure_limit;
// initial pressure observed
uint _start_pressure;
public: public:
// lower the register pressure and look for a low to high pressure // lower the register pressure and look for a low to high pressure
@ -537,6 +552,14 @@ private:
} }
} }
// Reinitialize this tracker: install 'limit' as the high-pressure limit
// and clear the start, final and current pressure as well as the
// high-pressure index.
void init(int limit) {
  _high_pressure_limit = limit;
  _start_pressure = 0;
  _final_pressure = 0;
  _high_pressure_index = 0;
  _current_pressure = 0;
}
uint high_pressure_index() const { uint high_pressure_index() const {
return _high_pressure_index; return _high_pressure_index;
} }
@ -545,6 +568,10 @@ private:
return _final_pressure; return _final_pressure;
} }
// Accessor for the recorded start pressure.
uint start_pressure() const { return _start_pressure; }
uint current_pressure() const { uint current_pressure() const {
return _current_pressure; return _current_pressure;
} }
@ -561,6 +588,15 @@ private:
_high_pressure_index = 0; _high_pressure_index = 0;
} }
// Record 'value' as the start pressure; the final pressure is set to the
// same value.
void set_start_pressure(int value) {
  _final_pressure = value;
  _start_pressure = value;
}
// Overwrite the current pressure with 'value'.
void set_current_pressure(int value) { _current_pressure = value; }
void check_pressure_at_fatproj(uint fatproj_location, RegMask& fatproj_mask) { void check_pressure_at_fatproj(uint fatproj_location, RegMask& fatproj_mask) {
// this pressure is only valid at this instruction, i.e. we don't need to lower // this pressure is only valid at this instruction, i.e. we don't need to lower
// the register pressure since the fat proj was never live before (going backwards) // the register pressure since the fat proj was never live before (going backwards)
@ -579,12 +615,11 @@ private:
Pressure(uint high_pressure_index, uint high_pressure_limit) Pressure(uint high_pressure_index, uint high_pressure_limit)
: _current_pressure(0) : _current_pressure(0)
, _high_pressure_index(high_pressure_index) , _high_pressure_index(high_pressure_index)
, _final_pressure(0)
, _high_pressure_limit(high_pressure_limit) , _high_pressure_limit(high_pressure_limit)
, _final_pressure(0) {} , _start_pressure(0) {}
}; };
void lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure);
void raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure);
void check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype); void check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype);
void add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure); void add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure);
void compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost); void compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost);
@ -600,10 +635,25 @@ private:
// acceptable register sets do not overlap, then they do not interfere. // acceptable register sets do not overlap, then they do not interfere.
uint build_ifg_physical( ResourceArea *a ); uint build_ifg_physical( ResourceArea *a );
public:
// Gather LiveRanGe information, including register masks and base pointer/ // Gather LiveRanGe information, including register masks and base pointer/
// derived pointer relationships. // derived pointer relationships.
void gather_lrg_masks( bool mod_cisc_masks ); void gather_lrg_masks( bool mod_cisc_masks );
// user visible pressure variables for scheduling
Pressure _sched_int_pressure;
Pressure _sched_float_pressure;
Pressure _scratch_int_pressure;
Pressure _scratch_float_pressure;
// Pressure functions for user context
void lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure);
void raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure);
void compute_entry_block_pressure(Block* b);
void compute_exit_block_pressure(Block* b);
void print_pressure_info(Pressure& pressure, const char *str);
private:
// Force the bases of derived pointers to be alive at GC points. // Force the bases of derived pointers to be alive at GC points.
bool stretch_base_pointer_live_ranges( ResourceArea *a ); bool stretch_base_pointer_live_ranges( ResourceArea *a );
// Helper to stretch above; recursively discover the base Node for // Helper to stretch above; recursively discover the base Node for

View File

@ -131,7 +131,6 @@ macro(DivModL)
macro(EncodeISOArray) macro(EncodeISOArray)
macro(EncodeP) macro(EncodeP)
macro(EncodePKlass) macro(EncodePKlass)
macro(ExpD)
macro(FastLock) macro(FastLock)
macro(FastUnlock) macro(FastUnlock)
macro(Goto) macro(Goto)
@ -290,6 +289,10 @@ macro(MulVD)
macro(MulReductionVD) macro(MulReductionVD)
macro(DivVF) macro(DivVF)
macro(DivVD) macro(DivVD)
macro(AbsVF)
macro(AbsVD)
macro(NegVF)
macro(NegVD)
macro(SqrtVD) macro(SqrtVD)
macro(LShiftCntV) macro(LShiftCntV)
macro(RShiftCntV) macro(RShiftCntV)

View File

@ -2336,7 +2336,7 @@ void Compile::Code_Gen() {
debug_only( cfg.verify(); ) debug_only( cfg.verify(); )
} }
PhaseChaitin regalloc(unique(), cfg, matcher); PhaseChaitin regalloc(unique(), cfg, matcher, false);
_regalloc = &regalloc; _regalloc = &regalloc;
{ {
TracePhase tp("regalloc", &timers[_t_registerAllocation]); TracePhase tp("regalloc", &timers[_t_registerAllocation]);

View File

@ -1208,12 +1208,6 @@ class Compile : public Phase {
// Compute the name of old_SP. See <arch>.ad for frame layout. // Compute the name of old_SP. See <arch>.ad for frame layout.
OptoReg::Name compute_old_SP(); OptoReg::Name compute_old_SP();
#ifdef ENABLE_ZAP_DEAD_LOCALS
static bool is_node_getting_a_safepoint(Node*);
void Insert_zap_nodes();
Node* call_zap_node(MachSafePointNode* n, int block_no);
#endif
private: private:
// Phase control: // Phase control:
void Init(int aliaslevel); // Prepare for a single compilation void Init(int aliaslevel); // Prepare for a single compilation

View File

@ -34,6 +34,7 @@
#include "opto/phaseX.hpp" #include "opto/phaseX.hpp"
#include "opto/rootnode.hpp" #include "opto/rootnode.hpp"
#include "opto/runtime.hpp" #include "opto/runtime.hpp"
#include "opto/chaitin.hpp"
#include "runtime/deoptimization.hpp" #include "runtime/deoptimization.hpp"
// Portions of code courtesy of Clifford Click // Portions of code courtesy of Clifford Click
@ -1363,6 +1364,44 @@ void PhaseCFG::global_code_motion() {
} }
} }
bool block_size_threshold_ok = false;
intptr_t *recalc_pressure_nodes = NULL;
if (OptoRegScheduling) {
for (uint i = 0; i < number_of_blocks(); i++) {
Block* block = get_block(i);
if (block->number_of_nodes() > 10) {
block_size_threshold_ok = true;
break;
}
}
}
// Enabling the scheduler for register pressure plus finding blocks of size to schedule for it
// is key to enabling this feature.
PhaseChaitin regalloc(C->unique(), *this, _matcher, true);
ResourceArea live_arena; // Arena for liveness
ResourceMark rm_live(&live_arena);
PhaseLive live(*this, regalloc._lrg_map.names(), &live_arena, true);
PhaseIFG ifg(&live_arena);
if (OptoRegScheduling && block_size_threshold_ok) {
regalloc.mark_ssa();
Compile::TracePhase tp("computeLive", &timers[_t_computeLive]);
rm_live.reset_to_mark(); // Reclaim working storage
IndexSet::reset_memory(C, &live_arena);
uint node_size = regalloc._lrg_map.max_lrg_id();
ifg.init(node_size); // Empty IFG
regalloc.set_ifg(ifg);
regalloc.set_live(live);
regalloc.gather_lrg_masks(false); // Collect LRG masks
live.compute(node_size); // Compute liveness
recalc_pressure_nodes = NEW_RESOURCE_ARRAY(intptr_t, node_size);
for (uint i = 0; i < node_size; i++) {
recalc_pressure_nodes[i] = 0;
}
}
_regalloc = &regalloc;
#ifndef PRODUCT #ifndef PRODUCT
if (trace_opto_pipelining()) { if (trace_opto_pipelining()) {
tty->print("\n---- Start Local Scheduling ----\n"); tty->print("\n---- Start Local Scheduling ----\n");
@ -1375,13 +1414,15 @@ void PhaseCFG::global_code_motion() {
visited.Clear(); visited.Clear();
for (uint i = 0; i < number_of_blocks(); i++) { for (uint i = 0; i < number_of_blocks(); i++) {
Block* block = get_block(i); Block* block = get_block(i);
if (!schedule_local(block, ready_cnt, visited)) { if (!schedule_local(block, ready_cnt, visited, recalc_pressure_nodes)) {
if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) { if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
C->record_method_not_compilable("local schedule failed"); C->record_method_not_compilable("local schedule failed");
} }
_regalloc = NULL;
return; return;
} }
} }
_regalloc = NULL;
// If we inserted any instructions between a Call and his CatchNode, // If we inserted any instructions between a Call and his CatchNode,
// clone the instructions on all paths below the Catch. // clone the instructions on all paths below the Catch.

View File

@ -439,8 +439,10 @@ void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* l
} }
} }
} }
if (_scheduling_info_generated == false) {
assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect"); assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect"); assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
}
} }
/* Go to the first non-phi index in a block */ /* Go to the first non-phi index in a block */
@ -517,6 +519,58 @@ void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, P
assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect"); assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
} }
/*
 * Establish the scheduling register pressure at the entry of block 'b'.
 * Every live range in the block's live-in set raises the int/pointer or
 * float scheduling pressure. Each phi in the block that has an input defined
 * in this same block contributes one additional raise. The resulting current
 * pressures are then recorded as the start pressures.
 */
void PhaseChaitin::compute_entry_block_pressure(Block* b) {
  // Account for everything that is live on entry to the block.
  IndexSet* entry_set = _live->livein(b);
  IndexSetIterator entry_iter(entry_set);
  for (uint lrg_id = entry_iter.next(); lrg_id != 0; lrg_id = entry_iter.next()) {
    LRG& live_range = lrgs(lrg_id);
    raise_pressure(b, live_range, _sched_int_pressure, _sched_float_pressure);
  }

  // Now look at the phis for inputs that are defined inside this block.
  for (uint node_idx = 0; node_idx < b->number_of_nodes(); node_idx++) {
    Node* candidate = b->get_node(node_idx);
    if (!candidate->is_Phi()) {
      continue;
    }
    for (uint in_idx = 1; in_idx < candidate->req(); in_idx++) {
      Node* input = candidate->in(in_idx);
      if (_cfg.get_block_for_node(input) != b) {
        continue;
      }
      // A phi stands for a single value, so the pressure is raised only once
      // per phi: stop at the first locally defined input.
      LRG& input_range = lrgs(input->_idx);
      raise_pressure(b, input_range, _sched_int_pressure, _sched_float_pressure);
      break;
    }
  }

  // Whatever has accumulated so far is the pressure the block starts with.
  _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure());
  _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure());
}
/*
 * Recompute the scheduling register pressure at the exit of block 'b'.
 * The current int/pointer and float scheduling pressures are reset to zero
 * and then raised once for every live range in the block's live-out set.
 */
void PhaseChaitin::compute_exit_block_pressure(Block* b) {
  // PhaseLive::live() hands back the live-out set of the block.
  IndexSet* exit_set = _live->live(b);
  IndexSetIterator exit_iter(exit_set);

  // Start counting from scratch; only the live-out ranges are counted here.
  _sched_int_pressure.set_current_pressure(0);
  _sched_float_pressure.set_current_pressure(0);

  for (uint lrg_id = exit_iter.next(); lrg_id != 0; lrg_id = exit_iter.next()) {
    LRG& live_range = lrgs(lrg_id);
    raise_pressure(b, live_range, _sched_int_pressure, _sched_float_pressure);
  }
}
/* /*
* Remove dead node if it's not used. * Remove dead node if it's not used.
* We only remove projection nodes if the node "defining" the projection is * We only remove projection nodes if the node "defining" the projection is
@ -737,6 +791,16 @@ void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, P
block_hrp_index = i; block_hrp_index = i;
} }
// Print the start, max and end values of 'pressure' to tty, one per line.
// 'str' is an optional banner printed first; pass NULL to omit it.
// Note that the "max" line reports pressure.final_pressure() and the "end"
// line reports pressure.current_pressure().
void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) {
  const bool has_banner = (str != NULL);
  if (has_banner) {
    tty->print_cr("# *** %s ***", str);
  }

  tty->print_cr("# start pressure is = %d", pressure.start_pressure());
  tty->print_cr("# max pressure is = %d", pressure.final_pressure());
  tty->print_cr("# end pressure is = %d", pressure.current_pressure());

  // Trailing separator line.
  tty->print_cr("#");
}
/* Build an interference graph: /* Build an interference graph:
* That is, if 2 live ranges are simultaneously alive but in their acceptable * That is, if 2 live ranges are simultaneously alive but in their acceptable
* register sets do not overlap, then they do not interfere. The IFG is built * register sets do not overlap, then they do not interfere. The IFG is built

View File

@ -31,6 +31,7 @@
#include "opto/cfgnode.hpp" #include "opto/cfgnode.hpp"
#include "opto/machnode.hpp" #include "opto/machnode.hpp"
#include "opto/runtime.hpp" #include "opto/runtime.hpp"
#include "opto/chaitin.hpp"
#include "runtime/sharedRuntime.hpp" #include "runtime/sharedRuntime.hpp"
// Optimization - Graph Style // Optimization - Graph Style
@ -443,7 +444,13 @@ void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allo
// remaining cases (most), choose the instruction with the greatest latency // remaining cases (most), choose the instruction with the greatest latency
// (that is, the most number of pseudo-cycles required to the end of the // (that is, the most number of pseudo-cycles required to the end of the
// routine). If there is a tie, choose the instruction with the most inputs. // routine). If there is a tie, choose the instruction with the most inputs.
Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &ready_cnt, VectorSet &next_call, uint sched_slot) { Node* PhaseCFG::select(
Block* block,
Node_List &worklist,
GrowableArray<int> &ready_cnt,
VectorSet &next_call,
uint sched_slot,
intptr_t* recalc_pressure_nodes) {
// If only a single entry on the stack, use it // If only a single entry on the stack, use it
uint cnt = worklist.size(); uint cnt = worklist.size();
@ -458,6 +465,7 @@ Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &re
uint score = 0; // Bigger is better uint score = 0; // Bigger is better
int idx = -1; // Index in worklist int idx = -1; // Index in worklist
int cand_cnt = 0; // Candidate count int cand_cnt = 0; // Candidate count
bool block_size_threshold_ok = (block->number_of_nodes() > 10) ? true : false;
for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
// Order in worklist is used to break ties. // Order in worklist is used to break ties.
@ -539,6 +547,46 @@ Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &re
uint n_latency = get_latency_for_node(n); uint n_latency = get_latency_for_node(n);
uint n_score = n->req(); // Many inputs get high score to break ties uint n_score = n->req(); // Many inputs get high score to break ties
if (OptoRegScheduling && block_size_threshold_ok) {
if (recalc_pressure_nodes[n->_idx] == 0x7fff7fff) {
_regalloc->_scratch_int_pressure.init(_regalloc->_sched_int_pressure.high_pressure_limit());
_regalloc->_scratch_float_pressure.init(_regalloc->_sched_float_pressure.high_pressure_limit());
// simulate the notion that we just picked this node to schedule
n->add_flag(Node::Flag_is_scheduled);
// now calculate its effect upon the graph if we did
adjust_register_pressure(n, block, recalc_pressure_nodes, false);
// return its state for finalize in case somebody else wins
n->remove_flag(Node::Flag_is_scheduled);
// now save the two final pressure components of register pressure, limiting pressure calcs to short size
short int_pressure = (short)_regalloc->_scratch_int_pressure.current_pressure();
short float_pressure = (short)_regalloc->_scratch_float_pressure.current_pressure();
recalc_pressure_nodes[n->_idx] = int_pressure;
recalc_pressure_nodes[n->_idx] |= (float_pressure << 16);
}
if (_scheduling_for_pressure) {
latency = n_latency;
if (n_choice != 3) {
// Now evaluate each register pressure component based on threshold in the score.
// In general the defining register type will dominate the score, ergo we will not see register pressure grow on both banks
// on a single instruction, but we might see it shrink on both banks.
// For each use of register that has a register class that is over the high pressure limit, we build n_score up for
// live ranges that terminate on this instruction.
if (_regalloc->_sched_int_pressure.current_pressure() > _regalloc->_sched_int_pressure.high_pressure_limit()) {
short int_pressure = (short)recalc_pressure_nodes[n->_idx];
n_score = (int_pressure < 0) ? ((score + n_score) - int_pressure) : (int_pressure > 0) ? 1 : n_score;
}
if (_regalloc->_sched_float_pressure.current_pressure() > _regalloc->_sched_float_pressure.high_pressure_limit()) {
short float_pressure = (short)(recalc_pressure_nodes[n->_idx] >> 16);
n_score = (float_pressure < 0) ? ((score + n_score) - float_pressure) : (float_pressure > 0) ? 1 : n_score;
}
} else {
// make sure we choose these candidates
score = 0;
}
}
}
// Keep best latency found // Keep best latency found
cand_cnt++; cand_cnt++;
if (choice < n_choice || if (choice < n_choice ||
@ -562,6 +610,100 @@ Node* PhaseCFG::select(Block* block, Node_List &worklist, GrowableArray<int> &re
return n; return n;
} }
//-------------------------adjust_register_pressure----------------------------
// Update the register pressure bookkeeping for node 'n' in 'block'. Pressure
// is first lowered for each input live range that is found to end at 'n', and
// then raised for the live range that 'n' itself maps to.
//
//   finalize_mode == true : the update is applied to the _sched_* pressure
//     objects, unscheduled local non-phi users of the inputs are marked for
//     recalculation in 'recalc_pressure_nodes', and _scheduling_for_pressure
//     is switched on or off depending on the high pressure limits.
//   finalize_mode == false: the update is applied to the _scratch_* pressure
//     objects only; no recalculation marks are written and the scheduling
//     mode is left alone.
//
// 'recalc_pressure_nodes' is indexed by node _idx. The value 0x7fff7fff is the
// marker that select() tests for before recomputing a node's pressure effect.
void PhaseCFG::adjust_register_pressure(Node* n, Block* block, intptr_t* recalc_pressure_nodes, bool finalize_mode) {
PhaseLive* liveinfo = _regalloc->get_live();
// PhaseLive::live() returns the live-out set of the block
IndexSet* liveout = liveinfo->live(block);
// first adjust the register pressure for the sources
// (inputs are walked from index 1; input 0 is not considered)
for (uint i = 1; i < n->req(); i++) {
bool lrg_ends = false;
Node *src_n = n->in(i);
// only non-NULL Mach inputs that map to a non-zero live range id are considered
if (src_n == NULL) continue;
if (!src_n->is_Mach()) continue;
uint src = _regalloc->_lrg_map.find(src_n);
if (src == 0) continue;
LRG& lrg_src = _regalloc->lrgs(src);
// detect if the live range ends or not
// A source that is in the live-out set cannot end here. Otherwise assume it
// ends at n and let the scan of its users below disprove that.
if (liveout->member(src) == false) {
lrg_ends = true;
for (DUIterator_Fast jmax, j = src_n->fast_outs(jmax); j < jmax; j++) {
Node* m = src_n->fast_out(j); // Get user
// skip n itself and any user that is not a Mach node
if (m == n) continue;
if (!m->is_Mach()) continue;
MachNode *mach = m->as_Mach();
bool src_matches = false;
int iop = mach->ideal_Opcode();
switch (iop) {
// All store opcodes share the loop below: the store only counts as a
// user if src_n appears among its inputs 1..req()-1.
case Op_StoreB:
case Op_StoreC:
case Op_StoreCM:
case Op_StoreD:
case Op_StoreF:
case Op_StoreI:
case Op_StoreL:
case Op_StoreP:
case Op_StoreN:
case Op_StoreVector:
case Op_StoreNKlass:
for (uint k = 1; k < m->req(); k++) {
Node *in = m->in(k);
if (in == src_n) {
src_matches = true;
break;
}
}
break;
default:
// every non-store user counts as a use of src_n
src_matches = true;
break;
}
// If we have a store as our use, ignore the non source operands
if (src_matches == false) continue;
// Mark every unscheduled use which is not n with a recalculation
if ((get_block_for_node(m) == block) && (!m->is_scheduled())) {
// recalculation marks are only written in finalize mode, and never for phis
if (finalize_mode && !m->is_Phi()) {
recalc_pressure_nodes[m->_idx] = 0x7fff7fff;
}
// an unscheduled user in this block keeps the live range alive past n
lrg_ends = false;
}
}
}
// if none, this live range ends and we can adjust register pressure
if (lrg_ends) {
// location 0 and a NULL live-out set are passed to lower_pressure in both modes
if (finalize_mode) {
_regalloc->lower_pressure(block, 0, lrg_src, NULL, _regalloc->_sched_int_pressure, _regalloc->_sched_float_pressure);
} else {
_regalloc->lower_pressure(block, 0, lrg_src, NULL, _regalloc->_scratch_int_pressure, _regalloc->_scratch_float_pressure);
}
}
}
// now add the register pressure from the dest and evaluate which heuristic we should use:
// 1.) The default, latency scheduling
// 2.) Register pressure scheduling based on the high pressure limit threshold for int or float register stacks
uint dst = _regalloc->_lrg_map.find(n);
// nothing is raised when n maps to live range id 0
if (dst != 0) {
LRG& lrg_dst = _regalloc->lrgs(dst);
if (finalize_mode) {
_regalloc->raise_pressure(block, lrg_dst, _regalloc->_sched_int_pressure, _regalloc->_sched_float_pressure);
// check to see if we fall over the register pressure cliff here
// (either bank exceeding its high pressure limit turns pressure scheduling on)
if (_regalloc->_sched_int_pressure.current_pressure() > _regalloc->_sched_int_pressure.high_pressure_limit()) {
_scheduling_for_pressure = true;
} else if (_regalloc->_sched_float_pressure.current_pressure() > _regalloc->_sched_float_pressure.high_pressure_limit()) {
_scheduling_for_pressure = true;
} else {
// restore latency scheduling mode
_scheduling_for_pressure = false;
}
} else {
// trial evaluation: only the scratch pressure objects are touched
_regalloc->raise_pressure(block, lrg_dst, _regalloc->_scratch_int_pressure, _regalloc->_scratch_float_pressure);
}
}
}
//------------------------------set_next_call---------------------------------- //------------------------------set_next_call----------------------------------
void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) { void PhaseCFG::set_next_call(Block* block, Node* n, VectorSet& next_call) {
@ -644,7 +786,7 @@ uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, Grow
continue; continue;
} }
if( m->is_Phi() ) continue; if( m->is_Phi() ) continue;
int m_cnt = ready_cnt.at(m->_idx)-1; int m_cnt = ready_cnt.at(m->_idx) - 1;
ready_cnt.at_put(m->_idx, m_cnt); ready_cnt.at_put(m->_idx, m_cnt);
if( m_cnt == 0 ) if( m_cnt == 0 )
worklist.push(m); worklist.push(m);
@ -711,7 +853,7 @@ uint PhaseCFG::sched_call(Block* block, uint node_cnt, Node_List& worklist, Grow
//------------------------------schedule_local--------------------------------- //------------------------------schedule_local---------------------------------
// Topological sort within a block. Someday become a real scheduler. // Topological sort within a block. Someday become a real scheduler.
bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call) { bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, VectorSet& next_call, intptr_t *recalc_pressure_nodes) {
// Already "sorted" are the block start Node (as the first entry), and // Already "sorted" are the block start Node (as the first entry), and
// the block-ending Node and any trailing control projections. We leave // the block-ending Node and any trailing control projections. We leave
// these alone. PhiNodes and ParmNodes are made to follow the block start // these alone. PhiNodes and ParmNodes are made to follow the block start
@ -733,10 +875,24 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, Vecto
return true; return true;
} }
bool block_size_threshold_ok = (block->number_of_nodes() > 10) ? true : false;
// We track the uses of local definitions as input dependences so that
// we know when a given instruction is available to be scheduled.
uint i;
if (OptoRegScheduling && block_size_threshold_ok) {
for (i = 1; i < block->number_of_nodes(); i++) { // setup nodes for pressure calc
Node *n = block->get_node(i);
n->remove_flag(Node::Flag_is_scheduled);
if (!n->is_Phi()) {
recalc_pressure_nodes[n->_idx] = 0x7fff7fff;
}
}
}
// Move PhiNodes and ParmNodes from 1 to cnt up to the start // Move PhiNodes and ParmNodes from 1 to cnt up to the start
uint node_cnt = block->end_idx(); uint node_cnt = block->end_idx();
uint phi_cnt = 1; uint phi_cnt = 1;
uint i;
for( i = 1; i<node_cnt; i++ ) { // Scan for Phi for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
Node *n = block->get_node(i); Node *n = block->get_node(i);
if( n->is_Phi() || // Found a PhiNode or ParmNode if( n->is_Phi() || // Found a PhiNode or ParmNode
@ -744,6 +900,10 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, Vecto
// Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt // Move guy at 'phi_cnt' to the end; makes a hole at phi_cnt
block->map_node(block->get_node(phi_cnt), i); block->map_node(block->get_node(phi_cnt), i);
block->map_node(n, phi_cnt++); // swap Phi/Parm up front block->map_node(n, phi_cnt++); // swap Phi/Parm up front
if (OptoRegScheduling && block_size_threshold_ok) {
// mark n as scheduled
n->add_flag(Node::Flag_is_scheduled);
}
} else { // All others } else { // All others
// Count block-local inputs to 'n' // Count block-local inputs to 'n'
uint cnt = n->len(); // Input count uint cnt = n->len(); // Input count
@ -791,12 +951,18 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, Vecto
// All the prescheduled guys do not hold back internal nodes // All the prescheduled guys do not hold back internal nodes
uint i3; uint i3;
for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled for (i3 = 0; i3 < phi_cnt; i3++) { // For all pre-scheduled
Node *n = block->get_node(i3); // Get pre-scheduled Node *n = block->get_node(i3); // Get pre-scheduled
for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) { for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
Node* m = n->fast_out(j); Node* m = n->fast_out(j);
if (get_block_for_node(m) == block) { // Local-block user if (get_block_for_node(m) == block) { // Local-block user
int m_cnt = ready_cnt.at(m->_idx)-1; int m_cnt = ready_cnt.at(m->_idx)-1;
if (OptoRegScheduling && block_size_threshold_ok) {
// mark m as scheduled
if (m_cnt < 0) {
m->add_flag(Node::Flag_is_scheduled);
}
}
ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count ready_cnt.at_put(m->_idx, m_cnt); // Fix ready count
} }
} }
@ -827,6 +993,18 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, Vecto
worklist.push(d); worklist.push(d);
} }
if (OptoRegScheduling && block_size_threshold_ok) {
// To stage register pressure calculations we need to examine the live set variables
// breaking them up by register class to compartmentalize the calculations.
uint float_pressure = Matcher::float_pressure(FLOATPRESSURE);
_regalloc->_sched_int_pressure.init(INTPRESSURE);
_regalloc->_sched_float_pressure.init(float_pressure);
_regalloc->_scratch_int_pressure.init(INTPRESSURE);
_regalloc->_scratch_float_pressure.init(float_pressure);
_regalloc->compute_entry_block_pressure(block);
}
// Warm up the 'next_call' heuristic bits // Warm up the 'next_call' heuristic bits
needed_for_next_call(block, block->head(), next_call); needed_for_next_call(block, block->head(), next_call);
@ -858,9 +1036,18 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, Vecto
#endif #endif
// Select and pop a ready guy from worklist // Select and pop a ready guy from worklist
Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt); Node* n = select(block, worklist, ready_cnt, next_call, phi_cnt, recalc_pressure_nodes);
block->map_node(n, phi_cnt++); // Schedule him next block->map_node(n, phi_cnt++); // Schedule him next
if (OptoRegScheduling && block_size_threshold_ok) {
n->add_flag(Node::Flag_is_scheduled);
// Now adjust the register pressure with the node we selected
if (!n->is_Phi()) {
adjust_register_pressure(n, block, recalc_pressure_nodes, true);
}
}
#ifndef PRODUCT #ifndef PRODUCT
if (trace_opto_pipelining()) { if (trace_opto_pipelining()) {
tty->print("# select %d: %s", n->_idx, n->Name()); tty->print("# select %d: %s", n->_idx, n->Name());
@ -906,7 +1093,7 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, Vecto
assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types"); assert(m->is_MachProj() && n->is_Mach() && n->as_Mach()->has_call(), "unexpected node types");
continue; continue;
} }
int m_cnt = ready_cnt.at(m->_idx)-1; int m_cnt = ready_cnt.at(m->_idx) - 1;
ready_cnt.at_put(m->_idx, m_cnt); ready_cnt.at_put(m->_idx, m_cnt);
if( m_cnt == 0 ) if( m_cnt == 0 )
worklist.push(m); worklist.push(m);
@ -925,6 +1112,12 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, Vecto
return false; return false;
} }
if (OptoRegScheduling && block_size_threshold_ok) {
_regalloc->compute_exit_block_pressure(block);
block->_reg_pressure = _regalloc->_sched_int_pressure.final_pressure();
block->_freg_pressure = _regalloc->_sched_float_pressure.final_pressure();
}
#ifndef PRODUCT #ifndef PRODUCT
if (trace_opto_pipelining()) { if (trace_opto_pipelining()) {
tty->print_cr("#"); tty->print_cr("#");
@ -933,11 +1126,17 @@ bool PhaseCFG::schedule_local(Block* block, GrowableArray<int>& ready_cnt, Vecto
tty->print("# "); tty->print("# ");
block->get_node(i)->fast_dump(); block->get_node(i)->fast_dump();
} }
tty->print_cr("# ");
if (OptoRegScheduling && block_size_threshold_ok) {
tty->print_cr("# pressure info : %d", block->_pre_order);
_regalloc->print_pressure_info(_regalloc->_sched_int_pressure, "int register info");
_regalloc->print_pressure_info(_regalloc->_sched_float_pressure, "float register info");
}
tty->cr(); tty->cr();
} }
#endif #endif
return true; return true;
} }

View File

@ -222,7 +222,6 @@ class LibraryCallKit : public GraphKit {
bool inline_math_negateExactL(); bool inline_math_negateExactL();
bool inline_math_subtractExactI(bool is_decrement); bool inline_math_subtractExactI(bool is_decrement);
bool inline_math_subtractExactL(bool is_decrement); bool inline_math_subtractExactL(bool is_decrement);
bool inline_exp();
bool inline_pow(); bool inline_pow();
Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName); Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
bool inline_min_max(vmIntrinsics::ID id); bool inline_min_max(vmIntrinsics::ID id);
@ -1535,20 +1534,6 @@ Node* LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeF
} }
} }
//------------------------------inline_exp-------------------------------------
// Inline exp instructions, if possible. The Intel hardware only misses
// really odd corner cases (+/- Infinity). Just uncommon-trap them.
bool LibraryCallKit::inline_exp() {
Node* arg = round_double_node(argument(0));
Node* n = _gvn.transform(new ExpDNode(C, control(), arg));
n = finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
set_result(n);
C->set_has_split_ifs(true); // Has chance for split-if optimization
return true;
}
//------------------------------inline_pow------------------------------------- //------------------------------inline_pow-------------------------------------
// Inline power instructions, if possible. // Inline power instructions, if possible.
bool LibraryCallKit::inline_pow() { bool LibraryCallKit::inline_pow() {
@ -1776,7 +1761,8 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false; case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false;
case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_math(id) : false; case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_math(id) : false;
case vmIntrinsics::_dexp: return Matcher::has_match_rule(Op_ExpD) ? inline_exp() : case vmIntrinsics::_dexp:
return (UseSSE >= 2) ? runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dexp(), "dexp") :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dexp), "EXP"); runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dexp), "EXP");
case vmIntrinsics::_dpow: return Matcher::has_match_rule(Op_PowD) ? inline_pow() : case vmIntrinsics::_dpow: return Matcher::has_match_rule(Op_PowD) ? inline_pow() :
runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow), "POW"); runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow), "POW");

View File

@ -41,7 +41,14 @@
// block is put on the worklist. // block is put on the worklist.
// The locally live-in stuff is computed once and added to predecessor // The locally live-in stuff is computed once and added to predecessor
// live-out sets. This separate compilation is done in the outer loop below. // live-out sets. This separate compilation is done in the outer loop below.
PhaseLive::PhaseLive( const PhaseCFG &cfg, const LRG_List &names, Arena *arena ) : Phase(LIVE), _cfg(cfg), _names(names), _arena(arena), _live(0) { PhaseLive::PhaseLive(const PhaseCFG &cfg, const LRG_List &names, Arena *arena, bool keep_deltas)
: Phase(LIVE),
_cfg(cfg),
_names(names),
_arena(arena),
_live(0),
_livein(0),
_keep_deltas(keep_deltas) {
} }
void PhaseLive::compute(uint maxlrg) { void PhaseLive::compute(uint maxlrg) {
@ -56,6 +63,13 @@ void PhaseLive::compute(uint maxlrg) {
_live[i].initialize(_maxlrg); _live[i].initialize(_maxlrg);
} }
if (_keep_deltas) {
_livein = (IndexSet*)_arena->Amalloc(sizeof(IndexSet) * _cfg.number_of_blocks());
for (i = 0; i < _cfg.number_of_blocks(); i++) {
_livein[i].initialize(_maxlrg);
}
}
// Init the sparse arrays for delta-sets. // Init the sparse arrays for delta-sets.
ResourceMark rm; // Nuke temp storage on exit ResourceMark rm; // Nuke temp storage on exit
@ -124,7 +138,10 @@ void PhaseLive::compute(uint maxlrg) {
// PhiNode uses go in the live-out set of prior blocks. // PhiNode uses go in the live-out set of prior blocks.
for (uint k = i; k > 0; k--) { for (uint k = i; k > 0; k--) {
add_liveout(p, _names.at(block->get_node(k-1)->in(l)->_idx), first_pass); Node *phi = block->get_node(k - 1);
if (l < phi->req()) {
add_liveout(p, _names.at(phi->in(l)->_idx), first_pass);
}
} }
} }
freeset(block); freeset(block);
@ -200,8 +217,11 @@ IndexSet *PhaseLive::getfreeset( ) {
} }
// Free an IndexSet from a block. // Free an IndexSet from a block.
void PhaseLive::freeset( const Block *p ) { void PhaseLive::freeset( Block *p ) {
IndexSet *f = _deltas[p->_pre_order-1]; IndexSet *f = _deltas[p->_pre_order-1];
if ( _keep_deltas ) {
add_livein(p, f);
}
f->set_next(_free_IndexSet); f->set_next(_free_IndexSet);
_free_IndexSet = f; // Drop onto free list _free_IndexSet = f; // Drop onto free list
_deltas[p->_pre_order-1] = NULL; _deltas[p->_pre_order-1] = NULL;
@ -249,10 +269,23 @@ void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
} }
} }
// Merge every live range in 'lo' into the live-in set recorded for block 'p'.
// The per-block sets live in _livein, indexed by the block's pre-order number
// minus one; that array is only allocated by compute() when _keep_deltas is set.
void PhaseLive::add_livein(Block *p, IndexSet *lo) {
  IndexSet *block_livein = &_livein[p->_pre_order-1];
  IndexSetIterator lo_iter(lo);
  // The iterator yields 0 once the set is exhausted.
  for (uint lrg_id = lo_iter.next(); lrg_id != 0; lrg_id = lo_iter.next()) {
    block_livein->insert(lrg_id);
  }
}
#ifndef PRODUCT #ifndef PRODUCT
// Dump the live-out set for a block // Dump the live-out set for a block
void PhaseLive::dump( const Block *b ) const { void PhaseLive::dump( const Block *b ) const {
tty->print("Block %d: ",b->_pre_order); tty->print("Block %d: ",b->_pre_order);
if ( _keep_deltas ) {
tty->print("LiveIn: "); _livein[b->_pre_order-1].dump();
}
tty->print("LiveOut: "); _live[b->_pre_order-1].dump(); tty->print("LiveOut: "); _live[b->_pre_order-1].dump();
uint cnt = b->number_of_nodes(); uint cnt = b->number_of_nodes();
for( uint i=0; i<cnt; i++ ) { for( uint i=0; i<cnt; i++ ) {

View File

@ -46,7 +46,8 @@ typedef GrowableArray<uint> LRG_List;
class PhaseLive : public Phase { class PhaseLive : public Phase {
// Array of Sets of values live at the start of a block. // Array of Sets of values live at the start of a block.
// Indexed by block pre-order number. // Indexed by block pre-order number.
IndexSet *_live; IndexSet *_live; // live out
IndexSet *_livein; // live in
// Array of Sets of values defined locally in the block // Array of Sets of values defined locally in the block
// Indexed by block pre-order number. // Indexed by block pre-order number.
@ -62,15 +63,17 @@ class PhaseLive : public Phase {
const LRG_List &_names; // Mapping from Nodes to live ranges const LRG_List &_names; // Mapping from Nodes to live ranges
uint _maxlrg; // Largest live-range number uint _maxlrg; // Largest live-range number
Arena *_arena; Arena *_arena;
bool _keep_deltas; // Retain live in information
IndexSet *getset( Block *p ); IndexSet *getset( Block *p );
IndexSet *getfreeset( ); IndexSet *getfreeset( );
void freeset( const Block *p ); void freeset( Block *p );
void add_liveout( Block *p, uint r, VectorSet &first_pass ); void add_liveout( Block *p, uint r, VectorSet &first_pass );
void add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ); void add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass );
void add_livein( Block *p, IndexSet *lo );
public: public:
PhaseLive(const PhaseCFG &cfg, const LRG_List &names, Arena *arena); PhaseLive(const PhaseCFG &cfg, const LRG_List &names, Arena *arena, bool keep_deltas);
~PhaseLive() {} ~PhaseLive() {}
// Compute liveness info // Compute liveness info
void compute(uint maxlrg); void compute(uint maxlrg);
@ -79,6 +82,7 @@ public:
// Return the live-out set for this block // Return the live-out set for this block
IndexSet *live( const Block * b ) { return &_live[b->_pre_order-1]; } IndexSet *live( const Block * b ) { return &_live[b->_pre_order-1]; }
IndexSet *livein( const Block * b ) { return &_livein[b->_pre_order - 1]; }
#ifndef PRODUCT #ifndef PRODUCT
void dump( const Block *b ) const; void dump( const Block *b ) const;

View File

@ -290,6 +290,7 @@ public:
if (phi() == NULL) { if (phi() == NULL) {
return NULL; return NULL;
} }
assert(phi()->is_Phi(), "should be PhiNode");
Node *ln = phi()->in(0); Node *ln = phi()->in(0);
if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) { if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) {
return (CountedLoopNode*)ln; return (CountedLoopNode*)ln;

Some files were not shown because too many files have changed in this diff Show More