6813212: factor duplicated assembly code for general subclass check (for 6655638)

Code in interp_masm, stubGenerator, c1_LIRAssembler, and AD files moved into MacroAssembler.

Reviewed-by: kvn
This commit is contained in:
John R Rose 2009-03-13 18:39:22 -07:00
parent de67e52949
commit b8dbe8d8f6
17 changed files with 734 additions and 617 deletions

View File

@ -2767,6 +2767,268 @@ void MacroAssembler::lookup_interface_method(Register recv_klass,
}
void MacroAssembler::check_klass_subtype(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Label& L_success) {
Label L_failure, L_pop_to_failure;
check_klass_subtype_fast_path(sub_klass, super_klass,
temp_reg, temp2_reg,
&L_success, &L_failure, NULL);
Register sub_2 = sub_klass;
Register sup_2 = super_klass;
if (!sub_2->is_global()) sub_2 = L0;
if (!sup_2->is_global()) sup_2 = L1;
save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
check_klass_subtype_slow_path(sub_2, sup_2,
L2, L3, L4, L5,
NULL, &L_pop_to_failure);
// on success:
restore();
ba(false, L_success);
delayed()->nop();
// on failure:
bind(L_pop_to_failure);
restore();
bind(L_failure);
}
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Label* L_success,
Label* L_failure,
Label* L_slow_path,
RegisterConstant super_check_offset,
Register instanceof_hack) {
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::super_check_offset_offset_in_bytes());
bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
bool need_slow_path = (must_load_sco ||
super_check_offset.constant_or_zero() == sco_offset);
assert_different_registers(sub_klass, super_klass, temp_reg);
if (super_check_offset.is_register()) {
assert_different_registers(sub_klass, super_klass,
super_check_offset.as_register());
} else if (must_load_sco) {
assert(temp2_reg != noreg, "supply either a temp or a register offset");
}
Label L_fallthrough;
int label_nulls = 0;
if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1 || instanceof_hack != noreg ||
(L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
"at most one NULL in the batch, usually");
// Support for the instanceof hack, which uses delay slots to
// set a destination register to zero or one.
bool do_bool_sets = (instanceof_hack != noreg);
#define BOOL_SET(bool_value) \
if (do_bool_sets && bool_value >= 0) \
set(bool_value, instanceof_hack)
#define DELAYED_BOOL_SET(bool_value) \
if (do_bool_sets && bool_value >= 0) \
delayed()->set(bool_value, instanceof_hack); \
else delayed()->nop()
// Hacked ba(), which may only be used just before L_fallthrough.
#define FINAL_JUMP(label, bool_value) \
if (&(label) == &L_fallthrough) { \
BOOL_SET(bool_value); \
} else { \
ba((do_bool_sets && bool_value >= 0), label); \
DELAYED_BOOL_SET(bool_value); \
}
// If the pointers are equal, we are done (e.g., String[] elements).
// This self-check enables sharing of secondary supertype arrays among
// non-primary types such as array-of-interface. Otherwise, each such
// type would need its own customized SSA.
// We move this check to the front of the fast path because many
// type checks are in fact trivially successful in this manner,
// so we get a nicely predicted branch right at the start of the check.
cmp(super_klass, sub_klass);
brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
DELAYED_BOOL_SET(1);
// Check the supertype display:
if (must_load_sco) {
// The super check offset is always positive...
lduw(super_klass, sco_offset, temp2_reg);
super_check_offset = RegisterConstant(temp2_reg);
}
ld_ptr(sub_klass, super_check_offset, temp_reg);
cmp(super_klass, temp_reg);
// This check has worked decisively for primary supers.
// Secondary supers are sought in the super_cache ('super_cache_addr').
// (Secondary supers are interfaces and very deeply nested subtypes.)
// This works in the same check above because of a tricky aliasing
// between the super_cache and the primary super display elements.
// (The 'super_check_addr' can address either, as the case requires.)
// Note that the cache is updated below if it does not help us find
// what we need immediately.
// So if it was a primary super, we can just fail immediately.
// Otherwise, it's the slow path for us (no success at this point).
if (super_check_offset.is_register()) {
brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
delayed(); if (do_bool_sets) BOOL_SET(1);
// if !do_bool_sets, sneak the next cmp into the delay slot:
cmp(super_check_offset.as_register(), sc_offset);
if (L_failure == &L_fallthrough) {
brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
delayed()->nop();
BOOL_SET(0); // fallthrough on failure
} else {
brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
DELAYED_BOOL_SET(0);
FINAL_JUMP(*L_slow_path, -1); // -1 => vanilla delay slot
}
} else if (super_check_offset.as_constant() == sc_offset) {
// Need a slow path; fast failure is impossible.
if (L_slow_path == &L_fallthrough) {
brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
DELAYED_BOOL_SET(1);
} else {
brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
delayed()->nop();
FINAL_JUMP(*L_success, 1);
}
} else {
// No slow path; it's a fast decision.
if (L_failure == &L_fallthrough) {
brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
DELAYED_BOOL_SET(1);
BOOL_SET(0);
} else {
brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
DELAYED_BOOL_SET(0);
FINAL_JUMP(*L_success, 1);
}
}
bind(L_fallthrough);
#undef final_jump
#undef bool_set
#undef DELAYED_BOOL_SET
#undef final_jump
}
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
Register super_klass,
Register count_temp,
Register scan_temp,
Register scratch_reg,
Register coop_reg,
Label* L_success,
Label* L_failure) {
assert_different_registers(sub_klass, super_klass,
count_temp, scan_temp, scratch_reg, coop_reg);
Label L_fallthrough, L_loop;
int label_nulls = 0;
if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in the batch");
// a couple of useful fields in sub_klass:
int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_supers_offset_in_bytes());
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
// Do a linear scan of the secondary super-klass chain.
// This code is rarely used, so simplicity is a virtue here.
#ifndef PRODUCT
int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
inc_counter((address) pst_counter, count_temp, scan_temp);
#endif
// We will consult the secondary-super array.
ld_ptr(sub_klass, ss_offset, scan_temp);
// Compress superclass if necessary.
Register search_key = super_klass;
bool decode_super_klass = false;
if (UseCompressedOops) {
if (coop_reg != noreg) {
encode_heap_oop_not_null(super_klass, coop_reg);
search_key = coop_reg;
} else {
encode_heap_oop_not_null(super_klass);
decode_super_klass = true; // scarce temps!
}
// The superclass is never null; it would be a basic system error if a null
// pointer were to sneak in here. Note that we have already loaded the
// Klass::super_check_offset from the super_klass in the fast path,
// so if there is a null in that register, we are already in the afterlife.
}
// Load the array length. (Positive movl does right thing on LP64.)
lduw(scan_temp, arrayOopDesc::length_offset_in_bytes(), count_temp);
// Check for empty secondary super list
tst(count_temp);
// Top of search loop
bind(L_loop);
br(Assembler::equal, false, Assembler::pn, *L_failure);
delayed()->add(scan_temp, heapOopSize, scan_temp);
assert(heapOopSize != 0, "heapOopSize should be initialized");
// Skip the array header in all array accesses.
int elem_offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
elem_offset -= heapOopSize; // the scan pointer was pre-incremented also
// Load next super to check
if (UseCompressedOops) {
// Don't use load_heap_oop; we don't want to decode the element.
lduw( scan_temp, elem_offset, scratch_reg );
} else {
ld_ptr( scan_temp, elem_offset, scratch_reg );
}
// Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
cmp(scratch_reg, search_key);
// A miss means we are NOT a subtype and need to keep looping
brx(Assembler::notEqual, false, Assembler::pn, L_loop);
delayed()->deccc(count_temp); // decrement trip counter in delay slot
// Falling out the bottom means we found a hit; we ARE a subtype
if (decode_super_klass) decode_heap_oop(super_klass);
// Success. Cache the super we found and proceed in triumph.
st_ptr(super_klass, sub_klass, sc_offset);
if (L_success != &L_fallthrough) {
ba(false, *L_success);
delayed()->nop();
}
bind(L_fallthrough);
}
void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
Register temp_reg,
Label& done, Label* slow_case,

View File

@ -2327,6 +2327,46 @@ class MacroAssembler: public Assembler {
Register temp_reg, Register temp2_reg,
Label& no_such_interface);
// Test sub_klass against super_klass, with fast and slow paths.
// The fast path produces a tri-state answer: yes / no / maybe-slow.
// One of the three labels can be NULL, meaning take the fall-through.
// If super_check_offset is -1, the value is loaded up from super_klass.
// No registers are killed, except temp_reg and temp2_reg.
// If super_check_offset is not -1, temp2_reg is not used and can be noreg.
void check_klass_subtype_fast_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Label* L_success,
Label* L_failure,
Label* L_slow_path,
RegisterConstant super_check_offset = RegisterConstant(-1),
Register instanceof_hack = noreg);
// The rest of the type check; must be wired to a corresponding fast path.
// It does not repeat the fast path logic, so don't use it standalone.
// The temp_reg can be noreg, if no temps are available.
// It can also be sub_klass or super_klass, meaning it's OK to kill that one.
// Updates the sub's secondary super cache as necessary.
void check_klass_subtype_slow_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Register temp3_reg,
Register temp4_reg,
Label* L_success,
Label* L_failure);
// Simplified, combined version, good for typical uses.
// Falls through on failure.
void check_klass_subtype(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Label& L_success);
// Stack overflow checking
// Note: this clobbers G3_scratch

View File

@ -2393,23 +2393,11 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
// get instance klass
load(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc), k_RInfo, T_OBJECT, NULL);
// get super_check_offset
load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
// See if we get an immediate positive hit
__ ld_ptr(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr->as_register());
__ cmp(k_RInfo, O7);
__ br(Assembler::equal, false, Assembler::pn, done);
__ delayed()->nop();
// check for immediate negative hit
__ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
__ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
__ delayed()->nop();
// check for self
__ cmp(klass_RInfo, k_RInfo);
__ br(Assembler::equal, false, Assembler::pn, done);
__ delayed()->nop();
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done, stub->entry(), NULL);
// assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
__ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
__ delayed()->nop();
__ cmp(G3, 0);
@ -2493,58 +2481,30 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ delayed()->nop();
__ bind(done);
} else {
bool need_slow_path = true;
if (k->is_loaded()) {
load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
// See if we get an immediate positive hit
__ cmp(Rtmp1, k_RInfo );
__ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
__ delayed()->nop();
} else {
// See if we get an immediate positive hit
assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
__ cmp(Rtmp1, k_RInfo );
__ br(Assembler::equal, false, Assembler::pn, done);
// check for self
__ delayed()->cmp(klass_RInfo, k_RInfo);
__ br(Assembler::equal, false, Assembler::pn, done);
__ delayed()->nop();
// assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
__ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
__ delayed()->nop();
__ cmp(G3, 0);
__ br(Assembler::equal, false, Assembler::pn, *stub->entry());
__ delayed()->nop();
}
__ bind(done);
if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
need_slow_path = false;
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
(need_slow_path ? &done : NULL),
stub->entry(), NULL,
RegisterConstant(k->super_check_offset()));
} else {
assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
// See if we get an immediate positive hit
load(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr, T_OBJECT);
__ cmp(k_RInfo, O7);
__ br(Assembler::equal, false, Assembler::pn, done);
__ delayed()->nop();
// check for immediate negative hit
__ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
__ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
// check for self
__ delayed()->cmp(klass_RInfo, k_RInfo);
__ br(Assembler::equal, false, Assembler::pn, done);
__ delayed()->nop();
// assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7,
&done, stub->entry(), NULL);
}
if (need_slow_path) {
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
__ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
__ delayed()->nop();
__ cmp(G3, 0);
__ br(Assembler::equal, false, Assembler::pn, *stub->entry());
__ delayed()->nop();
__ bind(done);
}
__ bind(done);
}
__ mov(obj, dst);
} else if (code == lir_instanceof) {
@ -2582,58 +2542,32 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ set(0, dst);
__ bind(done);
} else {
bool need_slow_path = true;
if (k->is_loaded()) {
assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
// See if we get an immediate positive hit
__ cmp(Rtmp1, k_RInfo );
__ br(Assembler::equal, true, Assembler::pt, done);
__ delayed()->set(1, dst);
__ set(0, dst);
__ bind(done);
} else {
// See if we get an immediate positive hit
assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
__ cmp(Rtmp1, k_RInfo );
__ br(Assembler::equal, true, Assembler::pt, done);
__ delayed()->set(1, dst);
// check for self
__ cmp(klass_RInfo, k_RInfo);
__ br(Assembler::equal, true, Assembler::pt, done);
__ delayed()->set(1, dst);
// assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
__ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
__ delayed()->nop();
__ mov(G3, dst);
__ bind(done);
}
if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
need_slow_path = false;
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg,
(need_slow_path ? &done : NULL),
(need_slow_path ? &done : NULL), NULL,
RegisterConstant(k->super_check_offset()),
dst);
} else {
assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), dst, T_INT, NULL);
// See if we get an immediate positive hit
load(klass_RInfo, dst, FrameMap::O7_oop_opr, T_OBJECT);
__ cmp(k_RInfo, O7);
__ br(Assembler::equal, true, Assembler::pt, done);
__ delayed()->set(1, dst);
// check for immediate negative hit
__ cmp(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
__ br(Assembler::notEqual, true, Assembler::pt, done);
__ delayed()->set(0, dst);
// check for self
__ cmp(klass_RInfo, k_RInfo);
__ br(Assembler::equal, true, Assembler::pt, done);
__ delayed()->set(1, dst);
// assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst,
&done, &done, NULL,
RegisterConstant(-1),
dst);
}
if (need_slow_path) {
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
__ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
__ delayed()->nop();
__ mov(G3, dst);
__ bind(done);
}
__ bind(done);
}
} else {
ShouldNotReachHere();

View File

@ -714,38 +714,19 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
// sub : G3, argument, destroyed
// super: G1, argument, not changed
// raddr: O7, blown by call
Label loop, miss;
Label miss;
__ save_frame(0); // Blow no registers!
__ ld_ptr( G3, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
__ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0); // length in l0
__ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1); // ptr into array
__ clr(L4); // Index
// Load a little early; will load 1 off the end of the array.
// Ok for now; revisit if we have other uses of this routine.
__ ld_ptr(L1,0,L2); // Will load a little early
// The scan loop
__ bind(loop);
__ add(L1,wordSize,L1); // Bump by OOP size
__ cmp(L4,L0);
__ br(Assembler::equal,false,Assembler::pn,miss);
__ delayed()->inc(L4); // Bump index
__ subcc(L2,G1,L3); // Check for match; zero in L3 for a hit
__ brx( Assembler::notEqual, false, Assembler::pt, loop );
__ delayed()->ld_ptr(L1,0,L2); // Will load a little early
// Got a hit; report success; set cache
__ st_ptr( G1, G3, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
__ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss);
__ mov(1, G3);
__ ret(); // Result in G5 is ok; flags set
__ ret(); // Result in G5 is 'true'
__ delayed()->restore(); // free copy or add can go here
__ bind(miss);
__ mov(0, G3);
__ ret(); // Result in G5 is ok; flags set
__ ret(); // Result in G5 is 'false'
__ delayed()->restore(); // free copy or add can go here
}

View File

@ -866,65 +866,18 @@ void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
Register Rtmp2,
Register Rtmp3,
Label &ok_is_subtype ) {
Label not_subtype, loop;
Label not_subtype;
// Profile the not-null value's klass.
profile_typecheck(Rsub_klass, Rtmp1);
// Load the super-klass's check offset into Rtmp1
ld( Rsuper_klass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1 );
// Load from the sub-klass's super-class display list, or a 1-word cache of
// the secondary superclass list, or a failing value with a sentinel offset
// if the super-klass is an interface or exceptionally deep in the Java
// hierarchy and we have to scan the secondary superclass list the hard way.
ld_ptr( Rsub_klass, Rtmp1, Rtmp2 );
// See if we get an immediate positive hit
cmp( Rtmp2, Rsuper_klass );
brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
// In the delay slot, check for immediate negative hit
delayed()->cmp( Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
br( Assembler::notEqual, false, Assembler::pt, not_subtype );
// In the delay slot, check for self
delayed()->cmp( Rsub_klass, Rsuper_klass );
brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
check_klass_subtype_fast_path(Rsub_klass, Rsuper_klass,
Rtmp1, Rtmp2,
&ok_is_subtype, &not_subtype, NULL);
// Now do a linear scan of the secondary super-klass chain.
delayed()->ld_ptr( Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), Rtmp2 );
// compress superclass
if (UseCompressedOops) encode_heap_oop(Rsuper_klass);
// Rtmp2 holds the objArrayOop of secondary supers.
ld( Rtmp2, arrayOopDesc::length_offset_in_bytes(), Rtmp1 );// Load the array length
// Check for empty secondary super list
tst(Rtmp1);
// Top of search loop
bind( loop );
br( Assembler::equal, false, Assembler::pn, not_subtype );
delayed()->nop();
// load next super to check
if (UseCompressedOops) {
lduw( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
// Bump array pointer forward one oop
add( Rtmp2, 4, Rtmp2 );
} else {
ld_ptr( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
// Bump array pointer forward one oop
add( Rtmp2, wordSize, Rtmp2);
}
// Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
cmp( Rtmp3, Rsuper_klass );
// A miss means we are NOT a subtype and need to keep looping
brx( Assembler::notEqual, false, Assembler::pt, loop );
delayed()->deccc( Rtmp1 ); // dec trip counter in delay slot
// Falling out the bottom means we found a hit; we ARE a subtype
if (UseCompressedOops) decode_heap_oop(Rsuper_klass);
br( Assembler::always, false, Assembler::pt, ok_is_subtype );
// Update the cache
delayed()->st_ptr( Rsuper_klass, Rsub_klass,
sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
check_klass_subtype_slow_path(Rsub_klass, Rsuper_klass,
Rtmp1, Rtmp2, Rtmp3, /*hack:*/ noreg,
&ok_is_subtype, NULL);
bind(not_subtype);
profile_typecheck_failed(Rtmp1);

View File

@ -900,19 +900,7 @@ class StubGenerator: public StubCodeGenerator {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
address start = __ pc();
Label loop, miss;
// Compare super with sub directly, since super is not in its own SSA.
// The compiler used to emit this test, but we fold it in here,
// to increase overall code density, with no real loss of speed.
{ Label L;
__ cmp(O1, O2);
__ brx(Assembler::notEqual, false, Assembler::pt, L);
__ delayed()->nop();
__ retl();
__ delayed()->addcc(G0,0,O0); // set Z flags, zero result
__ bind(L);
}
Label miss;
#if defined(COMPILER2) && !defined(_LP64)
// Do not use a 'save' because it blows the 64-bit O registers.
@ -936,56 +924,12 @@ class StubGenerator: public StubCodeGenerator {
Register L2_super = L2;
Register L3_index = L3;
#ifdef _LP64
Register L4_ooptmp = L4;
__ check_klass_subtype_slow_path(Rsub, Rsuper,
L0, L1, L2, L3,
NULL, &miss);
if (UseCompressedOops) {
// this must be under UseCompressedOops check, as we rely upon fact
// that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save
// on stack, see several lines above
__ encode_heap_oop(Rsuper, L4_ooptmp);
}
#endif
inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);
__ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
__ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
__ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
__ clr(L3_index); // zero index
// Load a little early; will load 1 off the end of the array.
// Ok for now; revisit if we have other uses of this routine.
if (UseCompressedOops) {
__ lduw(L1_ary_ptr,0,L2_super);// Will load a little early
} else {
__ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
}
assert(heapOopSize != 0, "heapOopSize should be initialized");
// The scan loop
__ BIND(loop);
__ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size
__ cmp(L3_index,L0_ary_len);
__ br(Assembler::equal,false,Assembler::pn,miss);
__ delayed()->inc(L3_index); // Bump index
if (UseCompressedOops) {
#ifdef _LP64
__ subcc(L2_super,L4_ooptmp,Rret); // Check for match; zero in Rret for a hit
__ br( Assembler::notEqual, false, Assembler::pt, loop );
__ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early
#else
ShouldNotReachHere();
#endif
} else {
__ subcc(L2_super,Rsuper,Rret); // Check for match; zero in Rret for a hit
__ brx( Assembler::notEqual, false, Assembler::pt, loop );
__ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
}
// Got a hit; report success; set cache. Cache load doesn't
// happen here; for speed it is directly emitted by the compiler.
__ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
// Match falls through here.
__ addcc(G0,0,Rret); // set Z flags, Z result
#if defined(COMPILER2) && !defined(_LP64)
__ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
@ -999,7 +943,6 @@ class StubGenerator: public StubCodeGenerator {
__ delayed()->restore();
#endif
// Hit or miss falls through here
__ BIND(miss);
__ addcc(G0,1,Rret); // set NZ flags, NZ result
@ -2330,51 +2273,31 @@ class StubGenerator: public StubCodeGenerator {
Register super_check_offset,
Register super_klass,
Register temp,
Label& L_success,
Register deccc_hack = noreg) {
Label& L_success) {
assert_different_registers(sub_klass, super_check_offset, super_klass, temp);
BLOCK_COMMENT("type_check:");
Label L_miss;
Label L_miss, L_pop_to_miss;
assert_clean_int(super_check_offset, temp);
// maybe decrement caller's trip count:
#define DELAY_SLOT delayed(); \
{ if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); }
// if the pointers are equal, we are done (e.g., String[] elements)
__ cmp(sub_klass, super_klass);
__ brx(Assembler::equal, true, Assembler::pt, L_success);
__ DELAY_SLOT;
// check the supertype display:
__ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
__ cmp(super_klass, temp); // test the super type
__ brx(Assembler::equal, true, Assembler::pt, L_success);
__ DELAY_SLOT;
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
__ cmp(super_klass, sc_offset);
__ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
__ delayed()->nop();
__ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
&L_success, &L_miss, NULL,
super_check_offset);
BLOCK_COMMENT("type_check_slow_path:");
__ save_frame(0);
__ mov(sub_klass->after_save(), O1);
// mov(super_klass->after_save(), O2); //fill delay slot
assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation");
__ call(StubRoutines::Sparc::_partial_subtype_check);
__ delayed()->mov(super_klass->after_save(), O2);
__ check_klass_subtype_slow_path(sub_klass->after_save(),
super_klass->after_save(),
L0, L1, L2, L4,
NULL, &L_pop_to_miss);
__ ba(false, L_success);
__ delayed()->restore();
__ bind(L_pop_to_miss);
__ restore();
// Upon return, the condition codes are already set.
__ brx(Assembler::equal, true, Assembler::pt, L_success);
__ DELAY_SLOT;
#undef DELAY_SLOT
// Fall through on failure!
__ BIND(L_miss);
}
@ -2411,7 +2334,7 @@ class StubGenerator: public StubCodeGenerator {
gen_write_ref_array_pre_barrier(O1, O2);
#ifdef ASSERT
// We sometimes save a frame (see partial_subtype_check below).
// We sometimes save a frame (see generate_type_check below).
// If this will cause trouble, let's fail now instead of later.
__ save_frame(0);
__ restore();
@ -2455,41 +2378,39 @@ class StubGenerator: public StubCodeGenerator {
// G3, G4, G5 --- current oop, oop.klass, oop.klass.super
__ align(16);
__ bind(store_element);
// deccc(G1_remain); // decrement the count (hoisted)
__ BIND(store_element);
__ deccc(G1_remain); // decrement the count
__ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
__ inc(O5_offset, heapOopSize); // step to next offset
__ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
__ delayed()->set(0, O0); // return -1 on success
// ======== loop entry is here ========
__ bind(load_element);
__ BIND(load_element);
__ load_heap_oop(O0_from, O5_offset, G3_oop); // load the oop
__ br_null(G3_oop, true, Assembler::pt, store_element);
__ delayed()->deccc(G1_remain); // decrement the count
__ delayed()->nop();
__ load_klass(G3_oop, G4_klass); // query the object klass
generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
// branch to this on success:
store_element,
// decrement this on success:
G1_remain);
store_element);
// ======== end loop ========
// It was a real error; we must depend on the caller to finish the job.
// Register G1 has number of *remaining* oops, O2 number of *total* oops.
// Emit GC store barriers for the oops we have copied (O2 minus G1),
// and report their number to the caller.
__ bind(fail);
__ BIND(fail);
__ subcc(O2_count, G1_remain, O2_count);
__ brx(Assembler::zero, false, Assembler::pt, done);
__ delayed()->not1(O2_count, O0); // report (-1^K) to caller
__ bind(do_card_marks);
__ BIND(do_card_marks);
gen_write_ref_array_post_barrier(O1_to, O2_count, O3); // store check on O1[0..O2]
__ bind(done);
__ BIND(done);
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
__ retl();
__ delayed()->nop(); // return value in 00

View File

@ -7286,6 +7286,225 @@ void MacroAssembler::lookup_interface_method(Register recv_klass,
}
void MacroAssembler::check_klass_subtype(Register sub_klass,
Register super_klass,
Register temp_reg,
Label& L_success) {
Label L_failure;
check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
bind(L_failure);
}
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Label* L_success,
Label* L_failure,
Label* L_slow_path,
RegisterConstant super_check_offset) {
assert_different_registers(sub_klass, super_klass, temp_reg);
bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
if (super_check_offset.is_register()) {
assert_different_registers(sub_klass, super_klass,
super_check_offset.as_register());
} else if (must_load_sco) {
assert(temp_reg != noreg, "supply either a temp or a register offset");
}
Label L_fallthrough;
int label_nulls = 0;
if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in the batch");
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::super_check_offset_offset_in_bytes());
Address super_check_offset_addr(super_klass, sco_offset);
// Hacked jcc, which "knows" that L_fallthrough, at least, is in
// range of a jccb. If this routine grows larger, reconsider at
// least some of these.
#define local_jcc(assembler_cond, label) \
if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
else jcc( assembler_cond, label) /*omit semi*/
// Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label) \
if (&(label) == &L_fallthrough) { /*do nothing*/ } \
else jmp(label) /*omit semi*/
// If the pointers are equal, we are done (e.g., String[] elements).
// This self-check enables sharing of secondary supertype arrays among
// non-primary types such as array-of-interface. Otherwise, each such
// type would need its own customized SSA.
// We move this check to the front of the fast path because many
// type checks are in fact trivially successful in this manner,
// so we get a nicely predicted branch right at the start of the check.
cmpptr(sub_klass, super_klass);
local_jcc(Assembler::equal, *L_success);
// Check the supertype display:
if (must_load_sco) {
// Positive movl does right thing on LP64.
movl(temp_reg, super_check_offset_addr);
super_check_offset = RegisterConstant(temp_reg);
}
Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
cmpptr(super_klass, super_check_addr); // load displayed supertype
// This check has worked decisively for primary supers.
// Secondary supers are sought in the super_cache ('super_cache_addr').
// (Secondary supers are interfaces and very deeply nested subtypes.)
// This works in the same check above because of a tricky aliasing
// between the super_cache and the primary super display elements.
// (The 'super_check_addr' can address either, as the case requires.)
// Note that the cache is updated below if it does not help us find
// what we need immediately.
// So if it was a primary super, we can just fail immediately.
// Otherwise, it's the slow path for us (no success at this point).
if (super_check_offset.is_register()) {
local_jcc(Assembler::equal, *L_success);
cmpl(super_check_offset.as_register(), sc_offset);
if (L_failure == &L_fallthrough) {
local_jcc(Assembler::equal, *L_slow_path);
} else {
local_jcc(Assembler::notEqual, *L_failure);
final_jmp(*L_slow_path);
}
} else if (super_check_offset.as_constant() == sc_offset) {
// Need a slow path; fast failure is impossible.
if (L_slow_path == &L_fallthrough) {
local_jcc(Assembler::equal, *L_success);
} else {
local_jcc(Assembler::notEqual, *L_slow_path);
final_jmp(*L_success);
}
} else {
// No slow path; it's a fast decision.
if (L_failure == &L_fallthrough) {
local_jcc(Assembler::equal, *L_success);
} else {
local_jcc(Assembler::notEqual, *L_failure);
final_jmp(*L_success);
}
}
bind(L_fallthrough);
#undef local_jcc
#undef final_jmp
}
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Label* L_success,
Label* L_failure,
bool set_cond_codes) {
assert_different_registers(sub_klass, super_klass, temp_reg);
if (temp2_reg != noreg)
assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
Label L_fallthrough;
int label_nulls = 0;
if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in the batch");
// a couple of useful fields in sub_klass:
int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_supers_offset_in_bytes());
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
// Do a linear scan of the secondary super-klass chain.
// This code is rarely used, so simplicity is a virtue here.
// The repne_scan instruction uses fixed registers, which we must spill.
// Don't worry too much about pre-existing connections with the input regs.
assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
// Get super_klass value into rax (even if it was in rdi or rcx).
bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
if (super_klass != rax || UseCompressedOops) {
if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
mov(rax, super_klass);
}
if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
#ifndef PRODUCT
int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
ExternalAddress pst_counter_addr((address) pst_counter);
NOT_LP64( incrementl(pst_counter_addr) );
LP64_ONLY( lea(rcx, pst_counter_addr) );
LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT
// We will consult the secondary-super array.
movptr(rdi, secondary_supers_addr);
// Load the array length. (Positive movl does right thing on LP64.)
movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
// Skip to start of data.
addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Scan RCX words at [RDI] for an occurrence of RAX.
// Set NZ/Z based on last compare.
#ifdef _LP64
// This part is tricky, as values in supers array could be 32 or 64 bit wide
// and we store values in objArrays always encoded, thus we need to encode
// the value of rax before repne. Note that rax is dead after the repne.
if (UseCompressedOops) {
encode_heap_oop_not_null(rax);
// The superclass is never null; it would be a basic system error if a null
// pointer were to sneak in here. Note that we have already loaded the
// Klass::super_check_offset from the super_klass in the fast path,
// so if there is a null in that register, we are already in the afterlife.
repne_scanl();
} else
#endif // _LP64
repne_scan();
// Unspill the temp. registers:
if (pushed_rdi) pop(rdi);
if (pushed_rcx) pop(rcx);
if (pushed_rax) pop(rax);
if (set_cond_codes) {
// Special hack for the AD files: rdi is guaranteed non-zero.
assert(!pushed_rdi, "rdi must be left non-NULL");
// Also, the condition codes are properly set Z/NZ on succeed/failure.
}
if (L_failure == &L_fallthrough)
jccb(Assembler::notEqual, *L_failure);
else jcc(Assembler::notEqual, *L_failure);
// Success. Cache the super we found and proceed in triumph.
movptr(super_cache_addr, super_klass);
if (L_success != &L_fallthrough) {
jmp(*L_success);
}
#undef IS_A_TEMP
bind(L_fallthrough);
}
void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
ucomisd(dst, as_Address(src));
}

View File

@ -1807,6 +1807,40 @@ class MacroAssembler: public Assembler {
Register scan_temp,
Label& no_such_interface);
// Test sub_klass against super_klass, with fast and slow paths.
// The fast path produces a tri-state answer: yes / no / maybe-slow.
// One of the three labels can be NULL, meaning take the fall-through.
// If super_check_offset is -1, the value is loaded up from super_klass.
// No registers are killed, except temp_reg.
void check_klass_subtype_fast_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Label* L_success,
Label* L_failure,
Label* L_slow_path,
RegisterConstant super_check_offset = RegisterConstant(-1));
// The rest of the type check; must be wired to a corresponding fast path.
// It does not repeat the fast path logic, so don't use it standalone.
// The temp_reg and temp2_reg can be noreg, if no temps are available.
// Updates the sub's secondary super cache as necessary.
// If set_cond_codes, condition codes will be Z on success, NZ on failure.
void check_klass_subtype_slow_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Label* L_success,
Label* L_failure,
bool set_cond_codes = false);
// Simplified, combined version, good for typical uses.
// Falls through on failure.
void check_klass_subtype(Register sub_klass,
Register super_klass,
Register temp_reg,
Label& L_success);
//----
void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0

View File

@ -1598,18 +1598,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
// get instance klass
__ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
// get super_check_offset
__ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
// See if we get an immediate positive hit
__ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
__ jcc(Assembler::equal, done);
// check for immediate negative hit
__ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
__ jcc(Assembler::notEqual, *stub->entry());
// check for self
__ cmpptr(klass_RInfo, k_RInfo);
__ jcc(Assembler::equal, done);
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ push(klass_RInfo);
__ push(k_RInfo);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
@ -1735,17 +1726,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
}
__ bind(done);
} else {
__ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
// See if we get an immediate positive hit
__ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
__ jcc(Assembler::equal, done);
// check for immediate negative hit
__ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
__ jcc(Assembler::notEqual, *stub->entry());
// check for self
__ cmpptr(klass_RInfo, k_RInfo);
__ jcc(Assembler::equal, done);
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ push(klass_RInfo);
__ push(k_RInfo);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
@ -1821,23 +1804,15 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ pop(dst);
__ jmp(done);
}
} else {
#else
{ // YUCK
}
else // next block is unconditional if LP64:
#endif // LP64
{
assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
__ movl(dst, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes()));
// See if we get an immediate positive hit
__ cmpptr(k_RInfo, Address(klass_RInfo, dst, Address::times_1));
__ jcc(Assembler::equal, one);
// check for immediate negative hit
__ cmpl(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
__ jcc(Assembler::notEqual, zero);
// check for self
__ cmpptr(klass_RInfo, k_RInfo);
__ jcc(Assembler::equal, one);
// perform the fast part of the checking logic
__ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, dst, &one, &zero, NULL);
// call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ push(klass_RInfo);
__ push(k_RInfo);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));

View File

@ -1354,6 +1354,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
case slow_subtype_check_id:
{
// Typical calling sequence:
// __ push(klass_RInfo); // object klass or other subclass
// __ push(sup_k_RInfo); // array element klass or other superclass
// __ call(slow_subtype_check);
// Note that the subclass is pushed first, and is therefore deepest.
// Previous versions of this code reversed the names 'sub' and 'super'.
// This was operationally harmless but made the code unreadable.
enum layout {
rax_off, SLOT2(raxH_off)
rcx_off, SLOT2(rcxH_off)
@ -1361,9 +1368,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
rdi_off, SLOT2(rdiH_off)
// saved_rbp_off, SLOT2(saved_rbpH_off)
return_off, SLOT2(returnH_off)
sub_off, SLOT2(subH_off)
super_off, SLOT2(superH_off)
framesize
sup_k_off, SLOT2(sup_kH_off)
klass_off, SLOT2(superH_off)
framesize,
result_off = klass_off // deepest argument is also the return value
};
__ set_info("slow_subtype_check", dont_gc_arguments);
@ -1373,19 +1381,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ push(rax);
// This is called by pushing args and not with C abi
__ movptr(rsi, Address(rsp, (super_off) * VMRegImpl::stack_slot_size)); // super
__ movptr(rax, Address(rsp, (sub_off ) * VMRegImpl::stack_slot_size)); // sub
__ movptr(rdi,Address(rsi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
// since size is postive movl does right thing on 64bit
__ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
__ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ movptr(rsi, Address(rsp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass
__ movptr(rax, Address(rsp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass
Label miss;
__ repne_scan();
__ jcc(Assembler::notEqual, miss);
__ movptr(Address(rsi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax);
__ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 1); // result
__ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, NULL, &miss);
// fallthrough on success:
__ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), 1); // result
__ pop(rax);
__ pop(rcx);
__ pop(rsi);
@ -1393,7 +1396,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ ret(0);
__ bind(miss);
__ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
__ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
__ pop(rax);
__ pop(rcx);
__ pop(rsi);

View File

@ -219,47 +219,16 @@ void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, R
// Resets EDI to locals. Register sub_klass cannot be any of the above.
void InterpreterMacroAssembler::gen_subtype_check( Register Rsub_klass, Label &ok_is_subtype ) {
assert( Rsub_klass != rax, "rax, holds superklass" );
assert( Rsub_klass != rcx, "rcx holds 2ndary super array length" );
assert( Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr" );
Label not_subtype, loop;
assert( Rsub_klass != rcx, "used as a temp" );
assert( Rsub_klass != rdi, "used as a temp, restored from locals" );
// Profile the not-null value's klass.
profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
// Load the super-klass's check offset into ECX
movl( rcx, Address(rax, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ) );
// Load from the sub-klass's super-class display list, or a 1-word cache of
// the secondary superclass list, or a failing value with a sentinel offset
// if the super-klass is an interface or exceptionally deep in the Java
// hierarchy and we have to scan the secondary superclass list the hard way.
// See if we get an immediate positive hit
cmpptr( rax, Address(Rsub_klass,rcx,Address::times_1) );
jcc( Assembler::equal,ok_is_subtype );
// Do the check.
check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
// Check for immediate negative hit
cmpl( rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
jcc( Assembler::notEqual, not_subtype );
// Check for self
cmpptr( Rsub_klass, rax );
jcc( Assembler::equal, ok_is_subtype );
// Now do a linear scan of the secondary super-klass chain.
movptr( rdi, Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()) );
// EDI holds the objArrayOop of secondary supers.
movl( rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));// Load the array length
// Skip to start of data; also clear Z flag incase ECX is zero
addptr( rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT) );
// Scan ECX words at [EDI] for occurance of EAX
// Set NZ/Z based on last compare
repne_scan();
restore_locals(); // Restore EDI; Must not blow flags
// Not equal?
jcc( Assembler::notEqual, not_subtype );
// Must be equal but missed in cache. Update cache.
movptr( Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax );
jmp( ok_is_subtype );
bind(not_subtype);
// Profile the failure of the check.
profile_typecheck_failed(rcx); // blows rcx
}

View File

@ -232,65 +232,13 @@ void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
assert(Rsub_klass != rcx, "rcx holds 2ndary super array length");
assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr");
Label not_subtype, not_subtype_pop, loop;
// Profile the not-null value's klass.
profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi
profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
// Load the super-klass's check offset into rcx
movl(rcx, Address(rax, sizeof(oopDesc) +
Klass::super_check_offset_offset_in_bytes()));
// Load from the sub-klass's super-class display list, or a 1-word
// cache of the secondary superclass list, or a failing value with a
// sentinel offset if the super-klass is an interface or
// exceptionally deep in the Java hierarchy and we have to scan the
// secondary superclass list the hard way. See if we get an
// immediate positive hit
cmpptr(rax, Address(Rsub_klass, rcx, Address::times_1));
jcc(Assembler::equal,ok_is_subtype);
// Do the check.
check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
// Check for immediate negative hit
cmpl(rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
jcc( Assembler::notEqual, not_subtype );
// Check for self
cmpptr(Rsub_klass, rax);
jcc(Assembler::equal, ok_is_subtype);
// Now do a linear scan of the secondary super-klass chain.
movptr(rdi, Address(Rsub_klass, sizeof(oopDesc) +
Klass::secondary_supers_offset_in_bytes()));
// rdi holds the objArrayOop of secondary supers.
// Load the array length
movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
// Skip to start of data; also clear Z flag incase rcx is zero
addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Scan rcx words at [rdi] for occurance of rax
// Set NZ/Z based on last compare
// this part is kind tricky, as values in supers array could be 32 or 64 bit wide
// and we store values in objArrays always encoded, thus we need to encode value
// before repne
if (UseCompressedOops) {
push(rax);
encode_heap_oop(rax);
repne_scanl();
// Not equal?
jcc(Assembler::notEqual, not_subtype_pop);
// restore heap oop here for movq
pop(rax);
} else {
repne_scan();
jcc(Assembler::notEqual, not_subtype);
}
// Must be equal but missed in cache. Update cache.
movptr(Address(Rsub_klass, sizeof(oopDesc) +
Klass::secondary_super_cache_offset_in_bytes()), rax);
jmp(ok_is_subtype);
bind(not_subtype_pop);
// restore heap oop here for miss
if (UseCompressedOops) pop(rax);
bind(not_subtype);
// Profile the failure of the check.
profile_typecheck_failed(rcx); // blows rcx
}

View File

@ -1310,81 +1310,51 @@ class StubGenerator: public StubCodeGenerator {
Address& super_check_offset_addr,
Address& super_klass_addr,
Register temp,
Label* L_success_ptr, Label* L_failure_ptr) {
Label* L_success, Label* L_failure) {
BLOCK_COMMENT("type_check:");
Label L_fallthrough;
bool fall_through_on_success = (L_success_ptr == NULL);
if (fall_through_on_success) {
L_success_ptr = &L_fallthrough;
} else {
L_failure_ptr = &L_fallthrough;
}
Label& L_success = *L_success_ptr;
Label& L_failure = *L_failure_ptr;
#define LOCAL_JCC(assembler_con, label_ptr) \
if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \
else __ jcc(assembler_con, L_fallthrough) /*omit semi*/
// The following is a strange variation of the fast path which requires
// one less register, because needed values are on the argument stack.
// __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
// L_success, L_failure, NULL);
assert_different_registers(sub_klass, temp);
// a couple of useful fields in sub_klass:
int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_supers_offset_in_bytes());
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
// if the pointers are equal, we are done (e.g., String[] elements)
__ cmpptr(sub_klass, super_klass_addr);
__ jcc(Assembler::equal, L_success);
LOCAL_JCC(Assembler::equal, L_success);
// check the supertype display:
__ movl2ptr(temp, super_check_offset_addr);
Address super_check_addr(sub_klass, temp, Address::times_1, 0);
__ movptr(temp, super_check_addr); // load displayed supertype
__ cmpptr(temp, super_klass_addr); // test the super type
__ jcc(Assembler::equal, L_success);
LOCAL_JCC(Assembler::equal, L_success);
// if it was a primary super, we can just fail immediately
__ cmpl(super_check_offset_addr, sc_offset);
__ jcc(Assembler::notEqual, L_failure);
LOCAL_JCC(Assembler::notEqual, L_failure);
// Now do a linear scan of the secondary super-klass chain.
// This code is rarely used, so simplicity is a virtue here.
inc_counter_np(SharedRuntime::_partial_subtype_ctr);
{
// The repne_scan instruction uses fixed registers, which we must spill.
// (We need a couple more temps in any case.)
__ push(rax);
__ push(rcx);
__ push(rdi);
assert_different_registers(sub_klass, rax, rcx, rdi);
// The repne_scan instruction uses fixed registers, which will get spilled.
// We happen to know this works best when super_klass is in rax.
Register super_klass = temp;
__ movptr(super_klass, super_klass_addr);
__ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
L_success, L_failure);
__ movptr(rdi, secondary_supers_addr);
// Load the array length.
__ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
// Skip to start of data.
__ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Scan rcx words at [edi] for occurance of rax,
// Set NZ/Z based on last compare
__ movptr(rax, super_klass_addr);
__ repne_scan();
// Unspill the temp. registers:
__ pop(rdi);
__ pop(rcx);
__ pop(rax);
}
__ jcc(Assembler::notEqual, L_failure);
// Success. Cache the super we found and proceed in triumph.
__ movptr(temp, super_klass_addr); // note: rax, is dead
__ movptr(super_cache_addr, temp);
if (!fall_through_on_success)
__ jmp(L_success);
// Fall through on failure!
__ bind(L_fallthrough);
if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }
#undef LOCAL_JCC
}
//

View File

@ -2091,66 +2091,9 @@ class StubGenerator: public StubCodeGenerator {
Label L_miss;
// a couple of useful fields in sub_klass:
int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_supers_offset_in_bytes());
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
// if the pointers are equal, we are done (e.g., String[] elements)
__ cmpptr(super_klass, sub_klass);
__ jcc(Assembler::equal, L_success);
// check the supertype display:
Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
__ cmpptr(super_klass, super_check_addr); // test the super type
__ jcc(Assembler::equal, L_success);
// if it was a primary super, we can just fail immediately
__ cmpl(super_check_offset, sc_offset);
__ jcc(Assembler::notEqual, L_miss);
// Now do a linear scan of the secondary super-klass chain.
// The repne_scan instruction uses fixed registers, which we must spill.
// (We need a couple more temps in any case.)
// This code is rarely used, so simplicity is a virtue here.
inc_counter_np(SharedRuntime::_partial_subtype_ctr);
{
__ push(rax);
__ push(rcx);
__ push(rdi);
assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
__ movptr(rdi, secondary_supers_addr);
// Load the array length.
__ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
// Skip to start of data.
__ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Scan rcx words at [rdi] for occurance of rax
// Set NZ/Z based on last compare
__ movptr(rax, super_klass);
if (UseCompressedOops) {
// Compare against compressed form. Don't need to uncompress because
// looks like orig rax is restored in popq below.
__ encode_heap_oop(rax);
__ repne_scanl();
} else {
__ repne_scan();
}
// Unspill the temp. registers:
__ pop(rdi);
__ pop(rcx);
__ pop(rax);
__ jcc(Assembler::notEqual, L_miss);
}
// Success. Cache the super we found and proceed in triumph.
__ movptr(super_cache_addr, super_klass); // note: rax is dead
__ jmp(L_success);
__ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL,
super_check_offset);
__ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
// Fall through on failure!
__ BIND(L_miss);

View File

@ -1692,26 +1692,15 @@ encode %{
Register Reax = as_Register(EAX_enc); // super class
Register Recx = as_Register(ECX_enc); // killed
Register Resi = as_Register(ESI_enc); // sub class
Label hit, miss;
Label miss;
MacroAssembler _masm(&cbuf);
// Compare super with sub directly, since super is not in its own SSA.
// The compiler used to emit this test, but we fold it in here,
// to allow platform-specific tweaking on sparc.
__ cmpptr(Reax, Resi);
__ jcc(Assembler::equal, hit);
#ifndef PRODUCT
__ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
#endif //PRODUCT
__ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
__ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes()));
__ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ repne_scan();
__ jcc(Assembler::notEqual, miss);
__ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax);
__ bind(hit);
if( $primary )
__ xorptr(Redi,Redi);
__ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
NULL, &miss,
/*set_cond_codes:*/ true);
if ($primary) {
__ xorptr(Redi, Redi);
}
__ bind(miss);
%}
@ -12566,15 +12555,12 @@ instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXReg
effect( KILL rcx, KILL cr );
ins_cost(1100); // slightly larger than the next version
format %{ "CMPL EAX,ESI\n\t"
"JEQ,s hit\n\t"
"MOV EDI,[$sub+Klass::secondary_supers]\n\t"
format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
"MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
"ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
"REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
"JNE,s miss\t\t# Missed: EDI not-zero\n\t"
"MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
"hit:\n\t"
"XOR $result,$result\t\t Hit: EDI zero\n\t"
"miss:\t" %}
@ -12588,9 +12574,7 @@ instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super,
effect( KILL rcx, KILL result );
ins_cost(1000);
format %{ "CMPL EAX,ESI\n\t"
"JEQ,s miss\t# Actually a hit; we are done.\n\t"
"MOV EDI,[$sub+Klass::secondary_supers]\n\t"
format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
"MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
"ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
"REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"

View File

@ -2575,43 +2575,13 @@ encode %{
Register Rrax = as_Register(RAX_enc); // super class
Register Rrcx = as_Register(RCX_enc); // killed
Register Rrsi = as_Register(RSI_enc); // sub class
Label hit, miss, cmiss;
Label miss;
const bool set_cond_codes = true;
MacroAssembler _masm(&cbuf);
// Compare super with sub directly, since super is not in its own SSA.
// The compiler used to emit this test, but we fold it in here,
// to allow platform-specific tweaking on sparc.
__ cmpptr(Rrax, Rrsi);
__ jcc(Assembler::equal, hit);
#ifndef PRODUCT
__ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
__ incrementl(Address(Rrcx, 0));
#endif //PRODUCT
__ movptr(Rrdi, Address(Rrsi,
sizeof(oopDesc) +
Klass::secondary_supers_offset_in_bytes()));
__ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
__ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
if (UseCompressedOops) {
__ push(Rrax);
__ encode_heap_oop(Rrax);
__ repne_scanl();
__ pop(Rrax);
__ jccb(Assembler::notEqual, miss);
__ movptr(Address(Rrsi,
sizeof(oopDesc) +
Klass::secondary_super_cache_offset_in_bytes()),
Rrax);
__ jmp(hit);
} else {
__ repne_scan();
__ jccb(Assembler::notEqual, miss);
__ movptr(Address(Rrsi,
sizeof(oopDesc) +
Klass::secondary_super_cache_offset_in_bytes()),
Rrax);
}
__ bind(hit);
__ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
NULL, &miss,
/*set_cond_codes:*/ true);
if ($primary) {
__ xorptr(Rrdi, Rrdi);
}
@ -12179,15 +12149,12 @@ instruct partialSubtypeCheck(rdi_RegP result,
effect(KILL rcx, KILL cr);
ins_cost(1100); // slightly larger than the next version
format %{ "cmpq rax, rsi\n\t"
"jeq,s hit\n\t"
"movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
"movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
"addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
"repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
"jne,s miss\t\t# Missed: rdi not-zero\n\t"
"movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
"hit:\n\t"
"xorq $result, $result\t\t Hit: rdi zero\n\t"
"miss:\t" %}
@ -12205,9 +12172,7 @@ instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
effect(KILL rcx, KILL result);
ins_cost(1000);
format %{ "cmpq rax, rsi\n\t"
"jeq,s miss\t# Actually a hit; we are done.\n\t"
"movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
"movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
"addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
"repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"

View File

@ -2277,7 +2277,7 @@ Node* GraphKit::gen_subtype_check(Node* subklass, Node* superklass) {
r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) );
set_control( _gvn.transform( new (C, 1) IfFalseNode(iff2) ) );
// Check for self. Very rare to get here, but its taken 1/3 the time.
// Check for self. Very rare to get here, but it is taken 1/3 the time.
// No performance impact (too rare) but allows sharing of secondary arrays
// which has some footprint reduction.
Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) );
@ -2286,11 +2286,27 @@ Node* GraphKit::gen_subtype_check(Node* subklass, Node* superklass) {
r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) );
set_control( _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) );
// -- Roads not taken here: --
// We could also have chosen to perform the self-check at the beginning
// of this code sequence, as the assembler does. This would not pay off
// the same way, since the optimizer, unlike the assembler, can perform
// static type analysis to fold away many successful self-checks.
// Non-foldable self checks work better here in second position, because
// the initial primary superclass check subsumes a self-check for most
// types. An exception would be a secondary type like array-of-interface,
// which does not appear in its own primary supertype display.
// Finally, we could have chosen to move the self-check into the
// PartialSubtypeCheckNode, and from there out-of-line in a platform
// dependent manner. But it is worthwhile to have the check here,
// where it can be perhaps be optimized. The cost in code space is
// small (register compare, branch).
// Now do a linear scan of the secondary super-klass array. Again, no real
// performance impact (too rare) but it's gotta be done.
// (The stub also contains the self-check of subklass == superklass.
// Since the code is rarely used, there is no penalty for moving it
// out of line, and it can only improve I-cache density.)
// out of line, and it can only improve I-cache density.
// The decision to inline or out-of-line this final check is platform
// dependent, and is found in the AD file definition of PartialSubtypeCheck.
Node* psc = _gvn.transform(
new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );