8305959: x86: Improve itable_stub

Reviewed-by: phh, shade, aph
This commit is contained in:
Boris Ulasevich 2023-06-07 11:27:52 +00:00
parent 9233dcc838
commit 8cdd95e8a2
5 changed files with 271 additions and 45 deletions

@ -4300,6 +4300,125 @@ void MacroAssembler::lookup_interface_method(Register recv_klass,
}
}
// Look up the method for a megamorphic invokeinterface call in a single pass over itable:
// - check recv_klass (actual object class) is a subtype of resolved_klass from CompiledICHolder,
//   i.e. that resolved_klass appears as the interface of some itable offset entry
// - find the itable offset entry whose interface is holder_klass (the declaring interface, DECC,
//   per the register comments in create_itable_stub), read that entry's offset, and load the
//   method found at <klass + offset + itable_index * wordSize + itableMethodEntry::method_offset()>
// The target method is determined by <holder_klass, itable_index>.
// The receiver klass is in recv_klass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
//
// Register usage, as visible in the body below:
//   recv_klass      - in: base address for all itable addressing. Zeroed and reused as the
//                     "holder offset" register when temp_reg2 == noreg.
//   holder_klass    - in: only compared against, never written.
//   resolved_klass  - in: only compared against, never written.
//   method_result   - out: the selected method. Used as scratch during the scan (it holds the
//                     interface field of the entry currently being examined).
//   scan_temp       - scratch: scan cursor. Also receives the reloaded klass when temp_reg2 == noreg.
//   temp_reg2       - scratch: holds the holder entry's offset; may be noreg (see recv_klass).
//   receiver        - in: read only when temp_reg2 == noreg, to reload the klass that was clobbered.
//   itable_index    - index of the method within the holder's method block, folded into the
//                     displacement of the final load.
//   L_no_such_interface - taken when the scan hits an entry whose interface field is zero.
// NOTE(review): every branch here uses jccb/jmpb, presumably the short-displacement forms, so
// L_no_such_interface would have to be bound close to this code - confirm against Assembler.
void MacroAssembler::lookup_interface_method_stub(Register recv_klass,
Register holder_klass,
Register resolved_klass,
Register method_result,
Register scan_temp,
Register temp_reg2,
Register receiver,
int itable_index,
Label& L_no_such_interface) {
assert_different_registers(recv_klass, method_result, holder_klass, resolved_klass, scan_temp, temp_reg2, receiver);
// method_result doubles as the register holding the interface klass of the current itable entry;
// it is only overwritten with the real result by the final movptr.
Register temp_itbl_klass = method_result;
Register temp_reg = (temp_reg2 == noreg ? recv_klass : temp_reg2); // reuse recv_klass register on 32-bit x86 impl
int vtable_base = in_bytes(Klass::vtable_start_offset());
int itentry_off = in_bytes(itableMethodEntry::method_offset());
int scan_step = itableOffsetEntry::size() * wordSize;
int vte_size = vtableEntry::size_in_bytes();
int ioffset = in_bytes(itableOffsetEntry::interface_offset());
int ooffset = in_bytes(itableOffsetEntry::offset_offset());
Address::ScaleFactor times_vte_scale = Address::times_ptr;
// The vtable length is scaled by times_ptr below, which is only right for word-sized vtable entries.
assert(vte_size == wordSize, "adjust times_vte_scale");
Label L_loop_scan_resolved_entry, L_resolved_found, L_holder_found;
// temp_itbl_klass = recv_klass.itable[0]
// scan_temp = &recv_klass.itable[0] + step
// The first itable offset entry is addressed at
//   recv_klass + vtable_base + vtable_length * wordSize,
// and scan_temp always points at the *interface field* of the next entry to read.
movl(scan_temp, Address(recv_klass, Klass::vtable_length_offset()));
movptr(temp_itbl_klass, Address(recv_klass, scan_temp, times_vte_scale, vtable_base + ioffset));
lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base + ioffset + scan_step));
// temp_reg == 0 means "holder entry not seen yet" (tested at L_resolved_found).
// When temp_reg aliases recv_klass this destroys recv_klass, so both loads above must come first.
xorptr(temp_reg, temp_reg);
// Initial checks:
// - if (holder_klass != resolved_klass), go to "scan for resolved"
// - if (itable[0] == 0), no such interface
// - if (itable[0] == holder_klass), shortcut to "holder found"
// When holder_klass == resolved_klass, finding the holder entry also proves the subtype check,
// so only the holder scan is needed.
cmpptr(holder_klass, resolved_klass);
jccb(Assembler::notEqual, L_loop_scan_resolved_entry);
testptr(temp_itbl_klass, temp_itbl_klass);
jccb(Assembler::zero, L_no_such_interface);
cmpptr(holder_klass, temp_itbl_klass);
jccb(Assembler::equal, L_holder_found);
// Loop: Look for holder_klass record in itable
// do {
// tmp = itable[index];
// index += step;
// if (tmp == holder_klass) {
// goto L_holder_found; // Found!
// }
// } while (tmp != 0);
// goto L_no_such_interface // Not found.
// This loop is also entered from L_resolved_found, in which case it resumes from the current
// scan_temp position rather than from the start of the itable.
Label L_scan_holder;
bind(L_scan_holder);
movptr(temp_itbl_klass, Address(scan_temp, 0));
addptr(scan_temp, scan_step);
cmpptr(holder_klass, temp_itbl_klass);
jccb(Assembler::equal, L_holder_found);
testptr(temp_itbl_klass, temp_itbl_klass);
jccb(Assembler::notZero, L_scan_holder);
jmpb(L_no_such_interface);
// Loop: Look for resolved_class record in itable
// do {
// tmp = itable[index];
// index += step;
// if (tmp == holder_klass) {
// // Also check if we have met a holder klass
// holder_tmp = itable[index-step-ioffset];
// }
// if (tmp == resolved_klass) {
// goto L_resolved_found; // Found!
// }
// } while (tmp != 0);
// goto L_no_such_interface // Not found.
//
// Precisely, the conditional load reads the offset field of the entry just examined: scan_temp
// has already been advanced by scan_step and points at an interface field, hence the
// displacement (ooffset - ioffset - scan_step).
Label L_loop_scan_resolved;
bind(L_loop_scan_resolved);
movptr(temp_itbl_klass, Address(scan_temp, 0));
addptr(scan_temp, scan_step);
// First iteration enters here with itable[0] already in temp_itbl_klass (loaded above).
bind(L_loop_scan_resolved_entry);
cmpptr(holder_klass, temp_itbl_klass);
// Remember the holder's offset if this entry happens to be the holder; temp_reg is left
// untouched otherwise.
cmovl(Assembler::equal, temp_reg, Address(scan_temp, ooffset - ioffset - scan_step));
cmpptr(resolved_klass, temp_itbl_klass);
jccb(Assembler::equal, L_resolved_found);
testptr(temp_itbl_klass, temp_itbl_klass);
jccb(Assembler::notZero, L_loop_scan_resolved);
jmpb(L_no_such_interface);
Label L_ready;
// See if we already have a holder klass. If not, go and scan for it.
// temp_reg is still zero only if no holder entry was met before the resolved entry; the holder
// scan then continues with the entries that follow.
bind(L_resolved_found);
testptr(temp_reg, temp_reg);
jccb(Assembler::zero, L_scan_holder);
jmpb(L_ready);
bind(L_holder_found);
// Same displacement as the cmovl above: offset field of the entry that just matched.
movl(temp_reg, Address(scan_temp, ooffset - ioffset - scan_step));
// Finally, temp_reg contains holder_klass vtable offset
bind(L_ready);
// The final load indexes method entries with itable_index * wordSize, i.e. one word per entry.
assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
if (temp_reg2 == noreg) { // recv_klass register is clobbered for 32-bit x86 impl
// recv_klass now holds the offset (it is temp_reg), so reload the klass from the receiver into
// scan_temp, which is no longer needed as a cursor.
load_klass(scan_temp, receiver, noreg);
movptr(method_result, Address(scan_temp, temp_reg, Address::times_1, itable_index * wordSize + itentry_off));
} else {
movptr(method_result, Address(recv_klass, temp_reg, Address::times_1, itable_index * wordSize + itentry_off));
}
}
// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,

@ -603,6 +603,16 @@ public:
Label& no_such_interface,
bool return_method = true);
// Single-pass itable lookup for the itable stubs (called from VtableStubs::create_itable_stub):
// checks that resolved_klass is present in recv_klass's itable and loads the method selected by
// <holder_klass, itable_index> into method_result. Falls through on success; branches to
// L_no_such_interface otherwise.
// scan_temp is a scratch register. temp_reg2 may be noreg, in which case recv_klass is
// clobbered and 'receiver' is used to reload the klass; otherwise 'receiver' is not read.
void lookup_interface_method_stub(Register recv_klass,
Register holder_klass,
Register resolved_klass,
Register method_result,
Register scan_temp,
Register temp_reg2,
Register receiver,
int itable_index,
Label& L_no_such_interface);
// virtual method calling
void lookup_virtual_method(Register recv_klass,
RegisterOrConstant vtable_index,

@ -179,14 +179,16 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// rax: CompiledICHolder
// rcx: Receiver
// Most registers are in use; we'll use rax, rbx, rsi, rdi
// Most registers are in use; we'll use rax, rbx, rcx, rdx, rsi, rdi
// (If we need to make rsi, rdi callee-save, do a push/pop here.)
const Register recv_klass_reg = rsi;
const Register holder_klass_reg = rax; // declaring interface klass (DECC)
const Register resolved_klass_reg = rbx; // resolved interface klass (REFC)
const Register temp_reg = rdi;
const Register resolved_klass_reg = rdi; // resolved interface klass (REFC)
const Register temp_reg = rdx;
const Register method = rbx;
const Register icholder_reg = rax;
const Register receiver = rcx;
const Register icholder_reg = rax;
__ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
__ movptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
@ -198,35 +200,26 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
__ load_klass(recv_klass_reg, rcx, noreg);
start_pc = __ pc();
__ push(rdx); // temp_reg
// Receiver subtype check against REFC.
// Destroys recv_klass_reg value.
__ lookup_interface_method(// inputs: rec. class, interface
recv_klass_reg, resolved_klass_reg, noreg,
// outputs: scan temp. reg1, scan temp. reg2
recv_klass_reg, temp_reg,
L_no_such_interface,
/*return_method=*/false);
const ptrdiff_t typecheckSize = __ pc() - start_pc;
start_pc = __ pc();
// Get selected method from declaring class and itable index
const Register method = rbx;
__ load_klass(recv_klass_reg, rcx, noreg); // restore recv_klass_reg
__ lookup_interface_method(// inputs: rec. class, interface, itable index
recv_klass_reg, holder_klass_reg, itable_index,
// outputs: method, scan temp. reg
method, temp_reg,
L_no_such_interface);
__ lookup_interface_method_stub(recv_klass_reg, // input
holder_klass_reg, // input
resolved_klass_reg, // input
method, // output
temp_reg,
noreg,
receiver, // input (x86_32 only: to restore recv_klass value)
itable_index,
L_no_such_interface);
const ptrdiff_t lookupSize = __ pc() - start_pc;
// We expect we need index_dependent_slop extra bytes. Reason:
// The emitted code in lookup_interface_method changes when itable_index exceeds 31.
// For windows, a narrow estimate was found to be 104. Other OSes not tested.
const ptrdiff_t estimate = 104;
const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
const ptrdiff_t codesize = lookupSize + index_dependent_slop;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);
@ -246,6 +239,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
}
#endif // ASSERT
__ pop(rdx);
address ame_addr = __ pc();
__ jmp(Address(method, Method::from_compiled_offset()));
@ -255,6 +249,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// We force resolving of the call site by jumping to the "handle
// wrong method" stub, and so let the interpreter runtime do all the
// dirty work.
__ pop(rdx);
__ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
masm->flush();

@ -175,10 +175,12 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them)
const Register recv_klass_reg = r10;
const Register holder_klass_reg = rax; // declaring interface klass (DECC)
const Register resolved_klass_reg = rbx; // resolved interface klass (REFC)
const Register resolved_klass_reg = r14; // resolved interface klass (REFC)
const Register temp_reg = r11;
const Register temp_reg2 = r13;
const Register method = rbx;
const Register icholder_reg = rax;
const Register icholder_reg = rax;
__ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset()));
__ movptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset()));
@ -192,25 +194,16 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
start_pc = __ pc();
// Receiver subtype check against REFC.
// Destroys recv_klass_reg value.
__ lookup_interface_method(// inputs: rec. class, interface
recv_klass_reg, resolved_klass_reg, noreg,
// outputs: scan temp. reg1, scan temp. reg2
recv_klass_reg, temp_reg,
L_no_such_interface,
/*return_method=*/false);
const ptrdiff_t typecheckSize = __ pc() - start_pc;
start_pc = __ pc();
// Get selected method from declaring class and itable index
const Register method = rbx;
__ load_klass(recv_klass_reg, j_rarg0, temp_reg); // restore recv_klass_reg
__ lookup_interface_method(// inputs: rec. class, interface, itable index
recv_klass_reg, holder_klass_reg, itable_index,
// outputs: method, scan temp. reg
method, temp_reg,
L_no_such_interface);
__ lookup_interface_method_stub(recv_klass_reg, // input
holder_klass_reg, // input
resolved_klass_reg, // input
method, // output
temp_reg,
temp_reg2,
noreg,
itable_index,
L_no_such_interface);
const ptrdiff_t lookupSize = __ pc() - start_pc;
@ -218,7 +211,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
// The emitted code in lookup_interface_method changes when itable_index exceeds 15.
// For linux, a very narrow estimate would be 112, but Solaris requires some more space (130).
const ptrdiff_t estimate = 136;
const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop;
const ptrdiff_t codesize = lookupSize + index_dependent_slop;
slop_delta = (int)(estimate - codesize);
slop_bytes += slop_delta;
assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize);

@ -24,6 +24,7 @@ package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.CompilerControl;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
@ -52,6 +53,14 @@ public class InterfaceCalls {
public int getIntSecond();
}
/** Sub-interface of FirstInterface that supplies a default getIntFirst() returning 44. */
interface FirstInterfaceExt extends FirstInterface {
default int getIntFirst() { return 44; }
}
/**
 * Second-level sub-interface (FirstInterfaceExtExt -> FirstInterfaceExt -> FirstInterface)
 * whose default getIntFirst() returns 45. Used as the static receiver type in testIfaceExtCall.
 */
interface FirstInterfaceExtExt extends FirstInterfaceExt {
default int getIntFirst() { return 45; }
}
class FirstClass implements FirstInterface, SecondInterface {
public int getIntFirst() {
return 1;
@ -102,8 +111,80 @@ public class InterfaceCalls {
}
}
/**
 * FirstInterface implementation whose getIntFirst() returns -1 and is annotated DONT_INLINE
 * (presumably so the benchmark measures the interface dispatch itself - confirm).
 */
class FirstClassDontInline implements FirstInterface {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -1;
}
}
/** FirstInterface implementation whose DONT_INLINE getIntFirst() returns -2. */
class SecondClassDontInline implements FirstInterface {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -2;
}
}
/** FirstInterface implementation whose DONT_INLINE getIntFirst() returns -3. */
class ThirdClassDontInline implements FirstInterface {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -3;
}
}
/** FirstInterface implementation whose DONT_INLINE getIntFirst() returns -4. */
class FourthClassDontInline implements FirstInterface {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -4;
}
}
/** FirstInterface implementation whose DONT_INLINE getIntFirst() returns -5. */
class FifthClassDontInline implements FirstInterface {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -5;
}
}
/**
 * FirstInterfaceExtExt implementation whose DONT_INLINE getIntFirst() returns -1.
 * The class declares its own getIntFirst(), so the interface defaults (44 / 45) are not
 * what a call on an instance returns.
 */
class FirstClassDontInlineExtExt implements FirstInterfaceExtExt {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -1;
}
}
/** FirstInterfaceExtExt implementation whose DONT_INLINE getIntFirst() returns -2. */
class SecondClassDontInlineExtExt implements FirstInterfaceExtExt {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -2;
}
}
/** FirstInterfaceExtExt implementation whose DONT_INLINE getIntFirst() returns -3. */
class ThirdClassDontInlineExtExt implements FirstInterfaceExtExt {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -3;
}
}
/** FirstInterfaceExtExt implementation whose DONT_INLINE getIntFirst() returns -4. */
class FourthClassDontInlineExtExt implements FirstInterfaceExtExt {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -4;
}
}
/** FirstInterfaceExtExt implementation whose DONT_INLINE getIntFirst() returns -5. */
class FifthClassDontInlineExtExt implements FirstInterfaceExtExt {
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
public int getIntFirst() {
return -5;
}
}
// Length of each receiver array below; also the modulus used when cycling the index l.
final int asLength = 5;
// Receivers used by the existing benchmarks (populated in the @Setup method).
public FirstInterface[] as = new FirstInterface[asLength];
// One instance of each *ClassDontInline type; read by testIfaceCall.
public FirstInterface[] noninlined = new FirstInterface[asLength];
// One instance of each *ClassDontInlineExtExt type; read by testIfaceExtCall.
public FirstInterfaceExtExt[] noninlinedextext = new FirstInterfaceExtExt[asLength];
@Setup
@ -113,6 +194,18 @@ public class InterfaceCalls {
as[2] = new ThirdClass();
as[3] = new FourthClass();
as[4] = new FifthClass();
noninlined[0] = new FirstClassDontInline();
noninlined[1] = new SecondClassDontInline();
noninlined[2] = new ThirdClassDontInline();
noninlined[3] = new FourthClassDontInline();
noninlined[4] = new FifthClassDontInline();
noninlinedextext[0] = new FirstClassDontInlineExtExt();
noninlinedextext[1] = new SecondClassDontInlineExtExt();
noninlinedextext[2] = new ThirdClassDontInlineExtExt();
noninlinedextext[3] = new FourthClassDontInlineExtExt();
noninlinedextext[4] = new FifthClassDontInlineExtExt();
}
/**
@ -126,6 +219,22 @@ public class InterfaceCalls {
int l = 0;
/**
 * Tests single base interface method call.
 * Each invocation takes the receiver at index {@code l} from {@code noninlined}, advances
 * {@code l} cyclically, and returns the receiver's {@code getIntFirst()} result, so the call
 * site sees all five DontInline receiver classes in rotation.
 * The {@code bh} parameter is not used in the body.
 */
@Benchmark
public int testIfaceCall(Blackhole bh) {
FirstInterface ai = noninlined[l];
// Pre-increment, then wrap back to 0 on reaching asLength.
l = ++ l % asLength;
return ai.getIntFirst();
}
/**
 * Tests extended interface method call.
 * Same shape as testIfaceCall, but the receivers come from {@code noninlinedextext} and the
 * call is made through the FirstInterfaceExtExt type, two levels below FirstInterface.
 * The {@code bh} parameter is not used in the body.
 */
@Benchmark
public int testIfaceExtCall(Blackhole bh) {
FirstInterfaceExtExt ai = noninlinedextext[l];
// Pre-increment, then wrap back to 0 on reaching asLength.
l = ++ l % asLength;
return ai.getIntFirst();
}
/**
* Interface call address computation within loop but the receiver preexists
* the loop and the ac can be moved outside of the loop