8144448: Avoid placing CTI immediately following or preceding RDPC instruction

Best practice for new SPARC CPUs

Reviewed-by: kvn
This commit is contained in:
Patric Hedlin 2017-06-27 15:46:16 +02:00 committed by Nils Eliasson
parent 065a8981f5
commit 6a9aa18f63
9 changed files with 1546 additions and 852 deletions

View File

@ -26,6 +26,36 @@
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "assembler_sparc.hpp"
// Returns the byte value 0x00. A word made of this byte reads as 0x00000000,
// which is an illegal instruction.
int AbstractAssembler::code_fill_byte() {
  const int illegal_insn_byte = 0x00;
  return illegal_insn_byte;
}
#ifdef VALIDATE_PIPELINE
/* Scan the current code section, one instruction word at a time, and assert
 * that no pair of adjacent instructions forms one of the checked pipeline
 * hazards (CTI-CTI, CTI-RDPC, RDPC-CTI).
 */
void Assembler::validate_no_pipeline_hazards() {
  const CodeSection* section = code_section();

  const address first = section->start();
  const address limit = section->end();

  // The walk below advances in whole instruction words and stops only on an
  // exact match with 'limit', so the section size must be a multiple of it.
  assert((limit - first) % BytesPerInstWord == 0, "must be");

  uint32_t previous = 0;  // Word seen on the prior iteration (0 before the first).
  address cursor = first;

  while (cursor != limit) {
    const uint32_t current = *reinterpret_cast<uint32_t*>(cursor);

    // 1. General case: a CTI must not directly follow another CTI.
    assert(!is_cti(previous) || !is_cti(current), "CTI-CTI not allowed.");

    // 2. Special case: RDPC must be neither directly after nor directly
    //    before a CTI.
    assert(!is_cti(previous) || !is_rdpc(current), "CTI-RDPC not allowed.");
    assert(!is_rdpc(previous) || !is_cti(current), "RDPC-CTI not allowed.");

    previous = current;
    cursor += BytesPerInstWord;
  }
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -651,9 +651,9 @@ void MacroAssembler::card_table_write(jbyte* byte_map_base,
void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
address save_pc;
int shiftcnt;
# ifdef CHECK_DELAY
assert_not_delayed((char*) "cannot put two instructions in delay slot");
# endif
#ifdef VALIDATE_PIPELINE
assert_no_delay("Cannot put two instructions in delay-slot.");
#endif
v9_dep();
save_pc = pc();
@ -752,7 +752,7 @@ void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, boo
return;
}
}
assert_not_delayed((char*) "cannot put two instructions in delay slot");
assert_no_delay("Cannot put two instructions in delay-slot.");
internal_sethi(addrlit, d, ForceRelocatable);
if (ForceRelocatable || addrlit.rspec().type() != relocInfo::none || addrlit.low10() != 0) {
add(d, addrlit.low10(), d, addrlit.rspec());

View File

@ -662,9 +662,6 @@ class MacroAssembler : public Assembler {
inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
// get PC the best way
inline int get_pc( Register d );
// Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual)
inline void cmp( Register s1, Register s2 );
inline void cmp( Register s1, int simm13a );
@ -1396,7 +1393,7 @@ public:
void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2);
void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2);
// CRC32 code for java.util.zip.CRC32::updateBytes0() instrinsic.
// CRC32 code for java.util.zip.CRC32::updateBytes0() intrinsic.
void kernel_crc32(Register crc, Register buf, Register len, Register table);
// Fold 128-bit data chunk
void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset);
@ -1404,7 +1401,7 @@ public:
// Fold 8-bit data
void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
void fold_8bit_crc32(Register crc, Register table, Register tmp);
// CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer instrinsic.
// CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer intrinsic.
void kernel_crc32c(Register crc, Register buf, Register len, Register table);
};

View File

@ -185,7 +185,8 @@ inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, reloc
}
// Label overload of br(): resolves the label with target(L) and forwards to
// the br() overload that takes an explicit destination.
inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
insert_nop_after_cbcond();
// See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
avoid_pipeline_stall();
br(c, a, p, target(L));
}
@ -197,7 +198,7 @@ inline void MacroAssembler::brx( Condition c, bool a, Predict p, address d, relo
}
// Label overload of brx(): resolves the label with target(L) and forwards to
// the brx() overload that takes an explicit destination.
inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) {
insert_nop_after_cbcond();
avoid_pipeline_stall();
brx(c, a, p, target(L));
}
@ -219,7 +220,7 @@ inline void MacroAssembler::fb( Condition c, bool a, Predict p, address d, reloc
}
// Label overload of fb(): resolves the label with target(L) and forwards to
// the fb() overload that takes an explicit destination.
inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) {
insert_nop_after_cbcond();
avoid_pipeline_stall();
fb(c, a, p, target(L));
}
@ -268,13 +269,12 @@ inline void MacroAssembler::call( address d, RelocationHolder const& rspec ) {
}
}
inline void MacroAssembler::call( Label& L, relocInfo::relocType rt ) {
insert_nop_after_cbcond();
MacroAssembler::call( target(L), rt);
inline void MacroAssembler::call( Label& L, relocInfo::relocType rt ) {
avoid_pipeline_stall();
MacroAssembler::call(target(L), rt);
}
// Shorthand that emits jmpl(s1, s2, O7).
inline void MacroAssembler::callr(Register s1, Register s2) {
  jmpl(s1, s2, O7);
}
// Shorthand that emits jmpl(s1, simm13a, O7, rspec), passing the relocation
// holder through unchanged.
inline void MacroAssembler::callr(Register s1, int simm13a, RelocationHolder const& rspec) {
  jmpl(s1, simm13a, O7, rspec);
}
@ -304,13 +304,6 @@ inline void MacroAssembler::retl( bool trace ) {
}
}
// clobbers o7 on V8!!
// returns delta from gotten pc to addr after
// Emits rdpc(d) and returns the difference between offset() sampled after
// and before that emission.
inline int MacroAssembler::get_pc( Register d ) {
int x = offset(); // offset() before rdpc is emitted
rdpc(d);
return offset() - x;
}
// Compare shorthand: emits subcc(s1, s2, G0).
inline void MacroAssembler::cmp(Register s1, Register s2) {
  subcc(s1, s2, G0);
}
// Compare shorthand with an immediate: emits subcc(s1, simm13a, G0).
inline void MacroAssembler::cmp(Register s1, int simm13a) {
  subcc(s1, simm13a, G0);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,7 +27,7 @@
// Adapters
enum /* platform_dependent_constants */ {
adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000))
adapter_code_size = 35000 DEBUG_ONLY(+ 50000)
};
// Additional helper methods for MethodHandles code generation:

View File

@ -67,11 +67,8 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
bool is_illegal();
// True when the word read by long_at(0) passes is_op3() for
// ldsw_op3 / ldst_op and additionally has rs1 == G0 and rd == O7.
bool is_zombie() {
int x = long_at(0);
return is_op3(x,
Assembler::ldsw_op3,
Assembler::ldst_op)
&& Assembler::inv_rs1(x) == G0
&& Assembler::inv_rd(x) == O7;
return (is_op3(x, Assembler::ldsw_op3, Assembler::ldst_op) &&
inv_rs1(x) == G0 && inv_rd(x) == O7);
}
bool is_ic_miss_trap(); // Inline-cache uses a trap to detect a miss
bool is_return() {
@ -129,29 +126,11 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
bool is_load_store_with_small_offset(Register reg);
public:
// Builds an instruction word by OR-ing the arith_op opcode, the rdreg_op3
// op3 field, u_field(5, 18, 14) and rd(O7). Both preprocessor branches compose
// the same fields; they differ only in whether u_field is qualified with
// Assembler:: (see the note in the #else branch).
#ifdef ASSERT
static int rdpc_instruction() { return Assembler::op(Assembler::arith_op ) | Assembler::op3(Assembler::rdreg_op3) | Assembler::u_field(5, 18, 14) | Assembler::rd(O7); }
#else
// Temporary fix: in optimized mode, u_field is a macro for efficiency reasons (see Assembler::u_field) - needs to be fixed
static int rdpc_instruction() { return Assembler::op(Assembler::arith_op ) | Assembler::op3(Assembler::rdreg_op3) | u_field(5, 18, 14) | Assembler::rd(O7); }
#endif
// Instruction word composed of the branch_op opcode field OR-ed with the
// sethi_op2 op2 field.
static int nop_instruction() {
  int insn = Assembler::op(Assembler::branch_op);
  insn |= Assembler::op2(Assembler::sethi_op2);
  return insn;
}
static int illegal_instruction(); // the output of __ breakpoint_trap()
// Instruction word composed of the call_op opcode field OR-ed with the
// 30-bit wdisp() field computed from 'destination' and 'pc'.
static int call_instruction(address destination, address pc) {
  int insn = Assembler::op(Assembler::call_op);
  insn |= Assembler::wdisp((intptr_t)destination, (intptr_t)pc, 30);
  return insn;
}
// Instruction word composed of the branch_op opcode, the given op2 value,
// the annul bit 'a' and the condition 'c', all OR-ed together.
static int branch_instruction(Assembler::op2s op2val, Assembler::Condition c, bool a) {
  int insn = Assembler::op(Assembler::branch_op);
  insn |= Assembler::op2(op2val);
  insn |= Assembler::annul(a);
  insn |= Assembler::cond(c);
  return insn;
}
// Instruction word composed of the given opcode, rd, op3 and rs1 fields,
// with the immediate flag set and 'simm13a' encoded as a 13-bit simm field.
static int op3_instruction(Assembler::ops opval, Register rd, Assembler::op3s op3val, Register rs1, int simm13a) {
  int insn = Assembler::op(opval);
  insn |= Assembler::rd(rd);
  insn |= Assembler::op3(op3val);
  insn |= Assembler::rs1(rs1);
  insn |= Assembler::immed(true);
  insn |= Assembler::simm(simm13a, 13);
  return insn;
}
// Instruction word composed of the branch_op opcode, the rd field, the
// sethi_op2 op2 field and hi22(imm22a), all OR-ed together.
static int sethi_instruction(Register rd, int imm22a) {
  int insn = Assembler::op(Assembler::branch_op);
  insn |= Assembler::rd(rd);
  insn |= Assembler::op2(Assembler::sethi_op2);
  insn |= Assembler::hi22(imm22a);
  return insn;
}
protected:
protected:
address addr_at(int offset) const { return address(this) + offset; }
int long_at(int offset) const { return *(int*)addr_at(offset); }
void set_long_at(int offset, int i); /* deals with I-cache */

View File

@ -1072,7 +1072,13 @@ void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
__ rdpc(r);
if (disp != 0) {
if (disp == 0) {
// Emitting an additional 'nop' instruction in order not to cause a code
// size adjustment in the code following the table setup (if the instruction
// immediately following after this section is a CTI).
__ nop();
}
else {
assert(r != O7, "need temporary");
__ sub(r, __ ensure_simm13_or_reg(disp, O7), r);
}
@ -8624,7 +8630,7 @@ instruct branch_short(label labl) %{
predicate(UseCBCond);
effect(USE labl);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "BA $labl\t! short branch" %}
ins_encode %{
@ -8965,7 +8971,7 @@ instruct cmpI_reg_branch_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flag
predicate(UseCBCond);
effect(USE labl, KILL icc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CWB$cmp $op1,$op2,$labl\t! int" %}
ins_encode %{
@ -8983,7 +8989,7 @@ instruct cmpI_imm_branch_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flag
predicate(UseCBCond);
effect(USE labl, KILL icc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CWB$cmp $op1,$op2,$labl\t! int" %}
ins_encode %{
@ -9001,7 +9007,7 @@ instruct cmpU_reg_branch_short(cmpOpU cmp, iRegI op1, iRegI op2, label labl, fla
predicate(UseCBCond);
effect(USE labl, KILL icc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
ins_encode %{
@ -9019,7 +9025,7 @@ instruct cmpU_imm_branch_short(cmpOpU cmp, iRegI op1, immI5 op2, label labl, fla
predicate(UseCBCond);
effect(USE labl, KILL icc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
ins_encode %{
@ -9037,7 +9043,7 @@ instruct cmpL_reg_branch_short(cmpOp cmp, iRegL op1, iRegL op2, label labl, flag
predicate(UseCBCond);
effect(USE labl, KILL xcc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CXB$cmp $op1,$op2,$labl\t! long" %}
ins_encode %{
@ -9055,7 +9061,7 @@ instruct cmpL_imm_branch_short(cmpOp cmp, iRegL op1, immL5 op2, label labl, flag
predicate(UseCBCond);
effect(USE labl, KILL xcc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CXB$cmp $op1,$op2,$labl\t! long" %}
ins_encode %{
@ -9074,7 +9080,7 @@ instruct cmpP_reg_branch_short(cmpOpP cmp, iRegP op1, iRegP op2, label labl, fla
predicate(UseCBCond);
effect(USE labl, KILL pcc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CXB$cmp $op1,$op2,$labl\t! ptr" %}
ins_encode %{
@ -9092,7 +9098,7 @@ instruct cmpP_null_branch_short(cmpOpP cmp, iRegP op1, immP0 null, label labl, f
predicate(UseCBCond);
effect(USE labl, KILL pcc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CXB$cmp $op1,0,$labl\t! ptr" %}
ins_encode %{
@ -9110,7 +9116,7 @@ instruct cmpN_reg_branch_short(cmpOp cmp, iRegN op1, iRegN op2, label labl, flag
predicate(UseCBCond);
effect(USE labl, KILL icc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CWB$cmp $op1,$op2,$labl\t! compressed ptr" %}
ins_encode %{
@ -9128,7 +9134,7 @@ instruct cmpN_null_branch_short(cmpOp cmp, iRegN op1, immN0 null, label labl, fl
predicate(UseCBCond);
effect(USE labl, KILL icc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CWB$cmp $op1,0,$labl\t! compressed ptr" %}
ins_encode %{
@ -9147,7 +9153,7 @@ instruct cmpI_reg_branchLoopEnd_short(cmpOp cmp, iRegI op1, iRegI op2, label lab
predicate(UseCBCond);
effect(USE labl, KILL icc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CWB$cmp $op1,$op2,$labl\t! Loop end" %}
ins_encode %{
@ -9165,7 +9171,7 @@ instruct cmpI_imm_branchLoopEnd_short(cmpOp cmp, iRegI op1, immI5 op2, label lab
predicate(UseCBCond);
effect(USE labl, KILL icc);
size(4);
size(4); // Assuming no NOP inserted.
ins_cost(BRANCH_COST);
format %{ "CWB$cmp $op1,$op2,$labl\t! Loop end" %}
ins_encode %{