8241438: Move IntelJccErratum mitigation code to platform-specific code

Reviewed-by: vlivanov, kvn
Author: Erik Österlund
Date:   2020-04-08 15:34:56 +00:00
Commit: 76a8557d0c
Parent: 6c1f8e1dc2
12 changed files with 194 additions and 55 deletions

src/hotspot/cpu/aarch64/aarch64.ad
@@ -1027,6 +1027,13 @@ class HandlerImpl {
   }
 };
 
+class Node::PD {
+public:
+  enum NodeFlags {
+    _last_flag = Node::_last_flag
+  };
+};
+
 bool is_CAS(int opcode, bool maybe_volatile);
 
 // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
@@ -1051,6 +1058,17 @@ source %{
 
 // Derived RegMask with conditionally allocatable registers
 
+void PhaseOutput::pd_perform_mach_node_analysis() {
+}
+
+int MachNode::pd_alignment_required() const {
+  return 1;
+}
+
+int MachNode::compute_padding(int current_offset) const {
+  return 0;
+}
+
 RegMask _ANY_REG32_mask;
 RegMask _ANY_REG_mask;
 RegMask _PTR_REG_mask;
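
All five non-x86 back ends (aarch64, arm, ppc, s390, sparc) receive the same boilerplate in the hunks that follow: a Node::PD whose NodeFlags enum merely re-exports Node::_last_flag, plus no-op bodies for the three new platform hooks. A standalone C++ sketch of the flag-chaining contract (illustrative names and values, not the real HotSpot headers):

```cpp
// Sketch: platform flags continue exactly where the shared Node flags stop,
// so shared and platform bits never collide.
#include <cassert>
#include <cstdint>

struct Node {
  enum NodeFlags : uint32_t {
    Flag_is_copy      = 1u << 0,
    Flag_is_expensive = 1u << 1,        // last shared flag in this sketch
    _last_flag        = Flag_is_expensive
  };
  struct PD;                            // body supplied per platform
};

// x86-style platform extension: continue the sequence.
struct Node::PD {
  enum NodeFlags : uint32_t {
    Flag_intel_jcc_erratum = Node::_last_flag << 1,
    _last_flag             = Flag_intel_jcc_erratum
  };
};
// A platform with nothing to add would instead write:
//   enum NodeFlags : uint32_t { _last_flag = Node::_last_flag };

int main() {
  // The platform flag occupies the first bit after the shared ones.
  assert(Node::PD::Flag_intel_jcc_erratum == 1u << 2);
  assert((Node::PD::Flag_intel_jcc_erratum & Node::_last_flag) == 0);
  return 0;
}
```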

src/hotspot/cpu/arm/arm.ad
@@ -116,6 +116,13 @@ class HandlerImpl {
 };
 
+class Node::PD {
+public:
+  enum NodeFlags {
+    _last_flag = Node::_last_flag
+  };
+};
+
 %}
 
 source %{
@@ -124,6 +131,16 @@ source %{
 static FloatRegister reg_to_FloatRegister_object(int register_encoding);
 static Register reg_to_register_object(int register_encoding);
 
+void PhaseOutput::pd_perform_mach_node_analysis() {
+}
+
+int MachNode::pd_alignment_required() const {
+  return 1;
+}
+
+int MachNode::compute_padding(int current_offset) const {
+  return 0;
+}
 
 // ****************************************************************************

src/hotspot/cpu/ppc/ppc.ad
@@ -982,6 +982,17 @@ source_hpp %{
 source %{
 
+void PhaseOutput::pd_perform_mach_node_analysis() {
+}
+
+int MachNode::pd_alignment_required() const {
+  return 1;
+}
+
+int MachNode::compute_padding(int current_offset) const {
+  return 0;
+}
+
 // Should the Matcher clone shifts on addressing modes, expecting them
 // to be subsumed into complex addressing expressions or compute them
 // into registers?
@@ -2164,6 +2175,13 @@ class HandlerImpl {
 };
 
+class Node::PD {
+public:
+  enum NodeFlags {
+    _last_flag = Node::_last_flag
+  };
+};
+
 %} // end source_hpp
 
 source %{

src/hotspot/cpu/s390/s390.ad
@@ -605,6 +605,17 @@ static Register reg_to_register_object(int register_encoding);
 // from the start of the call to the point where the return address
 // will point.
 
+void PhaseOutput::pd_perform_mach_node_analysis() {
+}
+
+int MachNode::pd_alignment_required() const {
+  return 1;
+}
+
+int MachNode::compute_padding(int current_offset) const {
+  return 0;
+}
+
 int MachCallStaticJavaNode::ret_addr_offset() {
   if (_method) {
     return 8;
@@ -1423,6 +1434,13 @@ class HandlerImpl {
   }
 };
 
+class Node::PD {
+public:
+  enum NodeFlags {
+    _last_flag = Node::_last_flag
+  };
+};
+
 %} // end source_hpp section
 
 source %{

src/hotspot/cpu/sparc/sparc.ad
@@ -471,6 +471,13 @@ class HandlerImpl {
   }
 };
 
+class Node::PD {
+public:
+  enum NodeFlags {
+    _last_flag = Node::_last_flag
+  };
+};
+
 %}
 
 source %{
@@ -483,6 +490,17 @@ static FloatRegister reg_to_SingleFloatRegister_object(int register_encoding);
 static FloatRegister reg_to_DoubleFloatRegister_object(int register_encoding);
 static Register reg_to_register_object(int register_encoding);
 
+void PhaseOutput::pd_perform_mach_node_analysis() {
+}
+
+int MachNode::pd_alignment_required() const {
+  return 1;
+}
+
+int MachNode::compute_padding(int current_offset) const {
+  return 0;
+}
+
 // Used by the DFA in dfa_sparc.cpp.
 // Check for being able to use a V9 branch-on-register. Requires a
 // compare-vs-zero, equal/not-equal, of a value which was zero- or sign-

src/hotspot/cpu/x86/c2_intelJccErratum_x86.cpp
@@ -53,7 +53,7 @@ bool IntelJccErratum::is_jcc_erratum_branch(const Block* block, const MachNode*
 }
 
 int IntelJccErratum::jcc_erratum_taint_node(MachNode* node, PhaseRegAlloc* regalloc) {
-  node->add_flag(Node::Flag_intel_jcc_erratum);
+  node->add_flag(Node::PD::Flag_intel_jcc_erratum);
   return node->size(regalloc);
 }
@@ -99,7 +99,7 @@ int IntelJccErratum::compute_padding(uintptr_t current_offset, const MachNode* m
   int jcc_size = mach->size(regalloc);
   if (index_in_block < block->number_of_nodes() - 1) {
     Node* next = block->get_node(index_in_block + 1);
-    if (next->is_Mach() && (next->as_Mach()->flags() & Node::Flag_intel_jcc_erratum)) {
+    if (next->is_Mach() && (next->as_Mach()->flags() & Node::PD::Flag_intel_jcc_erratum)) {
       jcc_size += mach->size(regalloc);
     }
   }
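
The two one-line changes above only requalify the flag, which now lives in the platform enum. The mitigation itself pads jump instructions that would cross, or end on, a 32-byte boundary, so that each branch lands inside a single 32-byte fetch window. A minimal sketch of that boundary test (assumed 32-byte window and a hypothetical helper name, not HotSpot's actual routine):

```cpp
// Given the offset at which a branch would be emitted and its size, compute
// the NOP padding that pushes the whole instruction into the next 32-byte
// window when it would intersect a boundary.
#include <cassert>
#include <cstdint>

static const uintptr_t kBoundary = 32;

static int jcc_padding(uintptr_t offset, int branch_size) {
  uintptr_t first_line = offset / kBoundary;
  uintptr_t last_line  = (offset + branch_size) / kBoundary; // one past last byte
  if (first_line == last_line) {
    return 0; // fits entirely inside one 32-byte window: safe
  }
  // Align the start of the branch to the next 32-byte boundary.
  return (int)(kBoundary - (offset % kBoundary));
}

int main() {
  assert(jcc_padding(0, 2)  == 0); // bytes [0,2) stay inside the first window
  assert(jcc_padding(30, 2) == 2); // would end exactly on the boundary: pad
  assert(jcc_padding(31, 6) == 1); // would cross the boundary: pad to 32
  return 0;
}
```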

src/hotspot/cpu/x86/x86.ad
@@ -1165,11 +1165,51 @@ class HandlerImpl {
 #endif
 };
 
+class Node::PD {
+public:
+  enum NodeFlags {
+    Flag_intel_jcc_erratum = Node::_last_flag << 1,
+    _last_flag             = Flag_intel_jcc_erratum
+  };
+};
+
 %} // end source_hpp
 
 source %{
 
 #include "opto/addnode.hpp"
+#include "c2_intelJccErratum_x86.hpp"
+
+void PhaseOutput::pd_perform_mach_node_analysis() {
+  if (VM_Version::has_intel_jcc_erratum()) {
+    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
+    _buf_sizes._code += extra_padding;
+  }
+}
+
+int MachNode::pd_alignment_required() const {
+  PhaseOutput* output = Compile::current()->output();
+  Block* block = output->block();
+  int index = output->index();
+  if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(block, this, index)) {
+    // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
+    return IntelJccErratum::largest_jcc_size() + 1;
+  } else {
+    return 1;
+  }
+}
+
+int MachNode::compute_padding(int current_offset) const {
+  if (flags() & Node::PD::Flag_intel_jcc_erratum) {
+    Compile* C = Compile::current();
+    PhaseOutput* output = C->output();
+    Block* block = output->block();
+    int index = output->index();
+    return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
+  } else {
+    return 0;
+  }
+}
+
 // Emit exception handler code.
 // Stuff framesize into a register and call a VM stub routine.
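
The +1 in pd_alignment_required() is deliberate: the generic estimate in shorten_branches() charges (alignment_required() - 1) * relocInfo::addr_unit() bytes of worst-case padding per node, so returning largest_jcc_size() + 1 budgets exactly one largest JCC worth of padding on x86, where addr_unit() is 1. A worked check with an assumed size:

```cpp
// Worked check (illustrative value): the conservative size estimate in
// shorten_branches() recovers the erratum padding budget from the
// alignment contract. Assume largest_jcc_size() were 20 bytes.
#include <cassert>

int main() {
  const int addr_unit          = 1;                     // x86: unit is one byte
  const int largest_jcc_size   = 20;                    // hypothetical value
  const int alignment_required = largest_jcc_size + 1;  // pd_alignment_required()
  // Generic code charges (alignment_required - 1) * addr_unit per node:
  const int worst_case_padding = (alignment_required - 1) * addr_unit;
  assert(worst_case_padding == largest_jcc_size);       // exactly the erratum budget
  return 0;
}
```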

src/hotspot/share/opto/machnode.hpp
@@ -286,11 +286,12 @@ public:
   // Return the alignment required (in units of relocInfo::addr_unit())
   // for this instruction (must be a power of 2)
-  virtual int alignment_required() const { return 1; }
+  int pd_alignment_required() const;
+  virtual int alignment_required() const { return pd_alignment_required(); }
 
   // Return the padding (in bytes) to be emitted before this
   // instruction to properly align it.
-  virtual int compute_padding(int current_offset) const { return 0; }
+  virtual int compute_padding(int current_offset) const;
 
   // Return number of relocatable values contained in this instruction
   virtual int reloc() const { return 0; }

src/hotspot/share/opto/node.cpp
@@ -28,6 +28,7 @@
 #include "libadt/vectset.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
+#include "opto/ad.hpp"
 #include "opto/castnode.hpp"
 #include "opto/cfgnode.hpp"
 #include "opto/connode.hpp"
@@ -1033,7 +1034,12 @@ bool Node::verify_jvms(const JVMState* using_jvms) const {
 //------------------------------init_NodeProperty------------------------------
 void Node::init_NodeProperty() {
   assert(_max_classes <= max_jushort, "too many NodeProperty classes");
-  assert(_max_flags <= max_jushort, "too many NodeProperty flags");
+  assert(max_flags() <= max_jushort, "too many NodeProperty flags");
+}
+
+//-----------------------------max_flags---------------------------------------
+juint Node::max_flags() {
+  return (PD::_last_flag << 1) - 1; // allow flags combination
 }
 
 #endif

src/hotspot/share/opto/node.hpp
@@ -740,25 +740,28 @@ public:
     Flag_is_scheduled        = Flag_is_reduction << 1,
     Flag_has_vector_mask_set = Flag_is_scheduled << 1,
     Flag_is_expensive        = Flag_has_vector_mask_set << 1,
-    Flag_intel_jcc_erratum   = Flag_is_expensive << 1,
-    _max_flags = (Flag_intel_jcc_erratum << 1) - 1 // allow flags combination
+    _last_flag               = Flag_is_expensive
   };
 
+  class PD;
+
 private:
   jushort _class_id;
   jushort _flags;
 
+  static juint max_flags();
+
 protected:
   // These methods should be called from constructors only.
   void init_class_id(jushort c) {
     _class_id = c; // cast out const
   }
   void init_flags(uint fl) {
-    assert(fl <= _max_flags, "invalid node flag");
+    assert(fl <= max_flags(), "invalid node flag");
     _flags |= fl;
   }
   void clear_flag(uint fl) {
-    assert(fl <= _max_flags, "invalid node flag");
+    assert(fl <= max_flags(), "invalid node flag");
     _flags &= ~fl;
   }
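
The _max_flags constant becomes a function because node.hpp now only forward-declares Node::PD: each platform's .ad file supplies the body, which reaches C++ through the generated AD header behind "opto/ad.hpp", so the shared header cannot evaluate PD::_last_flag in an enum initializer. node.cpp, which does include "opto/ad.hpp", computes the mask out of line. A minimal sketch of that arrangement (illustrative names, made-up flag value):

```cpp
// --- node_sketch.hpp: the shared header sees only an incomplete Node::PD,
// so it cannot fold PD::_last_flag into an enum constant.
struct Node {
  struct PD;                    // generated per platform, body not visible here
  static unsigned max_flags();  // defined where PD is complete
};

// --- ad_sketch.hpp: stands in for the generated platform header.
struct Node::PD {
  enum { _last_flag = 1u << 13 };
};

// --- node_sketch.cpp: includes the platform header, so PD is complete here.
unsigned Node::max_flags() {
  return (PD::_last_flag << 1) - 1; // 0x3FFF: mask of every defined flag bit
}

int main() {
  return Node::max_flags() == 0x3FFF ? 0 : 1;
}
```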

src/hotspot/share/opto/output.cpp
@@ -53,9 +53,6 @@
 #include "utilities/macros.hpp"
 #include "utilities/powerOfTwo.hpp"
 #include "utilities/xmlstream.hpp"
-#ifdef X86
-#include "c2_intelJccErratum_x86.hpp"
-#endif
 
 #ifndef PRODUCT
 #define DEBUG_ARG(x) , x
@@ -243,7 +240,10 @@ PhaseOutput::PhaseOutput()
   _node_bundling_limit(0),
   _node_bundling_base(NULL),
   _orig_pc_slot(0),
-  _orig_pc_slot_offset_in_bytes(0) {
+  _orig_pc_slot_offset_in_bytes(0),
+  _buf_sizes(),
+  _block(NULL),
+  _index(0) {
   C->set_output(this);
   if (C->stub_name() == NULL) {
     _orig_pc_slot = C->fixed_slots() - (sizeof(address) / VMRegImpl::stack_slot_size);
@@ -257,6 +257,15 @@ PhaseOutput::~PhaseOutput() {
   }
 }
 
+void PhaseOutput::perform_mach_node_analysis() {
+  // Late barrier analysis must be done after schedule and bundle
+  // Otherwise liveness based spilling will fail
+  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
+  bs->late_barrier_analysis();
+
+  pd_perform_mach_node_analysis();
+}
+
 // Convert Nodes to instruction bits and pass off to the VM
 void PhaseOutput::Output() {
   // RootNode goes
@@ -320,10 +329,10 @@ void PhaseOutput::Output() {
   }
 
   // Keeper of sizing aspects
-  BufferSizingData buf_sizes = BufferSizingData();
+  _buf_sizes = BufferSizingData();
 
   // Initialize code buffer
-  estimate_buffer_size(buf_sizes._const);
+  estimate_buffer_size(_buf_sizes._const);
   if (C->failing()) return;
// Pre-compute the length of blocks and replace
@@ -331,27 +340,17 @@ void PhaseOutput::Output() {
   // Must be done before ScheduleAndBundle due to SPARC delay slots
   uint* blk_starts = NEW_RESOURCE_ARRAY(uint, C->cfg()->number_of_blocks() + 1);
   blk_starts[0] = 0;
-  shorten_branches(blk_starts, buf_sizes);
+  shorten_branches(blk_starts);
 
   ScheduleAndBundle();
   if (C->failing()) {
     return;
   }
 
-  // Late barrier analysis must be done after schedule and bundle
-  // Otherwise liveness based spilling will fail
-  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
-  bs->late_barrier_analysis();
-
-#ifdef X86
-  if (VM_Version::has_intel_jcc_erratum()) {
-    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
-    buf_sizes._code += extra_padding;
-  }
-#endif
+  perform_mach_node_analysis();
 
   // Complete sizing of codebuffer
-  CodeBuffer* cb = init_buffer(buf_sizes);
+  CodeBuffer* cb = init_buffer();
   if (cb == NULL || C->failing()) {
     return;
   }
@@ -433,7 +432,7 @@ void PhaseOutput::compute_loop_first_inst_sizes() {
 
 // The architecture description provides short branch variants for some long
 // branch instructions. Replace eligible long branches with short branches.
-void PhaseOutput::shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes) {
+void PhaseOutput::shorten_branches(uint* blk_starts) {
 
   // Compute size of each block, method size, and relocation information size
   uint nblocks = C->cfg()->number_of_blocks();
@@ -468,6 +467,7 @@ void PhaseOutput::shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes
   uint nop_size = (new MachNopNode())->size(C->regalloc());
   for (uint i = 0; i < nblocks; i++) { // For all blocks
     Block* block = C->cfg()->get_block(i);
+    _block = block;
 
     // During short branch replacement, we store the relative (to blk_starts)
     // offset of jump in jmp_offset, rather than the absolute offset of jump.
@@ -483,18 +483,12 @@ void PhaseOutput::shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes
     uint last_inst = block->number_of_nodes();
     uint blk_size = 0;
     for (uint j = 0; j < last_inst; j++) {
-      Node* nj = block->get_node(j);
+      _index = j;
+      Node* nj = block->get_node(_index);
 
       // Handle machine instruction nodes
       if (nj->is_Mach()) {
-        MachNode *mach = nj->as_Mach();
+        MachNode* mach = nj->as_Mach();
         blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
-#ifdef X86
-        if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(block, mach, j)) {
-          // Conservatively add worst case padding
-          blk_size += IntelJccErratum::largest_jcc_size();
-        }
-#endif
         reloc_size += mach->reloc();
         if (mach->is_MachCall()) {
           // add size information for trampoline stub
@@ -697,9 +691,9 @@ void PhaseOutput::shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes
   // The CodeBuffer will expand the locs array if this estimate is too low.
   reloc_size *= 10 / sizeof(relocInfo);
 
-  buf_sizes._reloc = reloc_size;
-  buf_sizes._code  = code_size;
-  buf_sizes._stub  = stub_size;
+  _buf_sizes._reloc = reloc_size;
+  _buf_sizes._code  = code_size;
+  _buf_sizes._stub  = stub_size;
 }
 
 //------------------------------FillLocArray-----------------------------------
@@ -1239,11 +1233,10 @@ void PhaseOutput::estimate_buffer_size(int& const_req) {
 
   init_scratch_buffer_blob(const_req);
 }
 
-CodeBuffer* PhaseOutput::init_buffer(BufferSizingData& buf_sizes) {
-  int stub_req  = buf_sizes._stub;
-  int code_req  = buf_sizes._code;
-  int const_req = buf_sizes._const;
+CodeBuffer* PhaseOutput::init_buffer() {
+  int stub_req  = _buf_sizes._stub;
+  int code_req  = _buf_sizes._code;
+  int const_req = _buf_sizes._const;
 
   int pad_req = NativeCall::instruction_size;
@@ -1272,7 +1265,7 @@ CodeBuffer* PhaseOutput::init_buffer(BufferSizingData& buf_sizes) {
   total_req += deopt_handler_req; // deopt MH handler
 
   CodeBuffer* cb = code_buffer();
-  cb->initialize(total_req, buf_sizes._reloc);
+  cb->initialize(total_req, _buf_sizes._reloc);
 
   // Have we run out of code space?
   if ((cb->blob() == NULL) || (!CompileBroker::should_compile_new_jobs())) {
@@ -1361,6 +1354,7 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
   for (uint i = 0; i < nblocks; i++) {
     Block* block = C->cfg()->get_block(i);
+    _block = block;
     Node* head = block->head();
 
     // If this block needs to start aligned (i.e, can be reached other
@@ -1391,6 +1385,7 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
     // Emit block normally, except for last instruction.
     // Emit means "dump code bits into code buffer".
     for (uint j = 0; j<last_inst; j++) {
+      _index = j;
 
       // Get the node
       Node* n = block->get_node(j);
@@ -1437,12 +1432,6 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
           // Avoid back to back some instructions.
           padding = nop_size;
         }
-#ifdef X86
-        if (mach->flags() & Node::Flag_intel_jcc_erratum) {
-          assert(padding == 0, "can't have contradicting padding requirements");
-          padding = IntelJccErratum::compute_padding(current_offset, mach, block, j, C->regalloc());
-        }
-#endif
 
         if (padding > 0) {
           assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
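
PhaseOutput also grows an emission cursor: shorten_branches() and fill_buffer() record the block and instruction index they are visiting in _block/_index, so that MachNode::pd_alignment_required() and compute_padding(), whose signatures stay parameter-free, can recover their position via Compile::current()->output(). A toy version of the pattern (illustrative types, not HotSpot's):

```cpp
// The phase records which block/instruction it is visiting so that hooks
// with fixed signatures can recover their position without new parameters.
#include <cstdio>
#include <vector>

struct Insn { int size; };

class OutputPhase {
 public:
  explicit OutputPhase(const std::vector<std::vector<Insn>>& cfg) : _cfg(cfg) {}

  // Accessors mirroring PhaseOutput::block()/index().
  int block() const { return _block; }
  int index() const { return _index; }

  void walk() {
    for (size_t b = 0; b < _cfg.size(); b++) {
      _block = (int)b;
      for (size_t j = 0; j < _cfg[b].size(); j++) {
        _index = (int)j;   // cursor updated before each visit
        visit(_cfg[b][j]); // visit() needs no block/index arguments
      }
    }
  }

 private:
  void visit(const Insn& i) {
    std::printf("B%d[%d]: %d bytes\n", block(), index(), i.size);
  }
  const std::vector<std::vector<Insn>>& _cfg;
  int _block = 0;
  int _index = 0;
};

int main() {
  std::vector<std::vector<Insn>> cfg = {{{4}, {2}}, {{6}}};
  OutputPhase(cfg).walk();
  return 0;
}
```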

src/hotspot/share/opto/output.hpp
@@ -96,6 +96,13 @@ private:
   ConstantTable _constant_table; // The constant table for this compilation unit.
 
+  BufferSizingData _buf_sizes;
+  Block* _block;
+  uint _index;
+
+  void perform_mach_node_analysis();
+  void pd_perform_mach_node_analysis();
+
 public:
   PhaseOutput();
   ~PhaseOutput();
@@ -119,9 +126,13 @@ public:
   // Constant table
   ConstantTable& constant_table() { return _constant_table; }
 
+  // Code emission iterator
+  Block* block() { return _block; }
+  int index() { return _index; }
+
   // The architecture description provides short branch variants for some long
   // branch instructions. Replace eligible long branches with short branches.
-  void shorten_branches(uint* blk_starts, BufferSizingData& buf_sizes);
+  void shorten_branches(uint* blk_starts);
   ObjectValue* sv_for_node_id(GrowableArray<ScopeValue*> *objs, int id);
   void set_sv_for_object_node(GrowableArray<ScopeValue*> *objs, ObjectValue* sv);
   void FillLocArray( int idx, MachSafePointNode* sfpt, Node *local,
@@ -132,7 +143,7 @@ public:
   // Initialize code buffer
   void estimate_buffer_size(int& const_req);
-  CodeBuffer* init_buffer(BufferSizingData& buf_sizes);
+  CodeBuffer* init_buffer();
 
   // Write out basic block data to code buffer
   void fill_buffer(CodeBuffer* cb, uint* blk_starts);